1/* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department and William Jolitz of UUNET Technologies Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 42 * $FreeBSD: head/sys/amd64/amd64/pmap.c 117206 2003-07-03 20:18:02Z alc $ 43 */ 44/*- 45 * Copyright (c) 2003 Networks Associates Technology, Inc. 46 * All rights reserved. 47 * 48 * This software was developed for the FreeBSD Project by Jake Burkholder, 49 * Safeport Network Services, and Network Associates Laboratories, the 50 * Security Research Division of Network Associates, Inc. under 51 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 52 * CHATS research program. 53 * 54 * Redistribution and use in source and binary forms, with or without 55 * modification, are permitted provided that the following conditions 56 * are met: 57 * 1. Redistributions of source code must retain the above copyright 58 * notice, this list of conditions and the following disclaimer. 59 * 2. Redistributions in binary form must reproduce the above copyright 60 * notice, this list of conditions and the following disclaimer in the 61 * documentation and/or other materials provided with the distribution. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 */ 75 76/* 77 * Manages physical address maps. 78 * 79 * In addition to hardware address maps, this 80 * module is called upon to provide software-use-only 81 * maps which may or may not be stored in the same 82 * form as hardware maps. These pseudo-maps are 83 * used to store intermediate results from copy 84 * operations to and from address spaces. 85 * 86 * Since the information managed by this module is 87 * also stored by the logical address mapping module, 88 * this module may throw away valid virtual-to-physical 89 * mappings at almost any time. However, invalidations 90 * of virtual-to-physical mappings must be done as 91 * requested. 92 * 93 * In order to cope with hardware architectures which 94 * make virtual-to-physical map invalidates expensive, 95 * this module may delay invalidate or reduced protection 96 * operations until such time as they are actually 97 * necessary. This module is given full information as 98 * to which processors are currently using which maps, 99 * and to when physical maps must be made correct. 100 */ 101 102#include "opt_msgbuf.h" 103#include "opt_kstack_pages.h" 104 105#include <sys/param.h> 106#include <sys/systm.h> 107#include <sys/kernel.h> 108#include <sys/lock.h> 109#include <sys/mman.h> 110#include <sys/msgbuf.h> 111#include <sys/mutex.h> 112#include <sys/proc.h> 113#include <sys/sx.h> 114#include <sys/user.h> 115#include <sys/vmmeter.h> 116#include <sys/sysctl.h> 117 118#include <vm/vm.h> 119#include <vm/vm_param.h> 120#include <vm/vm_kern.h> 121#include <vm/vm_page.h> 122#include <vm/vm_map.h> 123#include <vm/vm_object.h> 124#include <vm/vm_extern.h> 125#include <vm/vm_pageout.h> 126#include <vm/vm_pager.h> 127#include <vm/uma.h> 128#include <vm/uma_int.h> 129 130#include <machine/cpu.h> 131#include <machine/cputypes.h> 132#include <machine/md_var.h> 133#include <machine/specialreg.h> 134 135#define PMAP_KEEP_PDIRS 136#ifndef PMAP_SHPGPERPROC 137#define PMAP_SHPGPERPROC 200 138#endif 139 140#if defined(DIAGNOSTIC) 141#define PMAP_DIAGNOSTIC 142#endif 143 144#define MINPV 2048 145 146#if !defined(PMAP_DIAGNOSTIC) 147#define PMAP_INLINE __inline 148#else 149#define PMAP_INLINE 150#endif 151 152/* 153 * Given a map and a machine independent protection code, 154 * convert to a vax protection code. 155 */ 156#define pte_prot(m, p) (protection_codes[p]) 157static pt_entry_t protection_codes[8]; 158 159struct pmap kernel_pmap_store; 160LIST_HEAD(pmaplist, pmap); 161static struct pmaplist allpmaps; 162static struct mtx allpmaps_lock; 163 164vm_paddr_t avail_start; /* PA of first available physical page */ 165vm_paddr_t avail_end; /* PA of last available physical page */ 166vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 167vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 168static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
*/ 169 170static int nkpt; 171static int ndmpdp; 172static vm_paddr_t dmaplimit; 173vm_offset_t kernel_vm_end; 174 175static u_int64_t KPTphys; /* phys addr of kernel level 1 */ 176static u_int64_t KPDphys; /* phys addr of kernel level 2 */ 177static u_int64_t KPDPphys; /* phys addr of kernel level 3 */ 178u_int64_t KPML4phys; /* phys addr of kernel level 4 */ 179 180static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ 181static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ 182 183/* 184 * Data for the pv entry allocation mechanism 185 */ 186static uma_zone_t pvzone; 187static struct vm_object pvzone_obj; 188static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 189int pmap_pagedaemon_waken; 190 191/* 192 * All those kernel PT submaps that BSD is so fond of 193 */ 194pt_entry_t *CMAP1 = 0; 195static pt_entry_t *ptmmap; 196caddr_t CADDR1 = 0, ptvmmap = 0; 197static pt_entry_t *msgbufmap; 198struct msgbuf *msgbufp = 0; 199 200/* 201 * Crashdump maps. 202 */ 203static pt_entry_t *pt_crashdumpmap; 204static caddr_t crashdumpmap; 205 206static PMAP_INLINE void free_pv_entry(pv_entry_t pv); 207static pv_entry_t get_pv_entry(void); 208static void amd64_protection_init(void); 209static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); 210 211static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); 212static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); 213static int pmap_remove_entry(struct pmap *pmap, vm_page_t m, 214 vm_offset_t va); 215static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, 216 vm_page_t mpte, vm_page_t m); 217 218static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va); 219 220static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex); 221static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); 222static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); 223static vm_offset_t pmap_kmem_choose(vm_offset_t addr); 224static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); 225 226CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); 227CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); 228 229/* 230 * Move the kernel virtual free pointer to the next 231 * 2MB. 
This is used to help improve performance 232 * by using a large (2MB) page for much of the kernel 233 * (.text, .data, .bss) 234 */ 235static vm_offset_t 236pmap_kmem_choose(vm_offset_t addr) 237{ 238 vm_offset_t newaddr = addr; 239 240 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 241 return newaddr; 242} 243 244/********************/ 245/* Inline functions */ 246/********************/ 247 248/* Return a non-clipped PD index for a given VA */ 249static __inline vm_pindex_t 250pmap_pde_pindex(vm_offset_t va) 251{ 252 return va >> PDRSHIFT; 253} 254 255 256/* Return various clipped indexes for a given VA */ 257static __inline vm_pindex_t 258pmap_pte_index(vm_offset_t va) 259{ 260 261 return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); 262} 263 264static __inline vm_pindex_t 265pmap_pde_index(vm_offset_t va) 266{ 267 268 return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 269} 270 271static __inline vm_pindex_t 272pmap_pdpe_index(vm_offset_t va) 273{ 274 275 return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); 276} 277 278static __inline vm_pindex_t 279pmap_pml4e_index(vm_offset_t va) 280{ 281 282 return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); 283} 284 285/* Return a pointer to the PML4 slot that corresponds to a VA */ 286static __inline pml4_entry_t * 287pmap_pml4e(pmap_t pmap, vm_offset_t va) 288{ 289 290 if (!pmap) 291 return NULL; 292 return (&pmap->pm_pml4[pmap_pml4e_index(va)]); 293} 294 295/* Return a pointer to the PDP slot that corresponds to a VA */ 296static __inline pdp_entry_t * 297pmap_pdpe(pmap_t pmap, vm_offset_t va) 298{ 299 pml4_entry_t *pml4e; 300 pdp_entry_t *pdpe; 301 302 pml4e = pmap_pml4e(pmap, va); 303 if (pml4e == NULL || (*pml4e & PG_V) == 0) 304 return NULL; 305 pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME); 306 return (&pdpe[pmap_pdpe_index(va)]); 307} 308 309/* Return a pointer to the PD slot that corresponds to a VA */ 310static __inline pd_entry_t * 311pmap_pde(pmap_t pmap, vm_offset_t va) 312{ 313 pdp_entry_t *pdpe; 314 pd_entry_t *pde; 315 316 pdpe = pmap_pdpe(pmap, va); 317 if (pdpe == NULL || (*pdpe & PG_V) == 0) 318 return NULL; 319 pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME); 320 return (&pde[pmap_pde_index(va)]); 321} 322 323/* Return a pointer to the PT slot that corresponds to a VA */ 324static __inline pt_entry_t * 325pmap_pte(pmap_t pmap, vm_offset_t va) 326{ 327 pd_entry_t *pde; 328 pt_entry_t *pte; 329 330 pde = pmap_pde(pmap, va); 331 if (pde == NULL || (*pde & PG_V) == 0) 332 return NULL; 333 pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); 334 return (&pte[pmap_pte_index(va)]); 335} 336 337 338PMAP_INLINE pt_entry_t * 339vtopte(vm_offset_t va) 340{ 341 u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); 342 343 return (PTmap + (amd64_btop(va) & mask)); 344} 345 346static u_int64_t 347allocpages(int n) 348{ 349 u_int64_t ret; 350 351 ret = avail_start; 352 bzero((void *)ret, n * PAGE_SIZE); 353 avail_start += n * PAGE_SIZE; 354 return (ret); 355} 356 357static void 358create_pagetables(void) 359{ 360 int i; 361 362 /* Allocate pages */ 363 KPTphys = allocpages(NKPT); 364 KPML4phys = allocpages(1); 365 KPDPphys = allocpages(NKPML4E); 366 KPDphys = allocpages(NKPDPE); 367 368 ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT; 369 if (ndmpdp < 4) /* Minimum 4GB of dirmap */ 370 ndmpdp = 4; 371 DMPDPphys = allocpages(NDMPML4E); 372 DMPDphys = allocpages(ndmpdp); 373 dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; 374 375 /* Fill in the underlying page table pages */ 376 /* 
Read-only from zero to physfree */ 377 /* XXX not fully used, underneath 2M pages */ 378 for (i = 0; (i << PAGE_SHIFT) < avail_start; i++) { 379 ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT; 380 ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V; 381 } 382 383 /* Now map the page tables at their location within PTmap */ 384 for (i = 0; i < NKPT; i++) { 385 ((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT); 386 ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V; 387 } 388 389#if 0 390 /* Map from zero to end of allocations under 2M pages */ 391 /* This replaces some of the KPTphys entries above */ 392 for (i = 0; (i << PDRSHIFT) < avail_start; i++) { 393 ((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT; 394 ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS; 395 } 396#endif 397 398 /* And connect up the PD to the PDP */ 399 for (i = 0; i < NKPDPE; i++) { 400 ((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT); 401 ((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U; 402 } 403 404 405 /* Now set up the direct map space using 2MB pages */ 406 for (i = 0; i < NPDEPG * ndmpdp; i++) { 407 ((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT; 408 ((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS; 409 } 410 411 /* And the direct map space's PDP */ 412 for (i = 0; i < ndmpdp; i++) { 413 ((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT); 414 ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U; 415 } 416 417 /* And recursively map PML4 to itself in order to get PTmap */ 418 ((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys; 419 ((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U; 420 421 /* Connect the Direct Map slot up to the PML4 */ 422 ((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys; 423 ((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U; 424 425 /* Connect the KVA slot up to the PML4 */ 426 ((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys; 427 ((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U; 428} 429 430/* 431 * Bootstrap the system enough to run with virtual memory. 432 * 433 * On amd64 this is called after mapping has already been enabled 434 * and just syncs the pmap module with what has already been done. 435 * [We can't call it easily with mapping off since the kernel is not 436 * mapped with PA == VA, hence we would have to relocate every address 437 * from the linked base (virtual) address "KERNBASE" to the actual 438 * (physical) address starting relative to 0] 439 */ 440void 441pmap_bootstrap(firstaddr) 442 vm_paddr_t *firstaddr; 443{ 444 vm_offset_t va; 445 pt_entry_t *pte; 446 447 avail_start = *firstaddr; 448 449 /* 450 * Create an initial set of page tables to run the kernel in. 451 */ 452 create_pagetables(); 453 *firstaddr = avail_start; 454 455 virtual_avail = (vm_offset_t) KERNBASE + avail_start; 456 virtual_avail = pmap_kmem_choose(virtual_avail); 457 458 virtual_end = VM_MAX_KERNEL_ADDRESS; 459 460 461 /* XXX do %cr0 as well */ 462 load_cr4(rcr4() | CR4_PGE | CR4_PSE); 463 load_cr3(KPML4phys); 464 465 /* 466 * Initialize protection array. 467 */ 468 amd64_protection_init(); 469 470 /* 471 * Initialize the kernel pmap (which is statically allocated). 
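	 * create_pagetables() left the PML4 in low physical memory, which
	 * is also mapped at KERNBASE, so the kernel pmap can reference it
	 * through that alias.  pm_active is set to all ones so the kernel
	 * pmap always appears active on every CPU.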
472 */ 473 kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys); 474 kernel_pmap->pm_active = -1; /* don't allow deactivation */ 475 TAILQ_INIT(&kernel_pmap->pm_pvlist); 476 LIST_INIT(&allpmaps); 477 mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); 478 mtx_lock_spin(&allpmaps_lock); 479 LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); 480 mtx_unlock_spin(&allpmaps_lock); 481 nkpt = NKPT; 482 483 /* 484 * Reserve some special page table entries/VA space for temporary 485 * mapping of pages. 486 */ 487#define SYSMAP(c, p, v, n) \ 488 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 489 490 va = virtual_avail; 491 pte = vtopte(va); 492 493 /* 494 * CMAP1 is only used for the memory test. 495 */ 496 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 497 498 /* 499 * Crashdump maps. 500 */ 501 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); 502 503 /* 504 * ptvmmap is used for reading arbitrary physical pages via /dev/mem. 505 * XXX ptmmap is not used. 506 */ 507 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 508 509 /* 510 * msgbufp is used to map the system message buffer. 511 * XXX msgbufmap is not used. 512 */ 513 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 514 atop(round_page(MSGBUF_SIZE))) 515 516 virtual_avail = va; 517 518 *CMAP1 = 0; 519 520 invltlb(); 521} 522 523static void * 524pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 525{ 526 *flags = UMA_SLAB_PRIV; 527 return (void *)kmem_alloc(kernel_map, bytes); 528} 529 530void * 531uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 532{ 533 static vm_pindex_t colour; 534 vm_page_t m; 535 int pflags; 536 void *va; 537 538 *flags = UMA_SLAB_PRIV; 539 540 if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT) 541 pflags = VM_ALLOC_INTERRUPT; 542 else 543 pflags = VM_ALLOC_SYSTEM; 544 545 if (wait & M_ZERO) 546 pflags |= VM_ALLOC_ZERO; 547 548 for (;;) { 549 m = vm_page_alloc(NULL, colour++, pflags | VM_ALLOC_NOOBJ); 550 if (m == NULL) { 551 if (wait & M_NOWAIT) 552 return (NULL); 553 else 554 VM_WAIT; 555 } else 556 break; 557 } 558 559 va = (void *)PHYS_TO_DMAP(m->phys_addr); 560 if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) 561 pagezero(va); 562 return (va); 563} 564 565void 566uma_small_free(void *mem, int size, u_int8_t flags) 567{ 568 vm_page_t m; 569 570 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)mem)); 571 vm_page_lock_queues(); 572 vm_page_free(m); 573 vm_page_unlock_queues(); 574} 575 576/* 577 * Initialize the pmap module. 578 * Called by vm_init, to initialize any structures that the pmap 579 * system needs to map virtual memory. 580 * pmap_init has been enhanced to support in a fairly consistant 581 * way, discontiguous physical memory. 582 */ 583void 584pmap_init(phys_start, phys_end) 585 vm_paddr_t phys_start, phys_end; 586{ 587 int i; 588 int initial_pvs; 589 590 /* 591 * Allocate memory for random pmap data structures. Includes the 592 * pv_head_table. 593 */ 594 595 for(i = 0; i < vm_page_array_size; i++) { 596 vm_page_t m; 597 598 m = &vm_page_array[i]; 599 TAILQ_INIT(&m->md.pv_list); 600 m->md.pv_list_count = 0; 601 } 602 603 /* 604 * init the pv free list 605 */ 606 initial_pvs = vm_page_array_size; 607 if (initial_pvs < MINPV) 608 initial_pvs = MINPV; 609 pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, 610 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); 611 uma_zone_set_allocf(pvzone, pmap_pv_allocf); 612 uma_prealloc(pvzone, initial_pvs); 613 614 /* 615 * Now it is safe to enable pv_table recording. 
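	 * pmap_initialized is the flag pmap_enter() checks before placing
	 * a page on a pv list, so mappings created earlier in boot are
	 * never treated as managed and never consume pv entries.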
616 */ 617 pmap_initialized = TRUE; 618} 619 620/* 621 * Initialize the address space (zone) for the pv_entries. Set a 622 * high water mark so that the system can recover from excessive 623 * numbers of pv entries. 624 */ 625void 626pmap_init2() 627{ 628 int shpgperproc = PMAP_SHPGPERPROC; 629 630 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 631 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 632 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 633 pv_entry_high_water = 9 * (pv_entry_max / 10); 634 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); 635} 636 637 638/*************************************************** 639 * Low level helper routines..... 640 ***************************************************/ 641 642#if defined(PMAP_DIAGNOSTIC) 643 644/* 645 * This code checks for non-writeable/modified pages. 646 * This should be an invalid condition. 647 */ 648static int 649pmap_nw_modified(pt_entry_t ptea) 650{ 651 int pte; 652 653 pte = (int) ptea; 654 655 if ((pte & (PG_M|PG_RW)) == PG_M) 656 return 1; 657 else 658 return 0; 659} 660#endif 661 662 663/* 664 * this routine defines the region(s) of memory that should 665 * not be tested for the modified bit. 666 */ 667static PMAP_INLINE int 668pmap_track_modified(vm_offset_t va) 669{ 670 if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) 671 return 1; 672 else 673 return 0; 674} 675 676/* 677 * Normal invalidation functions. 678 * We inline these within pmap.c for speed. 679 */ 680PMAP_INLINE void 681pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 682{ 683 684 if (pmap == kernel_pmap || pmap->pm_active) 685 invlpg(va); 686} 687 688PMAP_INLINE void 689pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 690{ 691 vm_offset_t addr; 692 693 if (pmap == kernel_pmap || pmap->pm_active) 694 for (addr = sva; addr < eva; addr += PAGE_SIZE) 695 invlpg(addr); 696} 697 698PMAP_INLINE void 699pmap_invalidate_all(pmap_t pmap) 700{ 701 702 if (pmap == kernel_pmap || pmap->pm_active) 703 invltlb(); 704} 705 706/* 707 * Are we current address space or kernel? 708 */ 709static __inline int 710pmap_is_current(pmap_t pmap) 711{ 712 return (pmap == kernel_pmap || 713 (pmap->pm_pml4[PML4PML4I] & PG_FRAME) == (PML4pml4e[0] & PG_FRAME)); 714} 715 716/* 717 * Routine: pmap_extract 718 * Function: 719 * Extract the physical page address associated 720 * with the given map/virtual_address pair. 721 */ 722vm_paddr_t 723pmap_extract(pmap, va) 724 register pmap_t pmap; 725 vm_offset_t va; 726{ 727 vm_paddr_t rtval; 728 pt_entry_t *pte; 729 pd_entry_t pde, *pdep; 730 731 if (pmap == 0) 732 return 0; 733 pdep = pmap_pde(pmap, va); 734 if (pdep) { 735 pde = *pdep; 736 if (pde) { 737 if ((pde & PG_PS) != 0) { 738 rtval = (pde & ~PDRMASK) | (va & PDRMASK); 739 return rtval; 740 } 741 pte = pmap_pte(pmap, va); 742 rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); 743 return rtval; 744 } 745 } 746 return 0; 747 748} 749 750vm_paddr_t 751pmap_kextract(vm_offset_t va) 752{ 753 pd_entry_t *pde; 754 vm_paddr_t pa; 755 756 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 757 pa = DMAP_TO_PHYS(va); 758 } else { 759 pde = pmap_pde(kernel_pmap, va); 760 if (*pde & PG_PS) { 761 pa = (*pde & ~(NBPDR - 1)) | (va & (NBPDR - 1)); 762 } else { 763 pa = *vtopte(va); 764 pa = (pa & PG_FRAME) | (va & PAGE_MASK); 765 } 766 } 767 return pa; 768} 769 770/*************************************************** 771 * Low level mapping routines..... 
772 ***************************************************/ 773 774/* 775 * Add a wired page to the kva. 776 * Note: not SMP coherent. 777 */ 778PMAP_INLINE void 779pmap_kenter(vm_offset_t va, vm_paddr_t pa) 780{ 781 pt_entry_t *pte; 782 783 pte = vtopte(va); 784 pte_store(pte, pa | PG_RW | PG_V | PG_G); 785} 786 787/* 788 * Remove a page from the kernel pagetables. 789 * Note: not SMP coherent. 790 */ 791PMAP_INLINE void 792pmap_kremove(vm_offset_t va) 793{ 794 pt_entry_t *pte; 795 796 pte = vtopte(va); 797 pte_clear(pte); 798} 799 800/* 801 * Used to map a range of physical addresses into kernel 802 * virtual address space. 803 * 804 * The value passed in '*virt' is a suggested virtual address for 805 * the mapping. Architectures which can support a direct-mapped 806 * physical to virtual region can return the appropriate address 807 * within that region, leaving '*virt' unchanged. Other 808 * architectures should map the pages starting at '*virt' and 809 * update '*virt' with the first usable address after the mapped 810 * region. 811 */ 812vm_offset_t 813pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 814{ 815 return PHYS_TO_DMAP(start); 816} 817 818 819/* 820 * Add a list of wired pages to the kva 821 * this routine is only used for temporary 822 * kernel mappings that do not need to have 823 * page modification or references recorded. 824 * Note that old mappings are simply written 825 * over. The page *must* be wired. 826 * Note: SMP coherent. Uses a ranged shootdown IPI. 827 */ 828void 829pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) 830{ 831 vm_offset_t va; 832 833 va = sva; 834 while (count-- > 0) { 835 pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); 836 va += PAGE_SIZE; 837 m++; 838 } 839 pmap_invalidate_range(kernel_pmap, sva, va); 840} 841 842/* 843 * This routine tears out page mappings from the 844 * kernel -- it is meant only for temporary mappings. 845 * Note: SMP coherent. Uses a ranged shootdown IPI. 846 */ 847void 848pmap_qremove(vm_offset_t sva, int count) 849{ 850 vm_offset_t va; 851 852 va = sva; 853 while (count-- > 0) { 854 pmap_kremove(va); 855 va += PAGE_SIZE; 856 } 857 pmap_invalidate_range(kernel_pmap, sva, va); 858} 859 860static vm_page_t 861pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 862{ 863 vm_page_t m; 864 865retry: 866 m = vm_page_lookup(object, pindex); 867 if (m != NULL) { 868 vm_page_lock_queues(); 869 if (vm_page_sleep_if_busy(m, FALSE, "pplookp")) 870 goto retry; 871 vm_page_unlock_queues(); 872 } 873 return m; 874} 875 876/*************************************************** 877 * Page table page management routines..... 878 ***************************************************/ 879 880/* 881 * This routine unholds page table pages, and if the hold count 882 * drops to zero, then it decrements the wire count. 
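 * The hold count tracks the valid entries installed in a page table
 * page, while the wire count merely keeps it resident.  Dropping the
 * last hold unlinks the page from its parent level and releases the
 * parent's own hold, so freeing an empty PT page can cascade upward
 * through its PD and PDP pages.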
883 */ 884static int 885_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 886{ 887 888 while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt")) 889 vm_page_lock_queues(); 890 891 if (m->hold_count == 0) { 892 vm_offset_t pteva; 893 /* 894 * unmap the page table page 895 */ 896 if (m->pindex >= (NUPDE + NUPDPE)) { 897 /* PDP page */ 898 pml4_entry_t *pml4; 899 pml4 = pmap_pml4e(pmap, va); 900 pteva = (vm_offset_t) PDPmap + amd64_ptob(m->pindex - (NUPDE + NUPDPE)); 901 *pml4 = 0; 902 } else if (m->pindex >= NUPDE) { 903 /* PD page */ 904 pdp_entry_t *pdp; 905 pdp = pmap_pdpe(pmap, va); 906 pteva = (vm_offset_t) PDmap + amd64_ptob(m->pindex - NUPDE); 907 *pdp = 0; 908 } else { 909 /* PTE page */ 910 pd_entry_t *pd; 911 pd = pmap_pde(pmap, va); 912 pteva = (vm_offset_t) PTmap + amd64_ptob(m->pindex); 913 *pd = 0; 914 } 915 --pmap->pm_stats.resident_count; 916 if (m->pindex < NUPDE) { 917 /* Unhold the PD page */ 918 vm_page_t pdpg; 919 pdpg = vm_page_lookup(pmap->pm_pteobj, NUPDE + pmap_pdpe_index(va)); 920 while (vm_page_sleep_if_busy(pdpg, FALSE, "pulook")) 921 vm_page_lock_queues(); 922 vm_page_unhold(pdpg); 923 if (pdpg->hold_count == 0) 924 _pmap_unwire_pte_hold(pmap, va, pdpg); 925 } 926 if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { 927 /* Unhold the PDP page */ 928 vm_page_t pdppg; 929 pdppg = vm_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + pmap_pml4e_index(va)); 930 while (vm_page_sleep_if_busy(pdppg, FALSE, "pulooK")) 931 vm_page_lock_queues(); 932 vm_page_unhold(pdppg); 933 if (pdppg->hold_count == 0) 934 _pmap_unwire_pte_hold(pmap, va, pdppg); 935 } 936 if (pmap_is_current(pmap)) { 937 /* 938 * Do an invltlb to make the invalidated mapping 939 * take effect immediately. 940 */ 941 pmap_invalidate_page(pmap, pteva); 942 } 943 944 /* 945 * If the page is finally unwired, simply free it. 946 */ 947 --m->wire_count; 948 if (m->wire_count == 0) { 949 vm_page_busy(m); 950 vm_page_free_zero(m); 951 atomic_subtract_int(&cnt.v_wire_count, 1); 952 } 953 return 1; 954 } 955 return 0; 956} 957 958static PMAP_INLINE int 959pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 960{ 961 vm_page_unhold(m); 962 if (m->hold_count == 0) 963 return _pmap_unwire_pte_hold(pmap, va, m); 964 else 965 return 0; 966} 967 968/* 969 * After removing a page table entry, this routine is used to 970 * conditionally free the page, and manage the hold/wire counts. 971 */ 972static int 973pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 974{ 975 vm_pindex_t ptepindex; 976 977 if (va >= VM_MAXUSER_ADDRESS) 978 return 0; 979 980 if (mpte == NULL) { 981 ptepindex = pmap_pde_pindex(va); 982 if (pmap->pm_pteobj->root && 983 pmap->pm_pteobj->root->pindex == ptepindex) { 984 mpte = pmap->pm_pteobj->root; 985 } else { 986 while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL && 987 vm_page_sleep_if_busy(mpte, FALSE, "pulook")) 988 vm_page_lock_queues(); 989 } 990 } 991 992 return pmap_unwire_pte_hold(pmap, va, mpte); 993} 994 995void 996pmap_pinit0(pmap) 997 struct pmap *pmap; 998{ 999 1000 pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys); 1001 pmap->pm_active = 0; 1002 TAILQ_INIT(&pmap->pm_pvlist); 1003 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1004 mtx_lock_spin(&allpmaps_lock); 1005 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1006 mtx_unlock_spin(&allpmaps_lock); 1007} 1008 1009/* 1010 * Initialize a preallocated and zeroed pmap structure, 1011 * such as one in a vmspace structure. 
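 * Each pmap receives its own PML4 page.  The KPML4I and DMPML4I slots
 * copied below keep the kernel map and the direct map visible in every
 * address space, and the self-referential PML4PML4I entry provides the
 * recursive PTmap window onto this pmap's own page tables.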
1012 */ 1013void 1014pmap_pinit(pmap) 1015 register struct pmap *pmap; 1016{ 1017 vm_page_t pml4pg; 1018 1019 /* 1020 * allocate object for the ptes 1021 */ 1022 if (pmap->pm_pteobj == NULL) 1023 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPDE + NUPDPE + NUPML4E + 1); 1024 1025 /* 1026 * allocate the page directory page 1027 */ 1028 pml4pg = vm_page_grab(pmap->pm_pteobj, NUPDE + NUPDPE + NUPML4E, 1029 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1030 vm_page_lock_queues(); 1031 vm_page_flag_clear(pml4pg, PG_BUSY); 1032 pml4pg->valid = VM_PAGE_BITS_ALL; 1033 vm_page_unlock_queues(); 1034 1035 pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); 1036 1037 if ((pml4pg->flags & PG_ZERO) == 0) 1038 bzero(pmap->pm_pml4, PAGE_SIZE); 1039 1040 mtx_lock_spin(&allpmaps_lock); 1041 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1042 mtx_unlock_spin(&allpmaps_lock); 1043 1044 /* Wire in kernel global address entries. */ 1045 pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U; 1046 pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U; 1047 1048 /* install self-referential address mapping entry(s) */ 1049 pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M; 1050 1051 pmap->pm_active = 0; 1052 TAILQ_INIT(&pmap->pm_pvlist); 1053 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1054} 1055 1056/* 1057 * Wire in kernel global address entries. To avoid a race condition 1058 * between pmap initialization and pmap_growkernel, this procedure 1059 * should be called after the vmspace is attached to the process 1060 * but before this pmap is activated. 1061 */ 1062void 1063pmap_pinit2(pmap) 1064 struct pmap *pmap; 1065{ 1066 /* XXX: Remove this stub when no longer called */ 1067} 1068 1069/* 1070 * this routine is called if the page table page is not 1071 * mapped correctly. 1072 */ 1073static vm_page_t 1074_pmap_allocpte(pmap, ptepindex) 1075 pmap_t pmap; 1076 vm_pindex_t ptepindex; 1077{ 1078 vm_page_t m, pdppg, pdpg; 1079 1080 /* 1081 * Find or fabricate a new pagetable page 1082 */ 1083 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1084 VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1085 1086 KASSERT(m->queue == PQ_NONE, 1087 ("_pmap_allocpte: %p->queue != PQ_NONE", m)); 1088 1089 /* 1090 * Increment the hold count for the page table page 1091 * (denoting a new mapping.) 1092 */ 1093 m->hold_count++; 1094 1095 /* 1096 * Map the pagetable page into the process address space, if 1097 * it isn't already there. 
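	 * The pte object index encodes the level of the page, and the
	 * branches below install it in the matching parent entry:
	 *
	 *	[0, NUPDE)			page table (PT) page
	 *	[NUPDE, NUPDE + NUPDPE)		page directory (PD) page
	 *	[NUPDE + NUPDPE, ...)		PDP page, wired into the PML4
	 *
	 * A missing intermediate level is created by recursing with the
	 * index of the needed PD or PDP page.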
1098 */ 1099 1100 pmap->pm_stats.resident_count++; 1101 1102 if (ptepindex >= (NUPDE + NUPDPE)) { 1103 pml4_entry_t *pml4; 1104 vm_pindex_t pml4index; 1105 1106 /* Wire up a new PDPE page */ 1107 pml4index = ptepindex - (NUPDE + NUPDPE); 1108 pml4 = &pmap->pm_pml4[pml4index]; 1109 *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; 1110 1111 } else if (ptepindex >= NUPDE) { 1112 vm_pindex_t pml4index; 1113 vm_pindex_t pdpindex; 1114 pml4_entry_t *pml4; 1115 pdp_entry_t *pdp; 1116 1117 /* Wire up a new PDE page */ 1118 pdpindex = ptepindex - NUPDE; 1119 pml4index = pdpindex >> NPML4EPGSHIFT; 1120 1121 pml4 = &pmap->pm_pml4[pml4index]; 1122 if ((*pml4 & PG_V) == 0) { 1123 /* Have to allocate a new pdp, recurse */ 1124 _pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index); 1125 } else { 1126 /* Add reference to pdp page */ 1127 pdppg = pmap_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + pml4index); 1128 pdppg->hold_count++; 1129 } 1130 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 1131 1132 /* Now find the pdp page */ 1133 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1134 *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; 1135 1136 } else { 1137 vm_pindex_t pml4index; 1138 vm_pindex_t pdpindex; 1139 pml4_entry_t *pml4; 1140 pdp_entry_t *pdp; 1141 pd_entry_t *pd; 1142 1143 /* Wire up a new PTE page */ 1144 pdpindex = ptepindex >> NPDPEPGSHIFT; 1145 pml4index = pdpindex >> NPML4EPGSHIFT; 1146 1147 /* First, find the pdp and check that its valid. */ 1148 pml4 = &pmap->pm_pml4[pml4index]; 1149 if ((*pml4 & PG_V) == 0) { 1150 /* Have to allocate a new pd, recurse */ 1151 _pmap_allocpte(pmap, NUPDE + pdpindex); 1152 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 1153 pdp = &pdp[pdpindex]; 1154 } else { 1155 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 1156 pdp = &pdp[pdpindex]; 1157 if ((*pdp & PG_V) == 0) { 1158 /* Have to allocate a new pd, recurse */ 1159 _pmap_allocpte(pmap, NUPDE + pdpindex); 1160 } else { 1161 /* Add reference to the pd page */ 1162 pdpg = pmap_page_lookup(pmap->pm_pteobj, NUPDE + pdpindex); 1163 pdpg->hold_count++; 1164 } 1165 } 1166 pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME); 1167 1168 /* Now we know where the page directory page is */ 1169 pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; 1170 *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; 1171 } 1172 1173 /* 1174 * Try to use the new mapping, but if we cannot, then 1175 * do it with the routine that maps the page explicitly. 1176 */ 1177 if ((m->flags & PG_ZERO) == 0) 1178 pmap_zero_page(m); 1179 vm_page_lock_queues(); 1180 m->valid = VM_PAGE_BITS_ALL; 1181 vm_page_flag_clear(m, PG_ZERO); 1182 vm_page_wakeup(m); 1183 vm_page_unlock_queues(); 1184 1185 return m; 1186} 1187 1188static vm_page_t 1189pmap_allocpte(pmap_t pmap, vm_offset_t va) 1190{ 1191 vm_pindex_t ptepindex; 1192 pd_entry_t *pd; 1193 vm_page_t m; 1194 1195 /* 1196 * Calculate pagetable page index 1197 */ 1198 ptepindex = pmap_pde_pindex(va); 1199 1200 /* 1201 * Get the page directory entry 1202 */ 1203 pd = pmap_pde(pmap, va); 1204 1205 /* 1206 * This supports switching from a 2MB page to a 1207 * normal 4K page. 1208 */ 1209 if (pd != 0 && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 1210 *pd = 0; 1211 pd = 0; 1212 pmap_invalidate_all(kernel_pmap); 1213 } 1214 1215 /* 1216 * If the page table page is mapped, we just increment the 1217 * hold count, and activate it. 1218 */ 1219 if (pd != 0 && (*pd & PG_V) != 0) { 1220 /* 1221 * In order to get the page table page, try the 1222 * hint first. 
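		 * The object's root page acts as a one-entry cache, so
		 * repeated faults within the same 2MB region usually find
		 * their page table page without a full vm_page_lookup().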
1223 */ 1224 if (pmap->pm_pteobj->root && 1225 (pmap->pm_pteobj->root->pindex == ptepindex)) { 1226 m = pmap->pm_pteobj->root; 1227 } else { 1228 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1229 } 1230 m->hold_count++; 1231 return m; 1232 } 1233 /* 1234 * Here if the pte page isn't mapped, or if it has been deallocated. 1235 */ 1236 m = _pmap_allocpte(pmap, ptepindex); 1237 return m; 1238} 1239 1240 1241/*************************************************** 1242* Pmap allocation/deallocation routines. 1243 ***************************************************/ 1244 1245/* 1246 * Release any resources held by the given physical map. 1247 * Called when a pmap initialized by pmap_pinit is being released. 1248 * Should only be called if the map contains no valid mappings. 1249 */ 1250void 1251pmap_release(pmap_t pmap) 1252{ 1253 vm_object_t object; 1254 vm_page_t m; 1255 1256 object = pmap->pm_pteobj; 1257 1258 KASSERT(object->ref_count == 1, 1259 ("pmap_release: pteobj reference count %d != 1", 1260 object->ref_count)); 1261 KASSERT(pmap->pm_stats.resident_count == 0, 1262 ("pmap_release: pmap resident count %ld != 0", 1263 pmap->pm_stats.resident_count)); 1264 1265 mtx_lock_spin(&allpmaps_lock); 1266 LIST_REMOVE(pmap, pm_list); 1267 mtx_unlock_spin(&allpmaps_lock); 1268 1269 vm_page_lock_queues(); 1270 while ((m = TAILQ_FIRST(&object->memq)) != NULL) { 1271 m->wire_count--; 1272 atomic_subtract_int(&cnt.v_wire_count, 1); 1273 vm_page_busy(m); 1274 vm_page_free(m); 1275 } 1276 KASSERT(TAILQ_EMPTY(&object->memq), 1277 ("pmap_release: leaking page table pages")); 1278 vm_page_unlock_queues(); 1279} 1280 1281static int 1282kvm_size(SYSCTL_HANDLER_ARGS) 1283{ 1284 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 1285 1286 return sysctl_handle_long(oidp, &ksize, 0, req); 1287} 1288SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1289 0, 0, kvm_size, "IU", "Size of KVM"); 1290 1291static int 1292kvm_free(SYSCTL_HANDLER_ARGS) 1293{ 1294 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1295 1296 return sysctl_handle_long(oidp, &kfree, 0, req); 1297} 1298SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1299 0, 0, kvm_free, "IU", "Amount of KVM free"); 1300 1301/* 1302 * grow the number of kernel page table entries, if needed 1303 */ 1304void 1305pmap_growkernel(vm_offset_t addr) 1306{ 1307 int s; 1308 vm_paddr_t paddr; 1309 vm_page_t nkpg; 1310 pd_entry_t *pde, newpdir; 1311 pdp_entry_t newpdp; 1312 1313 s = splhigh(); 1314 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1315 if (kernel_vm_end == 0) { 1316 kernel_vm_end = KERNBASE; 1317 nkpt = 0; 1318 while ((*pmap_pde(kernel_pmap, kernel_vm_end) & PG_V) != 0) { 1319 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1320 nkpt++; 1321 } 1322 } 1323 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1324 while (kernel_vm_end < addr) { 1325 pde = pmap_pde(kernel_pmap, kernel_vm_end); 1326 if (pde == NULL) { 1327 /* We need a new PDP entry */ 1328 nkpg = vm_page_alloc(NULL, nkpt, 1329 VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); 1330 if (!nkpg) 1331 panic("pmap_growkernel: no memory to grow kernel"); 1332 pmap_zero_page(nkpg); 1333 paddr = VM_PAGE_TO_PHYS(nkpg); 1334 newpdp = (pdp_entry_t) 1335 (paddr | PG_V | PG_RW | PG_A | PG_M); 1336 *pmap_pdpe(kernel_pmap, kernel_vm_end) = newpdp; 1337 continue; /* try again */ 1338 } 1339 if ((*pde & PG_V) != 0) { 1340 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1341 continue; 1342 } 1343 1344 /* 1345 * 
This index is bogus, but out of the way 1346 */ 1347 nkpg = vm_page_alloc(NULL, nkpt, 1348 VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); 1349 if (!nkpg) 1350 panic("pmap_growkernel: no memory to grow kernel"); 1351 1352 nkpt++; 1353 1354 pmap_zero_page(nkpg); 1355 paddr = VM_PAGE_TO_PHYS(nkpg); 1356 newpdir = (pd_entry_t) (paddr | PG_V | PG_RW | PG_A | PG_M); 1357 *pmap_pde(kernel_pmap, kernel_vm_end) = newpdir; 1358 1359 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1360 } 1361 splx(s); 1362} 1363 1364 1365/*************************************************** 1366 * page management routines. 1367 ***************************************************/ 1368 1369/* 1370 * free the pv_entry back to the free list 1371 */ 1372static PMAP_INLINE void 1373free_pv_entry(pv_entry_t pv) 1374{ 1375 pv_entry_count--; 1376 uma_zfree(pvzone, pv); 1377} 1378 1379/* 1380 * get a new pv_entry, allocating a block from the system 1381 * when needed. 1382 * the memory allocation is performed bypassing the malloc code 1383 * because of the possibility of allocations at interrupt time. 1384 */ 1385static pv_entry_t 1386get_pv_entry(void) 1387{ 1388 pv_entry_count++; 1389 if (pv_entry_high_water && 1390 (pv_entry_count > pv_entry_high_water) && 1391 (pmap_pagedaemon_waken == 0)) { 1392 pmap_pagedaemon_waken = 1; 1393 wakeup (&vm_pages_needed); 1394 } 1395 return uma_zalloc(pvzone, M_NOWAIT); 1396} 1397 1398/* 1399 * If it is the first entry on the list, it is actually 1400 * in the header and we must copy the following entry up 1401 * to the header. Otherwise we must search the list for 1402 * the entry. In either case we free the now unused entry. 1403 */ 1404 1405static int 1406pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1407{ 1408 pv_entry_t pv; 1409 int rtval; 1410 int s; 1411 1412 s = splvm(); 1413 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1414 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1415 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1416 if (pmap == pv->pv_pmap && va == pv->pv_va) 1417 break; 1418 } 1419 } else { 1420 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1421 if (va == pv->pv_va) 1422 break; 1423 } 1424 } 1425 1426 rtval = 0; 1427 if (pv) { 1428 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1429 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1430 m->md.pv_list_count--; 1431 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 1432 vm_page_flag_clear(m, PG_WRITEABLE); 1433 1434 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1435 free_pv_entry(pv); 1436 } 1437 1438 splx(s); 1439 return rtval; 1440} 1441 1442/* 1443 * Create a pv entry for page at pa for 1444 * (pmap, va). 1445 */ 1446static void 1447pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) 1448{ 1449 1450 int s; 1451 pv_entry_t pv; 1452 1453 s = splvm(); 1454 pv = get_pv_entry(); 1455 pv->pv_va = va; 1456 pv->pv_pmap = pmap; 1457 pv->pv_ptem = mpte; 1458 1459 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1460 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1461 m->md.pv_list_count++; 1462 1463 splx(s); 1464} 1465 1466/* 1467 * pmap_remove_pte: do the things to unmap a page in a process 1468 */ 1469static int 1470pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) 1471{ 1472 pt_entry_t oldpte; 1473 vm_page_t m; 1474 1475 oldpte = pte_load_clear(ptq); 1476 if (oldpte & PG_W) 1477 pmap->pm_stats.wired_count -= 1; 1478 /* 1479 * Machines that don't support invlpg, also don't support 1480 * PG_G. 
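	 * Once CR4_PGE is enabled (see pmap_bootstrap()), global mappings
	 * survive the CR3 reload performed by invltlb(), so a PG_G entry
	 * must be flushed here with an explicit invlpg rather than left to
	 * a later full invalidation.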
1481 */ 1482 if (oldpte & PG_G) 1483 pmap_invalidate_page(kernel_pmap, va); 1484 pmap->pm_stats.resident_count -= 1; 1485 if (oldpte & PG_MANAGED) { 1486 m = PHYS_TO_VM_PAGE(oldpte); 1487 if (oldpte & PG_M) { 1488#if defined(PMAP_DIAGNOSTIC) 1489 if (pmap_nw_modified((pt_entry_t) oldpte)) { 1490 printf( 1491 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", 1492 va, oldpte); 1493 } 1494#endif 1495 if (pmap_track_modified(va)) 1496 vm_page_dirty(m); 1497 } 1498 if (oldpte & PG_A) 1499 vm_page_flag_set(m, PG_REFERENCED); 1500 return pmap_remove_entry(pmap, m, va); 1501 } else { 1502 return pmap_unuse_pt(pmap, va, NULL); 1503 } 1504 1505 return 0; 1506} 1507 1508/* 1509 * Remove a single page from a process address space 1510 */ 1511static void 1512pmap_remove_page(pmap_t pmap, vm_offset_t va) 1513{ 1514 pt_entry_t *pte; 1515 1516 pte = pmap_pte(pmap, va); 1517 if (pte == NULL || (*pte & PG_V) == 0) 1518 return; 1519 pmap_remove_pte(pmap, pte, va); 1520 pmap_invalidate_page(pmap, va); 1521} 1522 1523/* 1524 * Remove the given range of addresses from the specified map. 1525 * 1526 * It is assumed that the start and end are properly 1527 * rounded to the page size. 1528 */ 1529void 1530pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1531{ 1532 vm_offset_t pdnxt; 1533 pd_entry_t ptpaddr, *pde; 1534 pt_entry_t *pte; 1535 int anyvalid; 1536 1537 if (pmap == NULL) 1538 return; 1539 1540 if (pmap->pm_stats.resident_count == 0) 1541 return; 1542 1543 /* 1544 * special handling of removing one page. a very 1545 * common operation and easy to short circuit some 1546 * code. 1547 */ 1548 if (sva + PAGE_SIZE == eva) { 1549 pde = pmap_pde(pmap, sva); 1550 if (pde && (*pde & PG_PS) == 0) { 1551 pmap_remove_page(pmap, sva); 1552 return; 1553 } 1554 } 1555 1556 anyvalid = 0; 1557 1558 for (; sva < eva; sva = pdnxt) { 1559 1560 if (pmap->pm_stats.resident_count == 0) 1561 break; 1562 1563 /* 1564 * Calculate index for next page table. 1565 */ 1566 pdnxt = (sva + NBPDR) & ~PDRMASK; 1567 1568 pde = pmap_pde(pmap, sva); 1569 if (pde == 0) 1570 continue; 1571 ptpaddr = *pde; 1572 1573 /* 1574 * Weed out invalid mappings. Note: we assume that the page 1575 * directory table is always allocated, and in kernel virtual. 1576 */ 1577 if (ptpaddr == 0) 1578 continue; 1579 1580 /* 1581 * Check for large page. 1582 */ 1583 if ((ptpaddr & PG_PS) != 0) { 1584 *pde = 0; 1585 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1586 anyvalid = 1; 1587 continue; 1588 } 1589 1590 /* 1591 * Limit our scan to either the end of the va represented 1592 * by the current page table page, or to the end of the 1593 * range being removed. 1594 */ 1595 if (pdnxt > eva) 1596 pdnxt = eva; 1597 1598 for (; sva != pdnxt; sva += PAGE_SIZE) { 1599 pte = pmap_pte(pmap, sva); 1600 if (pte == NULL || *pte == 0) 1601 continue; 1602 anyvalid = 1; 1603 if (pmap_remove_pte(pmap, pte, sva)) 1604 break; 1605 } 1606 } 1607 1608 if (anyvalid) 1609 pmap_invalidate_all(pmap); 1610} 1611 1612/* 1613 * Routine: pmap_remove_all 1614 * Function: 1615 * Removes this physical page from 1616 * all physical maps in which it resides. 1617 * Reflects back modify bits to the pager. 1618 * 1619 * Notes: 1620 * Original versions of this routine were very 1621 * inefficient because they iteratively called 1622 * pmap_remove (slow...) 
1623 */ 1624 1625void 1626pmap_remove_all(vm_page_t m) 1627{ 1628 register pv_entry_t pv; 1629 pt_entry_t *pte, tpte; 1630 int s; 1631 1632#if defined(PMAP_DIAGNOSTIC) 1633 /* 1634 * XXX This makes pmap_remove_all() illegal for non-managed pages! 1635 */ 1636 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 1637 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", 1638 VM_PAGE_TO_PHYS(m)); 1639 } 1640#endif 1641 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1642 s = splvm(); 1643 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1644 pv->pv_pmap->pm_stats.resident_count--; 1645 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 1646 tpte = pte_load_clear(pte); 1647 if (tpte & PG_W) 1648 pv->pv_pmap->pm_stats.wired_count--; 1649 if (tpte & PG_A) 1650 vm_page_flag_set(m, PG_REFERENCED); 1651 1652 /* 1653 * Update the vm_page_t clean and reference bits. 1654 */ 1655 if (tpte & PG_M) { 1656#if defined(PMAP_DIAGNOSTIC) 1657 if (pmap_nw_modified((pt_entry_t) tpte)) { 1658 printf( 1659 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", 1660 pv->pv_va, tpte); 1661 } 1662#endif 1663 if (pmap_track_modified(pv->pv_va)) 1664 vm_page_dirty(m); 1665 } 1666 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 1667 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 1668 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1669 m->md.pv_list_count--; 1670 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 1671 free_pv_entry(pv); 1672 } 1673 vm_page_flag_clear(m, PG_WRITEABLE); 1674 splx(s); 1675} 1676 1677/* 1678 * Set the physical protection on the 1679 * specified range of this map as requested. 1680 */ 1681void 1682pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1683{ 1684 vm_offset_t pdnxt; 1685 pd_entry_t ptpaddr, *pde; 1686 int anychanged; 1687 1688 if (pmap == NULL) 1689 return; 1690 1691 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1692 pmap_remove(pmap, sva, eva); 1693 return; 1694 } 1695 1696 if (prot & VM_PROT_WRITE) 1697 return; 1698 1699 anychanged = 0; 1700 1701 for (; sva < eva; sva = pdnxt) { 1702 1703 pdnxt = (sva + NBPDR) & ~PDRMASK; 1704 1705 pde = pmap_pde(pmap, sva); 1706 if (pde == NULL) 1707 continue; 1708 ptpaddr = *pde; 1709 1710 /* 1711 * Weed out invalid mappings. Note: we assume that the page 1712 * directory table is always allocated, and in kernel virtual. 1713 */ 1714 if (ptpaddr == 0) 1715 continue; 1716 1717 /* 1718 * Check for large page. 
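		 * A 2MB mapping has no lower level page table to walk;
		 * write access is removed by clearing PG_RW (and the
		 * accumulated PG_M) in the PDE itself.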
1719 */ 1720 if ((ptpaddr & PG_PS) != 0) { 1721 *pde &= ~(PG_M|PG_RW); 1722 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1723 anychanged = 1; 1724 continue; 1725 } 1726 1727 if (pdnxt > eva) 1728 pdnxt = eva; 1729 1730 for (; sva != pdnxt; sva += PAGE_SIZE) { 1731 pt_entry_t pbits; 1732 pt_entry_t *pte; 1733 vm_page_t m; 1734 1735 pte = pmap_pte(pmap, sva); 1736 if (pte == NULL) 1737 continue; 1738 pbits = *pte; 1739 if (pbits & PG_MANAGED) { 1740 m = NULL; 1741 if (pbits & PG_A) { 1742 m = PHYS_TO_VM_PAGE(pbits); 1743 vm_page_flag_set(m, PG_REFERENCED); 1744 pbits &= ~PG_A; 1745 } 1746 if ((pbits & PG_M) != 0 && 1747 pmap_track_modified(sva)) { 1748 if (m == NULL) 1749 m = PHYS_TO_VM_PAGE(pbits); 1750 vm_page_dirty(m); 1751 pbits &= ~PG_M; 1752 } 1753 } 1754 1755 pbits &= ~PG_RW; 1756 1757 if (pbits != *pte) { 1758 pte_store(pte, pbits); 1759 anychanged = 1; 1760 } 1761 } 1762 } 1763 if (anychanged) 1764 pmap_invalidate_all(pmap); 1765} 1766 1767/* 1768 * Insert the given physical page (p) at 1769 * the specified virtual address (v) in the 1770 * target physical map with the protection requested. 1771 * 1772 * If specified, the page will be wired down, meaning 1773 * that the related pte can not be reclaimed. 1774 * 1775 * NB: This is the only routine which MAY NOT lazy-evaluate 1776 * or lose information. That is, this routine must actually 1777 * insert this page into the given map NOW. 1778 */ 1779void 1780pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1781 boolean_t wired) 1782{ 1783 vm_paddr_t pa; 1784 register pt_entry_t *pte; 1785 vm_paddr_t opa; 1786 pt_entry_t origpte, newpte; 1787 vm_page_t mpte; 1788 1789 if (pmap == NULL) 1790 return; 1791 1792 va &= PG_FRAME; 1793#ifdef PMAP_DIAGNOSTIC 1794 if (va > VM_MAX_KERNEL_ADDRESS) 1795 panic("pmap_enter: toobig"); 1796 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) 1797 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); 1798#endif 1799 1800 mpte = NULL; 1801 /* 1802 * In the case that a page table page is not 1803 * resident, we are creating it here. 1804 */ 1805 if (va < VM_MAXUSER_ADDRESS) { 1806 mpte = pmap_allocpte(pmap, va); 1807 } 1808#if 0 && defined(PMAP_DIAGNOSTIC) 1809 else { 1810 pd_entry_t *pdeaddr = pmap_pde(pmap, va); 1811 origpte = *pdeaddr; 1812 if ((origpte & PG_V) == 0) { 1813 panic("pmap_enter: invalid kernel page table page, pde=%p, va=%p\n", 1814 origpte, va); 1815 } 1816 } 1817#endif 1818 1819 pte = pmap_pte(pmap, va); 1820 1821 /* 1822 * Page Directory table entry not valid, we need a new PT page 1823 */ 1824 if (pte == NULL) 1825 panic("pmap_enter: invalid page directory va=%#lx\n", va); 1826 1827 pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; 1828 origpte = *pte; 1829 opa = origpte & PG_FRAME; 1830 1831 if (origpte & PG_PS) 1832 panic("pmap_enter: attempted pmap_enter on 2MB page"); 1833 1834 /* 1835 * Mapping has not changed, must be protection or wiring change. 1836 */ 1837 if (origpte && (opa == pa)) { 1838 /* 1839 * Wiring change, just update stats. We don't worry about 1840 * wiring PT pages as they remain resident as long as there 1841 * are valid mappings in them. Hence, if a user page is wired, 1842 * the PT page will be also. 
1843 */ 1844 if (wired && ((origpte & PG_W) == 0)) 1845 pmap->pm_stats.wired_count++; 1846 else if (!wired && (origpte & PG_W)) 1847 pmap->pm_stats.wired_count--; 1848 1849#if defined(PMAP_DIAGNOSTIC) 1850 if (pmap_nw_modified((pt_entry_t) origpte)) { 1851 printf( 1852 "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", 1853 va, origpte); 1854 } 1855#endif 1856 1857 /* 1858 * Remove extra pte reference 1859 */ 1860 if (mpte) 1861 mpte->hold_count--; 1862 1863 if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { 1864 if ((origpte & PG_RW) == 0) { 1865 pte_store(pte, origpte | PG_RW); 1866 pmap_invalidate_page(pmap, va); 1867 } 1868 return; 1869 } 1870 1871 /* 1872 * We might be turning off write access to the page, 1873 * so we go ahead and sense modify status. 1874 */ 1875 if (origpte & PG_MANAGED) { 1876 if ((origpte & PG_M) && pmap_track_modified(va)) { 1877 vm_page_t om; 1878 om = PHYS_TO_VM_PAGE(opa); 1879 vm_page_dirty(om); 1880 } 1881 pa |= PG_MANAGED; 1882 } 1883 goto validate; 1884 } 1885 /* 1886 * Mapping has changed, invalidate old range and fall through to 1887 * handle validating new mapping. 1888 */ 1889 if (opa) { 1890 int err; 1891 vm_page_lock_queues(); 1892 err = pmap_remove_pte(pmap, pte, va); 1893 vm_page_unlock_queues(); 1894 if (err) 1895 panic("pmap_enter: pte vanished, va: 0x%lx", va); 1896 } 1897 1898 /* 1899 * Enter on the PV list if part of our managed memory. Note that we 1900 * raise IPL while manipulating pv_table since pmap_enter can be 1901 * called at interrupt time. 1902 */ 1903 if (pmap_initialized && 1904 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 1905 pmap_insert_entry(pmap, va, mpte, m); 1906 pa |= PG_MANAGED; 1907 } 1908 1909 /* 1910 * Increment counters 1911 */ 1912 pmap->pm_stats.resident_count++; 1913 if (wired) 1914 pmap->pm_stats.wired_count++; 1915 1916validate: 1917 /* 1918 * Now validate mapping with desired protection/wiring. 1919 */ 1920 newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V); 1921 1922 if (wired) 1923 newpte |= PG_W; 1924 if (va < VM_MAXUSER_ADDRESS) 1925 newpte |= PG_U; 1926 if (pmap == kernel_pmap) 1927 newpte |= PG_G; 1928 1929 /* 1930 * if the mapping or permission bits are different, we need 1931 * to update the pte. 1932 */ 1933 if ((origpte & ~(PG_M|PG_A)) != newpte) { 1934 pte_store(pte, newpte | PG_A); 1935 /*if (origpte)*/ { 1936 pmap_invalidate_page(pmap, va); 1937 } 1938 } 1939} 1940 1941/* 1942 * this code makes some *MAJOR* assumptions: 1943 * 1. Current pmap & pmap exists. 1944 * 2. Not wired. 1945 * 3. Read access. 1946 * 4. No page table pages. 1947 * 5. Tlbflush is deferred to calling procedure. 1948 * 6. Page IS managed. 1949 * but is *MUCH* faster than pmap_enter... 1950 */ 1951 1952vm_page_t 1953pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) 1954{ 1955 pt_entry_t *pte; 1956 vm_paddr_t pa; 1957 1958 /* 1959 * In the case that a page table page is not 1960 * resident, we are creating it here. 1961 */ 1962 if (va < VM_MAXUSER_ADDRESS) { 1963 vm_pindex_t ptepindex; 1964 pd_entry_t *ptepa; 1965 1966 /* 1967 * Calculate pagetable page index 1968 */ 1969 ptepindex = pmap_pde_pindex(va); 1970 if (mpte && (mpte->pindex == ptepindex)) { 1971 mpte->hold_count++; 1972 } else { 1973retry: 1974 /* 1975 * Get the page directory entry 1976 */ 1977 ptepa = pmap_pde(pmap, va); 1978 1979 /* 1980 * If the page table page is mapped, we just increment 1981 * the hold count, and activate it. 
1982 */ 1983 if (ptepa && (*ptepa & PG_V) != 0) { 1984 if (*ptepa & PG_PS) 1985 panic("pmap_enter_quick: unexpected mapping into 2MB page"); 1986 if (pmap->pm_pteobj->root && 1987 (pmap->pm_pteobj->root->pindex == ptepindex)) { 1988 mpte = pmap->pm_pteobj->root; 1989 } else { 1990 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1991 } 1992 if (mpte == NULL) 1993 goto retry; 1994 mpte->hold_count++; 1995 } else { 1996 mpte = _pmap_allocpte(pmap, ptepindex); 1997 } 1998 } 1999 } else { 2000 mpte = NULL; 2001 } 2002 2003 /* 2004 * This call to vtopte makes the assumption that we are 2005 * entering the page into the current pmap. In order to support 2006 * quick entry into any pmap, one would likely use pmap_pte. 2007 * But that isn't as quick as vtopte. 2008 */ 2009 pte = vtopte(va); 2010 if (*pte) { 2011 if (mpte != NULL) { 2012 vm_page_lock_queues(); 2013 pmap_unwire_pte_hold(pmap, va, mpte); 2014 vm_page_unlock_queues(); 2015 } 2016 return 0; 2017 } 2018 2019 /* 2020 * Enter on the PV list if part of our managed memory. Note that we 2021 * raise IPL while manipulating pv_table since pmap_enter can be 2022 * called at interrupt time. 2023 */ 2024 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) 2025 pmap_insert_entry(pmap, va, mpte, m); 2026 2027 /* 2028 * Increment counters 2029 */ 2030 pmap->pm_stats.resident_count++; 2031 2032 pa = VM_PAGE_TO_PHYS(m); 2033 2034 /* 2035 * Now validate mapping with RO protection 2036 */ 2037 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2038 pte_store(pte, pa | PG_V | PG_U); 2039 else 2040 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 2041 2042 return mpte; 2043} 2044 2045/* 2046 * Make a temporary mapping for a physical address. This is only intended 2047 * to be used for panic dumps. 2048 */ 2049void * 2050pmap_kenter_temporary(vm_offset_t pa, int i) 2051{ 2052 vm_offset_t va; 2053 2054 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 2055 pmap_kenter(va, pa); 2056 invlpg(va); 2057 return ((void *)crashdumpmap); 2058} 2059 2060/* 2061 * This code maps large physical mmap regions into the 2062 * processor address space. Note that some shortcuts 2063 * are taken, but the code works. 
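 * Only 2MB-aligned requests against device objects are handled: when
 * the address, the size and the first page's physical address are all
 * 2MB aligned, the region is entered directly as PG_PS page directory
 * entries, assuming the object's memory is physically contiguous (as
 * device memory normally is), and no page table pages are built.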
2064 */ 2065void 2066pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2067 vm_object_t object, vm_pindex_t pindex, 2068 vm_size_t size) 2069{ 2070 vm_page_t p; 2071 2072 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2073 KASSERT(object->type == OBJT_DEVICE, 2074 ("pmap_object_init_pt: non-device object")); 2075 if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { 2076 int i; 2077 vm_page_t m[1]; 2078 int npdes; 2079 pd_entry_t ptepa, *pde; 2080 2081 pde = pmap_pde(pmap, addr); 2082 if (pde != 0 && (*pde & PG_V) != 0) 2083 return; 2084retry: 2085 p = vm_page_lookup(object, pindex); 2086 if (p != NULL) { 2087 vm_page_lock_queues(); 2088 if (vm_page_sleep_if_busy(p, FALSE, "init4p")) 2089 goto retry; 2090 } else { 2091 p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); 2092 if (p == NULL) 2093 return; 2094 m[0] = p; 2095 2096 if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { 2097 vm_page_lock_queues(); 2098 vm_page_free(p); 2099 vm_page_unlock_queues(); 2100 return; 2101 } 2102 2103 p = vm_page_lookup(object, pindex); 2104 vm_page_lock_queues(); 2105 vm_page_wakeup(p); 2106 } 2107 vm_page_unlock_queues(); 2108 2109 ptepa = VM_PAGE_TO_PHYS(p); 2110 if (ptepa & (NBPDR - 1)) 2111 return; 2112 2113 p->valid = VM_PAGE_BITS_ALL; 2114 2115 pmap->pm_stats.resident_count += size >> PAGE_SHIFT; 2116 npdes = size >> PDRSHIFT; 2117 for(i = 0; i < npdes; i++) { 2118 pde_store(pde, ptepa | PG_U | PG_RW | PG_V | PG_PS); 2119 ptepa += NBPDR; 2120 pde++; 2121 } 2122 pmap_invalidate_all(kernel_pmap); 2123 } 2124} 2125 2126/* 2127 * pmap_prefault provides a quick way of clustering 2128 * pagefaults into a processes address space. It is a "cousin" 2129 * of pmap_object_init_pt, except it runs at page fault time instead 2130 * of mmap time. 2131 */ 2132#define PFBAK 4 2133#define PFFOR 4 2134#define PAGEORDER_SIZE (PFBAK+PFFOR) 2135 2136static int pmap_prefault_pageorder[] = { 2137 -1 * PAGE_SIZE, 1 * PAGE_SIZE, 2138 -2 * PAGE_SIZE, 2 * PAGE_SIZE, 2139 -3 * PAGE_SIZE, 3 * PAGE_SIZE, 2140 -4 * PAGE_SIZE, 4 * PAGE_SIZE 2141}; 2142 2143void 2144pmap_prefault(pmap, addra, entry) 2145 pmap_t pmap; 2146 vm_offset_t addra; 2147 vm_map_entry_t entry; 2148{ 2149 int i; 2150 vm_offset_t starta; 2151 vm_offset_t addr; 2152 vm_pindex_t pindex; 2153 vm_page_t m, mpte; 2154 vm_object_t object; 2155 pd_entry_t *pde; 2156 2157 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) 2158 return; 2159 2160 object = entry->object.vm_object; 2161 2162 starta = addra - PFBAK * PAGE_SIZE; 2163 if (starta < entry->start) { 2164 starta = entry->start; 2165 } else if (starta > addra) { 2166 starta = 0; 2167 } 2168 2169 mpte = NULL; 2170 for (i = 0; i < PAGEORDER_SIZE; i++) { 2171 vm_object_t lobject; 2172 pt_entry_t *pte; 2173 2174 addr = addra + pmap_prefault_pageorder[i]; 2175 if (addr > addra + (PFFOR * PAGE_SIZE)) 2176 addr = 0; 2177 2178 if (addr < starta || addr >= entry->end) 2179 continue; 2180 2181 pde = pmap_pde(pmap, addr); 2182 if (pde == NULL || (*pde & PG_V) == 0) 2183 continue; 2184 2185 pte = vtopte(addr); 2186 if ((*pte & PG_V) == 0) 2187 continue; 2188 2189 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; 2190 lobject = object; 2191 for (m = vm_page_lookup(lobject, pindex); 2192 (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); 2193 lobject = lobject->backing_object) { 2194 if (lobject->backing_object_offset & PAGE_MASK) 2195 break; 2196 pindex += (lobject->backing_object_offset >> PAGE_SHIFT); 2197 m = vm_page_lookup(lobject->backing_object, 
pindex); 2198 } 2199 2200 /* 2201 * give-up when a page is not in memory 2202 */ 2203 if (m == NULL) 2204 break; 2205 vm_page_lock_queues(); 2206 if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2207 (m->busy == 0) && 2208 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2209 2210 if ((m->queue - m->pc) == PQ_CACHE) { 2211 vm_page_deactivate(m); 2212 } 2213 vm_page_busy(m); 2214 vm_page_unlock_queues(); 2215 mpte = pmap_enter_quick(pmap, addr, m, mpte); 2216 vm_page_lock_queues(); 2217 vm_page_wakeup(m); 2218 } 2219 vm_page_unlock_queues(); 2220 } 2221} 2222 2223/* 2224 * Routine: pmap_change_wiring 2225 * Function: Change the wiring attribute for a map/virtual-address 2226 * pair. 2227 * In/out conditions: 2228 * The mapping must already exist in the pmap. 2229 */ 2230void 2231pmap_change_wiring(pmap, va, wired) 2232 register pmap_t pmap; 2233 vm_offset_t va; 2234 boolean_t wired; 2235{ 2236 register pt_entry_t *pte; 2237 2238 if (pmap == NULL) 2239 return; 2240 2241 /* 2242 * Wiring is not a hardware characteristic so there is no need to 2243 * invalidate TLB. 2244 */ 2245 pte = pmap_pte(pmap, va); 2246 if (wired && (*pte & PG_W) == 0) { 2247 pmap->pm_stats.wired_count++; 2248 *pte |= PG_W; 2249 } else if (!wired && (*pte & PG_W) != 0) { 2250 pmap->pm_stats.wired_count--; 2251 *pte &= ~PG_W; 2252 } 2253} 2254 2255 2256 2257/* 2258 * Copy the range specified by src_addr/len 2259 * from the source map to the range dst_addr/len 2260 * in the destination map. 2261 * 2262 * This routine is only advisory and need not do anything. 2263 */ 2264 2265void 2266pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2267 vm_offset_t src_addr) 2268{ 2269 vm_offset_t addr; 2270 vm_offset_t end_addr = src_addr + len; 2271 vm_offset_t pdnxt; 2272 vm_page_t m; 2273 2274 if (dst_addr != src_addr) 2275 return; 2276 2277 if (!pmap_is_current(src_pmap)) 2278 return; 2279 2280 for (addr = src_addr; addr < end_addr; addr = pdnxt) { 2281 pt_entry_t *src_pte, *dst_pte; 2282 vm_page_t dstmpte, srcmpte; 2283 pd_entry_t srcptepaddr, *pde; 2284 vm_pindex_t ptepindex; 2285 2286 if (addr >= UPT_MIN_ADDRESS) 2287 panic("pmap_copy: invalid to pmap_copy page tables\n"); 2288 2289 /* 2290 * Don't let optional prefaulting of pages make us go 2291 * way below the low water mark of free pages or way 2292 * above high water mark of used pv entries. 
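 * Since this copy is purely an optimization, it is always safe to
 * stop here and let the destination pmap fault the pages in later.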
2293 */ 2294 if (cnt.v_free_count < cnt.v_free_reserved || 2295 pv_entry_count > pv_entry_high_water) 2296 break; 2297 2298 pdnxt = (addr + NBPDR) & ~PDRMASK; 2299 ptepindex = pmap_pde_pindex(addr); 2300 2301 pde = pmap_pde(src_pmap, addr); 2302 if (pde) 2303 srcptepaddr = *pde; 2304 else 2305 continue; 2306 if (srcptepaddr == 0) 2307 continue; 2308 2309 if (srcptepaddr & PG_PS) { 2310 pde = pmap_pde(dst_pmap, addr); 2311 if (pde == 0) { 2312 /* 2313 * XXX should do an allocpte here to 2314 * instantiate the pde 2315 */ 2316 continue; 2317 } 2318 if (*pde == 0) { 2319 *pde = srcptepaddr; 2320 dst_pmap->pm_stats.resident_count += 2321 NBPDR / PAGE_SIZE; 2322 } 2323 continue; 2324 } 2325 2326 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); 2327 if ((srcmpte == NULL) || 2328 (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) 2329 continue; 2330 2331 if (pdnxt > end_addr) 2332 pdnxt = end_addr; 2333 2334 src_pte = vtopte(addr); 2335 while (addr < pdnxt) { 2336 pt_entry_t ptetemp; 2337 ptetemp = *src_pte; 2338 /* 2339 * we only virtual copy managed pages 2340 */ 2341 if ((ptetemp & PG_MANAGED) != 0) { 2342 /* 2343 * We have to check after allocpte for the 2344 * pte still being around... allocpte can 2345 * block. 2346 */ 2347 dstmpte = pmap_allocpte(dst_pmap, addr); 2348 dst_pte = pmap_pte(dst_pmap, addr); 2349 if ((*dst_pte == 0) && (ptetemp = *src_pte)) { 2350 /* 2351 * Clear the modified and 2352 * accessed (referenced) bits 2353 * during the copy. 2354 */ 2355 m = PHYS_TO_VM_PAGE(ptetemp); 2356 *dst_pte = ptetemp & ~(PG_M | PG_A); 2357 dst_pmap->pm_stats.resident_count++; 2358 pmap_insert_entry(dst_pmap, addr, 2359 dstmpte, m); 2360 } else { 2361 vm_page_lock_queues(); 2362 pmap_unwire_pte_hold(dst_pmap, addr, dstmpte); 2363 vm_page_unlock_queues(); 2364 } 2365 if (dstmpte->hold_count >= srcmpte->hold_count) 2366 break; 2367 } 2368 addr += PAGE_SIZE; 2369 src_pte++; 2370 } 2371 } 2372} 2373 2374/* 2375 * pmap_zero_page zeros the specified hardware page by mapping 2376 * the page into KVM and using bzero to clear its contents. 2377 */ 2378void 2379pmap_zero_page(vm_page_t m) 2380{ 2381 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2382 2383 pagezero((void *)va); 2384} 2385 2386/* 2387 * pmap_zero_page_area zeros the specified hardware page by mapping 2388 * the page into KVM and using bzero to clear its contents. 2389 * 2390 * off and size may not cover an area beyond a single hardware page. 2391 */ 2392void 2393pmap_zero_page_area(vm_page_t m, int off, int size) 2394{ 2395 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2396 2397 if (off == 0 && size == PAGE_SIZE) 2398 pagezero((void *)va); 2399 else 2400 bzero((char *)va + off, size); 2401} 2402 2403/* 2404 * pmap_zero_page_idle zeros the specified hardware page by mapping 2405 * the page into KVM and using bzero to clear its contents. This 2406 * is intended to be called from the vm_pagezero process only and 2407 * outside of Giant. 2408 */ 2409void 2410pmap_zero_page_idle(vm_page_t m) 2411{ 2412 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2413 2414 pagezero((void *)va); 2415} 2416 2417/* 2418 * pmap_copy_page copies the specified (machine independent) 2419 * page by mapping the page into virtual memory and using 2420 * bcopy to copy the page, one machine dependent page at a 2421 * time. 
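 * On amd64 the permanent direct map already provides a mapping for
 * every physical page, so the PHYS_TO_DMAP() address is used and no
 * temporary mapping needs to be created or torn down.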
2422 */ 2423void 2424pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2425{ 2426 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2427 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2428 2429 bcopy((void *)src, (void *)dst, PAGE_SIZE); 2430} 2431 2432/* 2433 * Returns true if the pmap's pv is one of the first 2434 * 16 pvs linked to from this page. This count may 2435 * be changed upwards or downwards in the future; it 2436 * is only necessary that true be returned for a small 2437 * subset of pmaps for proper page aging. 2438 */ 2439boolean_t 2440pmap_page_exists_quick(pmap, m) 2441 pmap_t pmap; 2442 vm_page_t m; 2443{ 2444 pv_entry_t pv; 2445 int loops = 0; 2446 int s; 2447 2448 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2449 return FALSE; 2450 2451 s = splvm(); 2452 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2453 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2454 if (pv->pv_pmap == pmap) { 2455 splx(s); 2456 return TRUE; 2457 } 2458 loops++; 2459 if (loops >= 16) 2460 break; 2461 } 2462 splx(s); 2463 return (FALSE); 2464} 2465 2466#define PMAP_REMOVE_PAGES_CURPROC_ONLY 2467/* 2468 * Remove all pages from specified address space 2469 * this aids process exit speeds. Also, this code 2470 * is special cased for current process only, but 2471 * can have the more generic (and slightly slower) 2472 * mode enabled. This is much faster than pmap_remove 2473 * in the case of running down an entire address space. 2474 */ 2475void 2476pmap_remove_pages(pmap, sva, eva) 2477 pmap_t pmap; 2478 vm_offset_t sva, eva; 2479{ 2480 pt_entry_t *pte, tpte; 2481 vm_page_t m; 2482 pv_entry_t pv, npv; 2483 int s; 2484 2485#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2486 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { 2487 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2488 return; 2489 } 2490#endif 2491 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2492 s = splvm(); 2493 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2494 2495 if (pv->pv_va >= eva || pv->pv_va < sva) { 2496 npv = TAILQ_NEXT(pv, pv_plist); 2497 continue; 2498 } 2499 2500#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2501 pte = vtopte(pv->pv_va); 2502#else 2503 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2504#endif 2505 tpte = *pte; 2506 2507 if (tpte == 0) { 2508 printf("TPTE at %p IS ZERO @ VA %08lx\n", 2509 pte, pv->pv_va); 2510 panic("bad pte"); 2511 } 2512 2513/* 2514 * We cannot remove wired pages from a process' mapping at this time 2515 */ 2516 if (tpte & PG_W) { 2517 npv = TAILQ_NEXT(pv, pv_plist); 2518 continue; 2519 } 2520 2521 m = PHYS_TO_VM_PAGE(tpte); 2522 KASSERT(m->phys_addr == (tpte & PG_FRAME), 2523 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 2524 m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); 2525 2526 KASSERT(m < &vm_page_array[vm_page_array_size], 2527 ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); 2528 2529 pv->pv_pmap->pm_stats.resident_count--; 2530 2531 pte_clear(pte); 2532 2533 /* 2534 * Update the vm_page_t clean and reference bits. 
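 * A set PG_M bit in the old pte means the page was written through
 * this mapping, so the page is recorded as dirty here.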
2535 */ 2536 if (tpte & PG_M) { 2537 vm_page_dirty(m); 2538 } 2539 2540 npv = TAILQ_NEXT(pv, pv_plist); 2541 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2542 2543 m->md.pv_list_count--; 2544 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2545 if (TAILQ_FIRST(&m->md.pv_list) == NULL) { 2546 vm_page_flag_clear(m, PG_WRITEABLE); 2547 } 2548 2549 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2550 free_pv_entry(pv); 2551 } 2552 splx(s); 2553 pmap_invalidate_all(pmap); 2554} 2555 2556/* 2557 * pmap_is_modified: 2558 * 2559 * Return whether or not the specified physical page was modified 2560 * in any physical maps. 2561 */ 2562boolean_t 2563pmap_is_modified(vm_page_t m) 2564{ 2565 pv_entry_t pv; 2566 pt_entry_t *pte; 2567 int s; 2568 2569 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2570 return FALSE; 2571 2572 s = splvm(); 2573 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2574 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2575 /* 2576 * if the bit being tested is the modified bit, then 2577 * mark clean_map and ptes as never 2578 * modified. 2579 */ 2580 if (!pmap_track_modified(pv->pv_va)) 2581 continue; 2582#if defined(PMAP_DIAGNOSTIC) 2583 if (!pv->pv_pmap) { 2584 printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); 2585 continue; 2586 } 2587#endif 2588 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2589 if (*pte & PG_M) { 2590 splx(s); 2591 return TRUE; 2592 } 2593 } 2594 splx(s); 2595 return (FALSE); 2596} 2597 2598/* 2599 * this routine is used to modify bits in ptes 2600 */ 2601static __inline void 2602pmap_changebit(vm_page_t m, int bit, boolean_t setem) 2603{ 2604 register pv_entry_t pv; 2605 register pt_entry_t *pte; 2606 int s; 2607 2608 if (!pmap_initialized || (m->flags & PG_FICTITIOUS) || 2609 (!setem && bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) 2610 return; 2611 2612 s = splvm(); 2613 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2614 /* 2615 * Loop over all current mappings setting/clearing as appropos If 2616 * setting RO do we need to clear the VAC? 2617 */ 2618 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2619 /* 2620 * don't write protect pager mappings 2621 */ 2622 if (!setem && (bit == PG_RW)) { 2623 if (!pmap_track_modified(pv->pv_va)) 2624 continue; 2625 } 2626 2627#if defined(PMAP_DIAGNOSTIC) 2628 if (!pv->pv_pmap) { 2629 printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); 2630 continue; 2631 } 2632#endif 2633 2634 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2635 2636 if (setem) { 2637 *pte |= bit; 2638 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 2639 } else { 2640 pt_entry_t pbits = *pte; 2641 if (pbits & bit) { 2642 if (bit == PG_RW) { 2643 if (pbits & PG_M) { 2644 vm_page_dirty(m); 2645 } 2646 pte_store(pte, pbits & ~(PG_M|PG_RW)); 2647 } else { 2648 pte_store(pte, pbits & ~bit); 2649 } 2650 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 2651 } 2652 } 2653 } 2654 if (!setem && bit == PG_RW) 2655 vm_page_flag_clear(m, PG_WRITEABLE); 2656 splx(s); 2657} 2658 2659/* 2660 * pmap_page_protect: 2661 * 2662 * Lower the permission for all mappings to a given page. 2663 */ 2664void 2665pmap_page_protect(vm_page_t m, vm_prot_t prot) 2666{ 2667 if ((prot & VM_PROT_WRITE) == 0) { 2668 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 2669 pmap_changebit(m, PG_RW, FALSE); 2670 } else { 2671 pmap_remove_all(m); 2672 } 2673 } 2674} 2675 2676/* 2677 * pmap_ts_referenced: 2678 * 2679 * Return a count of reference bits for a page, clearing those bits. 
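 * The scan below clears the accessed bit in at most a handful of
 * mappings before giving up.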
2680 * It is not necessary for every reference bit to be cleared, but it 2681 * is necessary that 0 only be returned when there are truly no 2682 * reference bits set. 2683 * 2684 * XXX: The exact number of bits to check and clear is a matter that 2685 * should be tested and standardized at some point in the future for 2686 * optimal aging of shared pages. 2687 */ 2688int 2689pmap_ts_referenced(vm_page_t m) 2690{ 2691 register pv_entry_t pv, pvf, pvn; 2692 pt_entry_t *pte; 2693 pt_entry_t v; 2694 int s; 2695 int rtval = 0; 2696 2697 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2698 return (rtval); 2699 2700 s = splvm(); 2701 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2702 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2703 2704 pvf = pv; 2705 2706 do { 2707 pvn = TAILQ_NEXT(pv, pv_list); 2708 2709 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2710 2711 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2712 2713 if (!pmap_track_modified(pv->pv_va)) 2714 continue; 2715 2716 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2717 2718 if (pte && ((v = pte_load(pte)) & PG_A) != 0) { 2719 pte_store(pte, v & ~PG_A); 2720 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 2721 2722 rtval++; 2723 if (rtval > 4) { 2724 break; 2725 } 2726 } 2727 } while ((pv = pvn) != NULL && pv != pvf); 2728 } 2729 splx(s); 2730 2731 return (rtval); 2732} 2733 2734/* 2735 * Clear the modify bits on the specified physical page. 2736 */ 2737void 2738pmap_clear_modify(vm_page_t m) 2739{ 2740 pmap_changebit(m, PG_M, FALSE); 2741} 2742 2743/* 2744 * pmap_clear_reference: 2745 * 2746 * Clear the reference bit on the specified physical page. 2747 */ 2748void 2749pmap_clear_reference(vm_page_t m) 2750{ 2751 pmap_changebit(m, PG_A, FALSE); 2752} 2753 2754/* 2755 * Miscellaneous support routines follow 2756 */ 2757 2758static void 2759amd64_protection_init() 2760{ 2761 register long *kp, prot; 2762 2763#if 0 2764#define PG_NX (1ul << 63) 2765#else 2766#define PG_NX 0 2767#endif 2768 2769 kp = protection_codes; 2770 for (prot = 0; prot < 8; prot++) { 2771 switch (prot) { 2772 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: 2773 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: 2774 *kp++ = PG_NX; 2775 break; 2776 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: 2777 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: 2778 *kp++ = 0; 2779 break; 2780 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: 2781 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: 2782 *kp++ = PG_RW | PG_NX; 2783 break; 2784 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: 2785 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: 2786 *kp++ = PG_RW; 2787 break; 2788 } 2789 } 2790} 2791 2792/* 2793 * Map a set of physical memory pages into the kernel virtual 2794 * address space. Return a pointer to where it is mapped. This 2795 * routine is intended to be used for mapping device memory, 2796 * NOT real memory. 
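 * Physical addresses below dmaplimit are returned via PHYS_TO_DMAP()
 * without consuming any kernel virtual address space; anything else
 * gets a freshly allocated KVA range that is filled in with
 * pmap_kenter().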
2797 */ 2798void * 2799pmap_mapdev(pa, size) 2800 vm_paddr_t pa; 2801 vm_size_t size; 2802{ 2803 vm_offset_t va, tmpva, offset; 2804 2805 /* If this fits within the direct map window, use it */ 2806 if (pa < dmaplimit && (pa + size) < dmaplimit) 2807 return ((void *)PHYS_TO_DMAP(pa)); 2808 offset = pa & PAGE_MASK; 2809 size = roundup(offset + size, PAGE_SIZE); 2810 va = kmem_alloc_pageable(kernel_map, size); 2811 if (!va) 2812 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 2813 pa = pa & PG_FRAME; 2814 for (tmpva = va; size > 0; ) { 2815 pmap_kenter(tmpva, pa); 2816 size -= PAGE_SIZE; 2817 tmpva += PAGE_SIZE; 2818 pa += PAGE_SIZE; 2819 } 2820 pmap_invalidate_range(kernel_pmap, va, tmpva); 2821 return ((void *)(va + offset)); 2822} 2823 2824void 2825pmap_unmapdev(va, size) 2826 vm_offset_t va; 2827 vm_size_t size; 2828{ 2829 vm_offset_t base, offset, tmpva; 2830 pt_entry_t *pte; 2831 2832 /* If we gave a direct map region in pmap_mapdev, do nothing */ 2833 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) 2834 return; 2835 base = va & PG_FRAME; 2836 offset = va & PAGE_MASK; 2837 size = roundup(offset + size, PAGE_SIZE); 2838 for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { 2839 pte = vtopte(tmpva); 2840 pte_clear(pte); 2841 } 2842 pmap_invalidate_range(kernel_pmap, va, tmpva); 2843 kmem_free(kernel_map, base, size); 2844} 2845 2846/* 2847 * perform the pmap work for mincore 2848 */ 2849int 2850pmap_mincore(pmap, addr) 2851 pmap_t pmap; 2852 vm_offset_t addr; 2853{ 2854 pt_entry_t *ptep, pte; 2855 vm_page_t m; 2856 int val = 0; 2857 2858 ptep = pmap_pte(pmap, addr); 2859 if (ptep == 0) { 2860 return 0; 2861 } 2862 2863 if ((pte = *ptep) != 0) { 2864 vm_paddr_t pa; 2865 2866 val = MINCORE_INCORE; 2867 if ((pte & PG_MANAGED) == 0) 2868 return val; 2869 2870 pa = pte & PG_FRAME; 2871 2872 m = PHYS_TO_VM_PAGE(pa); 2873 2874 /* 2875 * Modified by us 2876 */ 2877 if (pte & PG_M) 2878 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 2879 else { 2880 /* 2881 * Modified by someone else 2882 */ 2883 vm_page_lock_queues(); 2884 if (m->dirty || pmap_is_modified(m)) 2885 val |= MINCORE_MODIFIED_OTHER; 2886 vm_page_unlock_queues(); 2887 } 2888 /* 2889 * Referenced by us 2890 */ 2891 if (pte & PG_A) 2892 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 2893 else { 2894 /* 2895 * Referenced by someone else 2896 */ 2897 vm_page_lock_queues(); 2898 if ((m->flags & PG_REFERENCED) || 2899 pmap_ts_referenced(m)) { 2900 val |= MINCORE_REFERENCED_OTHER; 2901 vm_page_flag_set(m, PG_REFERENCED); 2902 } 2903 vm_page_unlock_queues(); 2904 } 2905 } 2906 return val; 2907} 2908 2909void 2910pmap_activate(struct thread *td) 2911{ 2912 struct proc *p = td->td_proc; 2913 pmap_t pmap; 2914 u_int64_t cr3; 2915 2916 critical_enter(); 2917 pmap = vmspace_pmap(td->td_proc->p_vmspace); 2918 pmap->pm_active |= PCPU_GET(cpumask); 2919 cr3 = vtophys(pmap->pm_pml4); 2920 /* XXXKSE this is wrong. 2921 * pmap_activate is for the current thread on the current cpu 2922 */ 2923 if (p->p_flag & P_SA) { 2924 /* Make sure all other cr3 entries are updated. */ 2925 /* what if they are running? 
XXXKSE (maybe abort them) */ 2926 FOREACH_THREAD_IN_PROC(p, td) { 2927 td->td_pcb->pcb_cr3 = cr3; 2928 } 2929 } else { 2930 td->td_pcb->pcb_cr3 = cr3; 2931 } 2932 load_cr3(cr3); 2933 critical_exit(); 2934} 2935 2936vm_offset_t 2937pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 2938{ 2939 2940 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 2941 return addr; 2942 } 2943 2944 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 2945 return addr; 2946} 2947
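/*
 * A hypothetical usage sketch for the pmap_mapdev()/pmap_unmapdev()
 * interface above.  The device physical address, the register layout
 * and the function name below are invented for illustration, so the
 * sketch is kept under "#if 0" and is never compiled.
 */
#if 0
static void
example_map_device_registers(void)
{
	volatile uint32_t *regs;
	vm_paddr_t regs_pa = 0xfebf0000UL;	/* hypothetical register BAR */
	vm_size_t regs_len = PAGE_SIZE;

	/* Physical addresses below dmaplimit come straight from the direct map. */
	regs = pmap_mapdev(regs_pa, regs_len);

	regs[0] = 1;		/* touch a hypothetical device register */

	/* pmap_unmapdev() is a no-op for direct map addresses. */
	pmap_unmapdev((vm_offset_t)regs, regs_len);
}
#endif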