/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/i386/xen/pmap.c 246855 2013-02-15 22:43:08Z jkim $");

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is also stored by the
 * logical address mapping module, this module may throw away valid
 * virtual-to-physical mappings at almost any time.  However,
 * invalidations of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which make
 * virtual-to-physical map invalidates expensive, this module may delay
 * invalidation or reduced-protection operations until such time as they
 * are actually necessary.  This module is given full information as to
 * which processors are currently using which maps, and as to when
 * physical maps must be made correct.
 */

#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_smp.h"
#include "opt_xbox.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sf_buf.h>
#include <sys/sx.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
#else
#include <sys/cpuset.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#ifdef XBOX
#include <machine/xbox.h>
#endif

#include <xen/interface/xen.h>
#include <xen/hypervisor.h>
#include <machine/xen/hypercall.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>

#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#define DIAGNOSTIC

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define pdir_pde(m, v)	(m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)

#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
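/*
 * Note: pmap_pde() indexes the page directory with va >> PDRSHIFT, so on
 * non-PAE i386 each PDE covers a 4 MB (NBPDR) slice of the address space;
 * the pmap_pte_*() macros above simply test individual PG_* bits in a PTE
 * that has already been looked up.
 */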
#define HAMFISTED_LOCKING
#ifdef HAMFISTED_LOCKING
static struct mtx createdelete_lock;
#endif

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
int pgeflag = 0;		/* PG_G or-in */
int pseflag = 0;		/* PG_PS or-in */

int nkpt;
vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;

#ifdef PAE
pt_entry_t pg_nx;
#endif

static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

static int pat_works;		/* Is page attribute table sane? */

/*
 * This lock is defined as static in other pmap implementations.  It cannot,
 * however, be defined as static here, because it is (ab)used to serialize
 * queued page table changes in other source files.
 */
struct rwlock pvh_global_lock;

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static int shpgperproc = PMAP_SHPGPERPROC;

struct pv_chunk *pv_chunkbase;	/* KVA block for pv_chunks */
int pv_maxchunks;		/* How many chunks we have KVA for */
vm_offset_t pv_vafree;		/* freelist stored in the PTE */

/*
 * All those kernel PT submaps that BSD is so fond of
 */
struct sysmaps {
	struct mtx lock;
	pt_entry_t *CMAP1;
	pt_entry_t *CMAP2;
	caddr_t CADDR1;
	caddr_t CADDR2;
};
static struct sysmaps sysmaps_pcpu[MAXCPU];
static pt_entry_t *CMAP3;
caddr_t ptvmmap = 0;
static caddr_t CADDR3;
struct msgbuf *msgbufp = 0;
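/*
 * Each sysmaps entry provides a pair of reserved kernel PTE slots (CMAP1,
 * CMAP2) and the virtual addresses they map (CADDR1, CADDR2) for one CPU;
 * they serve as scratch mapping windows for page zeroing and copying, while
 * CMAP3/CADDR3 is reserved for the idle process page zeroing noted in
 * pmap_bootstrap() below.
 */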
/*
 * Crashdump maps.
 */
static caddr_t crashdumpmap;

static pt_entry_t *PMAP1 = 0, *PMAP2;
static pt_entry_t *PADDR1 = 0, *PADDR2;
#ifdef SMP
static int PMAP1cpu;
static int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
	   &PMAP1changedcpu, 0,
	   "Number of times pmap_pte_quick changed CPU with same PMAP1");
#endif
static int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
	   &PMAP1changed, 0,
	   "Number of times pmap_pte_quick changed PMAP1");
static int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
	   &PMAP1unchanged, 0,
	   "Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);

static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count,
    pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_flush_page(vm_page_t m);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
    vm_page_t *free);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
    vm_page_t *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
    vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);

static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);

static __inline void pagezero(void *page);

CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));

/*
 * If you get an error here, then you set KVA_PAGES wrong!  See the
 * description of KVA_PAGES in sys/i386/include/pmap.h.  It must be a
 * multiple of 4 for a normal kernel, or a multiple of 8 for PAE.
 */
CTASSERT(KERNBASE % (1 << 24) == 0);

void
pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
{
	vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]);

	switch (type) {
	case SH_PD_SET_VA:
#if 0
		xen_queue_pt_update(shadow_pdir_ma,
		    xpmap_ptom(val & ~(PG_RW)));
#endif
		xen_queue_pt_update(pdir_ma,
		    xpmap_ptom(val));
		break;
	case SH_PD_SET_VA_MA:
#if 0
		xen_queue_pt_update(shadow_pdir_ma,
		    val & ~(PG_RW));
#endif
		xen_queue_pt_update(pdir_ma, val);
		break;
	case SH_PD_SET_VA_CLEAR:
#if 0
		xen_queue_pt_update(shadow_pdir_ma, 0);
#endif
		xen_queue_pt_update(pdir_ma, 0);
		break;
	}
}
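/*
 * Under Xen the page directory is owned by the hypervisor, so pd_set()
 * cannot write a PDE directly; it translates the PDE's kernel virtual
 * address to a machine address with vtomach() and queues an update that the
 * hypervisor applies when the queue is flushed.  The #if 0 blocks are
 * leftovers from an earlier shadow page directory scheme.
 */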
/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	On the i386 this is called after mapping has already been enabled
 *	and just syncs the pmap module with what has already been done.
 *	[We can't call it easily with mapping off since the kernel is not
 *	mapped with PA == VA, hence we would have to relocate every address
 *	from the linked base (virtual) address "KERNBASE" to the actual
 *	(physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t firstaddr)
{
	vm_offset_t va;
	pt_entry_t *pte, *unused;
	struct sysmaps *sysmaps;
	int i;

	/*
	 * Initialize the first available kernel virtual address.  However,
	 * using "firstaddr" may waste a few pages of the kernel virtual
	 * address space, because locore may not have mapped every physical
	 * page that it allocated.  Preferably, locore would provide a first
	 * unused virtual address in addition to "firstaddr".
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
#ifdef PAE
	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvchunk);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init_flags(&pvh_global_lock, "pmap pv global", RW_RECURSE);

	LIST_INIT(&allpmaps);
	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
	if (nkpt == 0)
		nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
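	/*
	 * For example, SYSMAP(caddr_t, CMAP3, CADDR3, 1) below expands to:
	 *
	 *	CADDR3 = (caddr_t)va; va += PAGE_SIZE; CMAP3 = pte; pte += 1;
	 *
	 * i.e., it carves one page out of the pending KVA range and
	 * remembers the PTE that maps it, so the caller can later install
	 * a mapping by writing that PTE.
	 */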
	va = virtual_avail;
	pte = vtopte(va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 * CMAP3 is used for the idle process page zeroing.
	 */
	for (i = 0; i < MAXCPU; i++) {
		sysmaps = &sysmaps_pcpu[i];
		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
		SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
		SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
		PT_SET_MA(sysmaps->CADDR1, 0);
		PT_SET_MA(sysmaps->CADDR2, 0);
	}
	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
	PT_SET_MA(CADDR3, 0);

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)

	/*
	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
	 */
	SYSMAP(caddr_t, unused, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 */
	SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize)))

	/*
	 * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(),
	 * respectively.
	 */
	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)

	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);

	virtual_avail = va;

	/*
	 * Leave in place an identity mapping (virt == phys) for the low 1 MB
	 * physical memory region that is used by the ACPI wakeup code.  This
	 * mapping must not have PG_G set.
	 */
#ifndef XEN
	/*
	 * leave here deliberately to show that this is not supported
	 */
#ifdef XBOX
	/* FIXME: This is gross, but needed for the XBOX.  Since we are at
	 * such an early stage, we cannot yet neatly map video memory ... :-(
	 * Better fixes are very welcome!
	 */
	if (!arch_i386_is_xbox)
#endif
	for (i = 1; i < NKPT; i++)
		PTD[i] = 0;

	/* Initialize the PAT MSR if present. */
	pmap_init_pat();

	/* Turn on PG_G on kernel page(s) */
	pmap_set_pg();
#endif

#ifdef HAMFISTED_LOCKING
	mtx_init(&createdelete_lock, "pmap create/delete", NULL, MTX_DEF);
#endif
}

/*
 * Setup the PAT MSR.
 */
void
pmap_init_pat(void)
{
	uint64_t pat_msr;

	/* Bail if this CPU doesn't implement PAT. */
	if (!(cpu_feature & CPUID_PAT))
		return;

	if (cpu_vendor_id != CPU_VENDOR_INTEL ||
	    (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
		/*
		 * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
		 * Program 4 and 5 as WP and WC.
		 * Leave 6 and 7 as UC- and UC.
		 */
		pat_msr = rdmsr(MSR_PAT);
		pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
		pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
		    PAT_VALUE(5, PAT_WRITE_COMBINING);
		pat_works = 1;
	} else {
		/*
		 * Due to some Intel errata, we can only safely use the lower 4
		 * PAT entries.  Thus, just replace PAT Index 2 with WC instead
		 * of UC-.
		 *
		 *   Intel Pentium III Processor Specification Update
		 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
		 * or Mode C Paging)
		 *
		 *   Intel Pentium IV Processor Specification Update
		 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
		 */
		pat_msr = rdmsr(MSR_PAT);
		pat_msr &= ~PAT_MASK(2);
		pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
		pat_works = 0;
	}
	wrmsr(MSR_PAT, pat_msr);
}
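/*
 * The resulting PAT layout is therefore:
 *
 *	index	pat_works = 1	pat_works = 0
 *	0	WB		WB
 *	1	WT		WT
 *	2	UC-		WC
 *	3	UC		UC
 *	4	WP		(unusable, WB)
 *	5	WC		(unusable, WT)
 *	6	UC-		(unusable, UC-)
 *	7	UC		(unusable, UC)
 *
 * pmap_cache_bits() below picks an index out of this table for a requested
 * caching mode.
 */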
/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pat_mode = PAT_WRITE_BACK;
}

/*
 * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
 * Requirements:
 *  - Must deal with pages in order to ensure that none of the PG_* bits
 *    are ever set, PG_V in particular.
 *  - Assumes we can write to ptes without pte_store() atomic ops, even
 *    on PAE systems.  This should be ok.
 *  - Assumes nothing will ever test these addresses for 0 to indicate
 *    no mapping instead of correctly checking PG_V.
 *  - Assumes a vm_offset_t will fit in a pte (true for i386).
 * Because PG_V is never set, there can be no mappings to invalidate.
 */
static int ptelist_count = 0;
static vm_offset_t
pmap_ptelist_alloc(vm_offset_t *head)
{
	vm_offset_t va;
	vm_offset_t *phead = (vm_offset_t *)*head;

	if (ptelist_count == 0) {
		printf("out of memory!!!!!!\n");
		return (0);	/* Out of memory */
	}
	ptelist_count--;
	va = phead[ptelist_count];
	return (va);
}

static void
pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
{
	vm_offset_t *phead = (vm_offset_t *)*head;

	phead[ptelist_count++] = va;
}

static void
pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
{
	int i, nstackpages;
	vm_offset_t va;
	vm_page_t m;

	nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1) /
	    (PAGE_SIZE/sizeof(vm_offset_t));
	for (i = 0; i < nstackpages; i++) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		m = vm_page_alloc(NULL, i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		pmap_qenter(va, &m, 1);
	}

	*head = (vm_offset_t)base;
	for (i = npages - 1; i >= nstackpages; i--) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		pmap_ptelist_free(head, va);
	}
}
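/*
 * In other words, the freelist is a simple stack: the first nstackpages
 * pages of the KVA block are backed with real memory and hold an array of
 * free virtual addresses, ptelist_count is the current stack depth, and the
 * remaining (npages - nstackpages) pages of the block are the entries
 * initially pushed onto it.  With 4-byte vm_offset_t entries, one stack
 * page indexes PAGE_SIZE / 4 = 1024 free pages.
 */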
/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{

	/*
	 * Initialize the address space (zone) for the pv entries.  Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_max = roundup(pv_entry_max, _NPCPV);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
	    PAGE_SIZE * pv_maxchunks);
	if (pv_chunkbase == NULL)
		panic("pmap_init: not enough kvm for pv chunks");
	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
}

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
	"Max number of PV entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
	"Page share factor per proc");

static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
    "2/4MB page mapping counters");

static u_long pmap_pde_mappings;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
    &pmap_pde_mappings, 0, "2/4MB page mappings");

/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * Determine the appropriate bits to set in a PTE or PDE for a specified
 * caching mode.
 */
int
pmap_cache_bits(int mode, boolean_t is_pde)
{
	int pat_flag, pat_index, cache_bits;

	/* The PAT bit is different for PTE's and PDE's. */
	pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;

	/* If we don't support PAT, map extended modes to older ones. */
	if (!(cpu_feature & CPUID_PAT)) {
		switch (mode) {
		case PAT_UNCACHEABLE:
		case PAT_WRITE_THROUGH:
		case PAT_WRITE_BACK:
			break;
		case PAT_UNCACHED:
		case PAT_WRITE_COMBINING:
		case PAT_WRITE_PROTECTED:
			mode = PAT_UNCACHEABLE;
			break;
		}
	}

	/* Map the caching mode to a PAT index. */
	if (pat_works) {
		switch (mode) {
		case PAT_UNCACHEABLE:
			pat_index = 3;
			break;
		case PAT_WRITE_THROUGH:
			pat_index = 1;
			break;
		case PAT_WRITE_BACK:
			pat_index = 0;
			break;
		case PAT_UNCACHED:
			pat_index = 2;
			break;
		case PAT_WRITE_COMBINING:
			pat_index = 5;
			break;
		case PAT_WRITE_PROTECTED:
			pat_index = 4;
			break;
		default:
			panic("Unknown caching mode %d\n", mode);
		}
	} else {
		switch (mode) {
		case PAT_UNCACHED:
		case PAT_UNCACHEABLE:
		case PAT_WRITE_PROTECTED:
			pat_index = 3;
			break;
		case PAT_WRITE_THROUGH:
			pat_index = 1;
			break;
		case PAT_WRITE_BACK:
			pat_index = 0;
			break;
		case PAT_WRITE_COMBINING:
			pat_index = 2;
			break;
		default:
			panic("Unknown caching mode %d\n", mode);
		}
	}

	/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
	cache_bits = 0;
	if (pat_index & 0x4)
		cache_bits |= pat_flag;
	if (pat_index & 0x2)
		cache_bits |= PG_NC_PCD;
	if (pat_index & 0x1)
		cache_bits |= PG_NC_PWT;
	return (cache_bits);
}
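/*
 * Worked example: with pat_works set, PAT_WRITE_COMBINING maps to PAT
 * index 5 (binary 101), so pmap_cache_bits() returns the PAT bit for the
 * entry type (PG_PTE_PAT or PG_PDE_PAT) OR'ed with PG_NC_PWT, while
 * PAT_WRITE_BACK maps to index 0 and returns no extra bits at all.
 */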
#ifdef SMP
/*
 * For SMP, these functions have to use the IPI mechanism for coherence.
 *
 * N.B.: Before calling any of the following TLB invalidation functions,
 * the calling processor must ensure that all stores updating a non-
 * kernel page table are globally performed.  Otherwise, another
 * processor could cache an old, pre-update entry without being
 * invalidated.  This can happen one of two ways: (1) The pmap becomes
 * active on another processor after its pm_active field is checked by
 * one of the following functions but before a store updating the page
 * table is globally performed. (2) The pmap becomes active on another
 * processor before its pm_active field is checked but due to
 * speculative loads one of the following functions still reads the
 * pmap as inactive on the other processor.
 *
 * The kernel page table is exempt because its pm_active field is
 * immutable.  The kernel page table is always active on every
 * processor.
 */
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	cpuset_t other_cpus;
	u_int cpuid;

	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
	    pmap, va);

	sched_pin();
	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
		invlpg(va);
		smp_invlpg(va);
	} else {
		cpuid = PCPU_GET(cpuid);
		other_cpus = all_cpus;
		CPU_CLR(cpuid, &other_cpus);
		if (CPU_ISSET(cpuid, &pmap->pm_active))
			invlpg(va);
		CPU_AND(&other_cpus, &pmap->pm_active);
		if (!CPU_EMPTY(&other_cpus))
			smp_masked_invlpg(other_cpus, va);
	}
	sched_unpin();
	PT_UPDATES_FLUSH();
}

void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	cpuset_t other_cpus;
	vm_offset_t addr;
	u_int cpuid;

	CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
	    pmap, sva, eva);

	sched_pin();
	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
		smp_invlpg_range(sva, eva);
	} else {
		cpuid = PCPU_GET(cpuid);
		other_cpus = all_cpus;
		CPU_CLR(cpuid, &other_cpus);
		if (CPU_ISSET(cpuid, &pmap->pm_active))
			for (addr = sva; addr < eva; addr += PAGE_SIZE)
				invlpg(addr);
		CPU_AND(&other_cpus, &pmap->pm_active);
		if (!CPU_EMPTY(&other_cpus))
			smp_masked_invlpg_range(other_cpus, sva, eva);
	}
	sched_unpin();
	PT_UPDATES_FLUSH();
}

void
pmap_invalidate_all(pmap_t pmap)
{
	cpuset_t other_cpus;
	u_int cpuid;

	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);

	sched_pin();
	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
		invltlb();
		smp_invltlb();
	} else {
		cpuid = PCPU_GET(cpuid);
		other_cpus = all_cpus;
		CPU_CLR(cpuid, &other_cpus);
		if (CPU_ISSET(cpuid, &pmap->pm_active))
			invltlb();
		CPU_AND(&other_cpus, &pmap->pm_active);
		if (!CPU_EMPTY(&other_cpus))
			smp_masked_invltlb(other_cpus);
	}
	sched_unpin();
}

void
pmap_invalidate_cache(void)
{

	sched_pin();
	wbinvd();
	smp_cache_flush();
	sched_unpin();
}
#else /* !SMP */
/*
 * Normal, non-SMP, 486+ invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
	    pmap, va);

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		invlpg(va);
	PT_UPDATES_FLUSH();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	if (eva - sva > PAGE_SIZE)
		CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
		    pmap, sva, eva);

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
	PT_UPDATES_FLUSH();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		invltlb();
}

PMAP_INLINE void
pmap_invalidate_cache(void)
{

	wbinvd();
}
#endif /* !SMP */

#define	PMAP_CLFLUSH_THRESHOLD	(2 * 1024 * 1024)

void
pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{

	KASSERT((sva & PAGE_MASK) == 0,
	    ("pmap_invalidate_cache_range: sva not page-aligned"));
	KASSERT((eva & PAGE_MASK) == 0,
	    ("pmap_invalidate_cache_range: eva not page-aligned"));

	if (cpu_feature & CPUID_SS)
		; /* If "Self Snoop" is supported, do nothing. */
	else if ((cpu_feature & CPUID_CLFSH) != 0 &&
	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {

		/*
		 * Otherwise, do per-cache line flush.  Use the mfence
		 * instruction to ensure that previous stores are
		 * included in the write-back.  The processor
		 * propagates the flush to other processors in the cache
		 * coherence domain.
		 */
		mfence();
		for (; sva < eva; sva += cpu_clflush_line_size)
			clflush(sva);
		mfence();
	} else {

		/*
		 * No targeted cache flush methods are supported by CPU,
		 * or the supplied range is bigger than 2MB.
		 * Globally invalidate cache.
		 */
		pmap_invalidate_cache();
	}
}

void
pmap_invalidate_cache_pages(vm_page_t *pages, int count)
{
	int i;

	if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
	    (cpu_feature & CPUID_CLFSH) == 0) {
		pmap_invalidate_cache();
	} else {
		for (i = 0; i < count; i++)
			pmap_flush_page(pages[i]);
	}
}

/*
 * Are we current address space or kernel?  N.B. We return FALSE when
 * a pmap's page table is in use because a kernel thread is borrowing
 * it.  The borrowed page table can change spontaneously, making any
 * dependence on its continued use subject to a race condition.
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

	return (pmap == kernel_pmap ||
	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
}

/*
 * If the given pmap is not the current or kernel pmap, the returned pte must
 * be released by passing it to pmap_pte_release().
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_lock(&PMAP2mutex);
		newpf = *pde & PG_FRAME;
		if ((*PMAP2 & PG_FRAME) != newpf) {
			PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
			CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
			    pmap, va, (*PMAP2 & 0xffffffff));
		}
		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (NULL);
}

/*
 * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
 * being NULL.
 */
static __inline void
pmap_pte_release(pt_entry_t *pte)
{

	if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {
		CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
		    *PMAP2);
		rw_wlock(&pvh_global_lock);
		PT_SET_VA(PMAP2, 0, TRUE);
		rw_wunlock(&pvh_global_lock);
		mtx_unlock(&PMAP2mutex);
	}
}

static __inline void
invlcaddr(void *caddr)
{

	invlpg((u_int)caddr);
	PT_UPDATES_FLUSH();
}

/*
 * Super fast pmap_pte routine best used when scanning
 * the pv lists.  This eliminates many coarse-grained
 * invltlb calls.  Note that many of the pv list
 * scans are across different pmaps.  It is very wasteful
 * to do an entire invltlb for checking a single mapping.
 *
 * If the given pmap is not the current pmap, pvh_global_lock
 * must be held and curthread pinned to a CPU.
 */
static pt_entry_t *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		rw_assert(&pvh_global_lock, RA_WLOCKED);
		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
		newpf = *pde & PG_FRAME;
		if ((*PMAP1 & PG_FRAME) != newpf) {
			PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
			CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
			    pmap, va, (u_long)*PMAP1);

#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			PMAP1cpu = PCPU_GET(cpuid);
			invlcaddr(PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}
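/*
 * Both pmap_pte() and pmap_pte_quick() handle a foreign pmap the same way:
 * they point a reserved kernel PTE (PMAP2 or PMAP1) at the page table page
 * of interest and return an address inside the corresponding one-page
 * window (PADDR2 or PADDR1), offset by the page table index
 * i386_btop(va) & (NPTEPG - 1).
 */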
/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;
	pt_entry_t pteval;

	rtval = 0;
	PMAP_LOCK(pmap);
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0) {
			rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
			PMAP_UNLOCK(pmap);
			return (rtval);
		}
		pte = pmap_pte(pmap, va);
		pteval = *pte ? xpmap_mtop(*pte) : 0;
		rtval = (pteval & PG_FRAME) | (va & PAGE_MASK);
		pmap_pte_release(pte);
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}

/*
 *	Routine:	pmap_extract_ma
 *	Function:
 *		Like pmap_extract, but returns machine address
 */
vm_paddr_t
pmap_extract_ma(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;

	rtval = 0;
	PMAP_LOCK(pmap);
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0) {
			rtval = (pde & ~PDRMASK) | (va & PDRMASK);
			PMAP_UNLOCK(pmap);
			return (rtval);
		}
		pte = pmap_pte(pmap, va);
		rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
		pmap_pte_release(pte);
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pd_entry_t pde;
	pt_entry_t pte, *ptep;
	vm_page_t m;
	vm_paddr_t pa;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pde = PT_GET(pmap_pde(pmap, va));
	if (pde != 0) {
		if (pde & PG_PS) {
			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
				if (vm_page_pa_tryrelock(pmap, (pde &
				    PG_PS_FRAME) | (va & PDRMASK), &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
				    (va & PDRMASK));
				vm_page_hold(m);
			}
		} else {
			ptep = pmap_pte(pmap, va);
			pte = PT_GET(ptep);
			pmap_pte_release(ptep);
			if (pte != 0 &&
			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
				    &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
				vm_page_hold(m);
			}
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}
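/*
 * Under Xen, page tables hold machine (hypervisor-level) frame numbers
 * rather than pseudo-physical ones, which is why pmap_extract() filters the
 * raw PTE through xpmap_mtop() while pmap_extract_ma() returns it
 * untranslated; likewise, the PT_GET() reads in pmap_extract_and_hold()
 * yield pseudo-physical values that are safe to feed to PHYS_TO_VM_PAGE().
 */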
/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Add a wired page to the kva.
 * Note: not SMP coherent.
 *
 * This function may be used before pmap_bootstrap() is called.
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{

	PT_SET_MA(va, xpmap_ptom(pa) | PG_RW | PG_V | pgeflag);
}

void
pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
}

static __inline void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
{

	PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 *
 * This function may be used before pmap_bootstrap() is called.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	PT_CLEAR_VA(pte, FALSE);
}

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	The value passed in '*virt' is a suggested virtual address for
 *	the mapping.  Architectures which can support a direct-mapped
 *	physical to virtual region can return the appropriate address
 *	within that region, leaving '*virt' unchanged.  Other
 *	architectures should map the pages starting at '*virt' and
 *	update '*virt' with the first usable address after the mapped
 *	region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	vm_offset_t va, sva;

	va = sva = *virt;
	CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
	    va, start, end, prot);
	while (start < end) {
		pmap_kenter(va, start);
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
	*virt = va;
	return (sva);
}

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pt_entry_t *endpte, *pte;
	vm_paddr_t pa;
	vm_offset_t va = sva;
	int mclcount = 0;
	multicall_entry_t mcl[16];
	multicall_entry_t *mclp = mcl;
	int error;

	CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
	pte = vtopte(sva);
	endpte = pte + count;
	while (pte < endpte) {
		pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A;

		mclp->op = __HYPERVISOR_update_va_mapping;
		mclp->args[0] = va;
		mclp->args[1] = (uint32_t)(pa & 0xffffffff);
		mclp->args[2] = (uint32_t)(pa >> 32);
		mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;

		va += PAGE_SIZE;
		pte++;
		ma++;
		mclp++;
		mclcount++;
		if (mclcount == 16) {
			error = HYPERVISOR_multicall(mcl, mclcount);
			mclp = mcl;
			mclcount = 0;
			KASSERT(error == 0, ("bad multicall %d", error));
		}
	}
	if (mclcount) {
		error = HYPERVISOR_multicall(mcl, mclcount);
		KASSERT(error == 0, ("bad multicall %d", error));
	}

#ifdef INVARIANTS
	for (pte = vtopte(sva), mclcount = 0; mclcount < count;
	    mclcount++, pte++)
		KASSERT(*pte, ("pte not set for va=0x%x",
		    sva + mclcount * PAGE_SIZE));
#endif
}
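/*
 * Rather than issuing one hypercall per page, pmap_qenter() batches up to
 * 16 MMU updates into a multicall_entry_t array and submits them with a
 * single HYPERVISOR_multicall(), requesting a TLB flush on all vCPUs
 * (UVMF_INVLPG | UVMF_ALL) only when a valid mapping is being overwritten.
 */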
/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
	va = sva;
	rw_wlock(&pvh_global_lock);
	critical_enter();
	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
	PT_UPDATES_FLUSH();
	pmap_invalidate_range(kernel_pmap, sva, va);
	critical_exit();
	rw_wunlock(&pvh_global_lock);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/
static __inline void
pmap_free_zero_pages(vm_page_t free)
{
	vm_page_t m;

	while (free != NULL) {
		m = free;
		free = m->right;
		vm_page_free_zero(m);
	}
}

/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_ptp(pmap, m, free);
		return (TRUE);
	} else
		return (FALSE);
}

static void
_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
{
	vm_offset_t pteva;

	PT_UPDATES_FLUSH();
	/*
	 * unmap the page table page
	 */
	xen_pt_unpin(pmap->pm_pdir[m->pindex]);
	/*
	 * page *might* contain residual mapping :-/
	 */
	PD_CLEAR_VA(pmap, m->pindex, TRUE);
	pmap_zero_page(m);
	--pmap->pm_stats.resident_count;

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&cnt.v_wire_count, 1);

	/*
	 * Do an invltlb to make the invalidated mapping
	 * take effect immediately.
	 */
	pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
	pmap_invalidate_page(pmap, pteva);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	m->right = *free;
	*free = m;
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
{
	pd_entry_t ptepde;
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	ptepde = PT_GET(pmap_pde(pmap, va));
	mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
	return (pmap_unwire_ptp(pmap, mpte, free));
}

/*
 * Initialize the pmap for the swapper process.
 */
void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	/*
	 * Since the page table directory is shared with the kernel pmap,
	 * which is already included in the list "allpmaps", this pmap does
	 * not need to be inserted into that list.
	 */
	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
#ifdef PAE
	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
	CPU_ZERO(&pmap->pm_active);
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(pmap_t pmap)
{
	vm_page_t m, ptdpg[NPGPTD + 1];
	int npgptd = NPGPTD + 1;
	int i;

#ifdef HAMFISTED_LOCKING
	mtx_lock(&createdelete_lock);
#endif

	PMAP_LOCK_INIT(pmap);

	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	if (pmap->pm_pdir == NULL) {
		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
		    NBPTD);
		if (pmap->pm_pdir == NULL) {
			PMAP_LOCK_DESTROY(pmap);
#ifdef HAMFISTED_LOCKING
			mtx_unlock(&createdelete_lock);
#endif
			return (0);
		}
#ifdef PAE
		pmap->pm_pdpt = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1);
#endif
	}

	/*
	 * allocate the page directory page(s)
	 */
	for (i = 0; i < npgptd;) {
		m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
		    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
		if (m == NULL)
			VM_WAIT;
		else {
			ptdpg[i++] = m;
		}
	}

	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);

	for (i = 0; i < NPGPTD; i++)
		if ((ptdpg[i]->flags & PG_ZERO) == 0)
			pagezero(pmap->pm_pdir + (i * NPDEPG));

	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	/* Copy the kernel page table directory entries. */
	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
	mtx_unlock_spin(&allpmaps_lock);

#ifdef PAE
	pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
	if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
		bzero(pmap->pm_pdpt, PAGE_SIZE);
	for (i = 0; i < NPGPTD; i++) {
		vm_paddr_t ma;

		ma = VM_PAGE_TO_MACH(ptdpg[i]);
		pmap->pm_pdpt[i] = ma | PG_V;

	}
#endif
	for (i = 0; i < NPGPTD; i++) {
		pt_entry_t *pd;
		vm_paddr_t ma;

		ma = VM_PAGE_TO_MACH(ptdpg[i]);
		pd = pmap->pm_pdir + (i * NPDEPG);
		PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
#if 0
		xen_pgd_pin(ma);
#endif
	}

#ifdef PAE
	PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
#endif
	rw_wlock(&pvh_global_lock);
	xen_flush_queue();
	xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD]));
	for (i = 0; i < NPGPTD; i++) {
		vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]);
		PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
	}
	xen_flush_queue();
	rw_wunlock(&pvh_global_lock);
	CPU_ZERO(&pmap->pm_active);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);

#ifdef HAMFISTED_LOCKING
	mtx_unlock(&createdelete_lock);
#endif
	return (1);
}
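/*
 * Note the Xen-specific steps above: before the new page directory may be
 * installed, its pages must be mapped read-only (the PT_SET_MA calls that
 * clear PG_RW) and then pinned with xen_pgdpt_pin(), since the hypervisor
 * requires that page tables it runs on never be writable by the guest.
 */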
/*
 * this routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
{
	vm_paddr_t ptema;
	vm_page_t m;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		if (flags & M_WAITOK) {
			PMAP_UNLOCK(pmap);
			rw_wunlock(&pvh_global_lock);
			VM_WAIT;
			rw_wlock(&pvh_global_lock);
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */

	pmap->pm_stats.resident_count++;

	ptema = VM_PAGE_TO_MACH(m);
	xen_pt_pin(ptema);
	PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
	    (ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);

	KASSERT(pmap->pm_pdir[ptepindex],
	    ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
	return (m);
}

static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
{
	u_int ptepindex;
	pd_entry_t ptema;
	vm_page_t m;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = va >> PDRSHIFT;
retry:
	/*
	 * Get the page directory entry
	 */
	ptema = pmap->pm_pdir[ptepindex];

	/*
	 * This supports switching from a 4MB page to a
	 * normal 4K page.
	 */
	if (ptema & PG_PS) {
		/*
		 * XXX
		 */
		pmap->pm_pdir[ptepindex] = 0;
		ptema = 0;
		pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
		pmap_invalidate_all(kernel_pmap);
	}

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (ptema & PG_V) {
		m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has
		 * been deallocated.
		 */
		CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
		    pmap, va, flags);
		m = _pmap_allocpte(pmap, ptepindex, flags);
		if (m == NULL && (flags & M_WAITOK))
			goto retry;

		KASSERT(pmap->pm_pdir[ptepindex],
		    ("ptepindex=%d did not get mapped", ptepindex));
	}
	return (m);
}

/***************************************************
* Pmap allocation/deallocation routines.
 ***************************************************/

#ifdef SMP
/*
 * Deal with a SMP shootdown of other users of the pmap that we are
 * trying to dispose of.  This can be a bit hairy.
 */
static cpuset_t *lazymask;
static u_int lazyptd;
static volatile u_int lazywait;

void pmap_lazyfix_action(void);

void
pmap_lazyfix_action(void)
{

#ifdef COUNT_IPIS
	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
#endif
	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
	atomic_store_rel_int(&lazywait, 1);
}

static void
pmap_lazyfix_self(u_int cpuid)
{

	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	CPU_CLR_ATOMIC(cpuid, lazymask);
}

static void
pmap_lazyfix(pmap_t pmap)
{
	cpuset_t mymask, mask;
	u_int cpuid, spins;
	int lsb;

	mask = pmap->pm_active;
	while (!CPU_EMPTY(&mask)) {
		spins = 50000000;

		/* Find least significant set bit. */
		lsb = cpusetobj_ffs(&mask);
		MPASS(lsb != 0);
		lsb--;
		CPU_SETOF(lsb, &mask);
		mtx_lock_spin(&smp_ipi_mtx);
#ifdef PAE
		lazyptd = vtophys(pmap->pm_pdpt);
#else
		lazyptd = vtophys(pmap->pm_pdir);
#endif
		cpuid = PCPU_GET(cpuid);

		/* Use a cpuset just for having an easy check. */
		CPU_SETOF(cpuid, &mymask);
		if (!CPU_CMP(&mask, &mymask)) {
			lazymask = &pmap->pm_active;
			pmap_lazyfix_self(cpuid);
		} else {
			atomic_store_rel_int((u_int *)&lazymask,
			    (u_int)&pmap->pm_active);
			atomic_store_rel_int(&lazywait, 0);
			ipi_selected(mask, IPI_LAZYPMAP);
			while (lazywait == 0) {
				ia32_pause();
				if (--spins == 0)
					break;
			}
		}
		mtx_unlock_spin(&smp_ipi_mtx);
		if (spins == 0)
			printf("pmap_lazyfix: spun for 50000000\n");
		mask = pmap->pm_active;
	}
}

#else /* SMP */

/*
 * Cleaning up on uniprocessor is easy.  For various reasons, we're
 * unlikely to have to even execute this code, including the fact
 * that the cleanup is deferred until the parent does a wait(2), which
 * means that another userland process has run.
 */
static void
pmap_lazyfix(pmap_t pmap)
{
	u_int cr3;

	cr3 = vtophys(pmap->pm_pdir);
	if (cr3 == rcr3()) {
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
		CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
	}
}
#endif /* SMP */

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m, ptdpg[2*NPGPTD+1];
	vm_paddr_t ma;
	int i;
#ifdef PAE
	int npgptd = NPGPTD + 1;
#else
	int npgptd = NPGPTD;
#endif

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));
	PT_UPDATES_FLUSH();

#ifdef HAMFISTED_LOCKING
	mtx_lock(&createdelete_lock);
#endif

	pmap_lazyfix(pmap);
	mtx_lock_spin(&allpmaps_lock);
	LIST_REMOVE(pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	for (i = 0; i < NPGPTD; i++)
		ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir +
		    (i*NPDEPG)) & PG_FRAME);
	pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
#ifdef PAE
	ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
#endif

	for (i = 0; i < npgptd; i++) {
		m = ptdpg[i];
		ma = VM_PAGE_TO_MACH(m);
		/* unpinning L1 and L2 treated the same */
#if 0
		xen_pgd_unpin(ma);
#else
		if (i == NPGPTD)
			xen_pgd_unpin(ma);
#endif
#ifdef PAE
		if (i < NPGPTD)
			KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME),
			    ("pmap_release: got wrong ptd page"));
#endif
		m->wire_count--;
		atomic_subtract_int(&cnt.v_wire_count, 1);
		vm_page_free(m);
	}
#ifdef PAE
	pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1);
#endif
	PMAP_LOCK_DESTROY(pmap);

#ifdef HAMFISTED_LOCKING
	mtx_unlock(&createdelete_lock);
#endif
}

static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;

	return (sysctl_handle_long(oidp, &ksize, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return (sysctl_handle_long(oidp, &kfree, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");
/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	struct pmap *pmap;
	vm_paddr_t ptppaddr;
	vm_page_t nkpg;
	pd_entry_t newpdir;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;
		while (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
			    ~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
		}
	}
	addr = roundup2(addr, NBPDR);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		if (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");

		nkpt++;

		if ((nkpg->flags & PG_ZERO) == 0)
			pmap_zero_page(nkpg);
		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
		rw_wlock(&pvh_global_lock);
		PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
		mtx_lock_spin(&allpmaps_lock);
		LIST_FOREACH(pmap, &allpmaps, pm_list)
			PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);

		mtx_unlock_spin(&allpmaps_lock);
		rw_wunlock(&pvh_global_lock);

		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}

/***************************************************
 * page management routines.
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 11);
CTASSERT(_NPCPV == 336);

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
#define	PC_FREE10	0x0000fffful	/* Free values for index 10 */

static const uint32_t pc_freemask[_NPCM] = {
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE10
};
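/*
 * The free masks follow from the chunk geometry: each pv_chunk holds
 * _NPCPV = 336 pv entries tracked by _NPCM = 11 32-bit bitmap words.  The
 * first ten words cover 10 * 32 = 320 entries and are therefore all-ones
 * when fully free (PC_FREE0_9), while the eleventh covers only the
 * remaining 336 - 320 = 16 entries, giving PC_FREE10 = 0x0000ffff.
 */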
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
	"Current number of pv entries");

#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
	"Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
	"Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
	"Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
	"Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
	"Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
	"Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
	"Current number of spare pv entries");
#endif

/*
 * We are in a serious low memory condition.  Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.
 */
static vm_page_t
pmap_pv_reclaim(pmap_t locked_pmap)
{
	struct pch newtail;
	struct pv_chunk *pc;
	pmap_t pmap;
	pt_entry_t *pte, tpte;
	pv_entry_t pv;
	vm_offset_t va;
	vm_page_t free, m, m_pc;
	uint32_t inuse;
	int bit, field, freed;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	pmap = NULL;
	free = m_pc = NULL;
	TAILQ_INIT(&newtail);
	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
	    free == NULL)) {
		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
		if (pmap != pc->pc_pmap) {
			if (pmap != NULL) {
				pmap_invalidate_all(pmap);
				if (pmap != locked_pmap)
					PMAP_UNLOCK(pmap);
			}
			pmap = pc->pc_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
				pmap = NULL;
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
				continue;
			}
		}

		/*
		 * Destroy every non-wired, 4 KB page mapping in the chunk.
		 */
2016 */ 2017 freed = 0; 2018 for (field = 0; field < _NPCM; field++) { 2019 for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 2020 inuse != 0; inuse &= ~(1UL << bit)) { 2021 bit = bsfl(inuse); 2022 pv = &pc->pc_pventry[field * 32 + bit]; 2023 va = pv->pv_va; 2024 pte = pmap_pte(pmap, va); 2025 tpte = *pte; 2026 if ((tpte & PG_W) == 0) 2027 tpte = pte_load_clear(pte); 2028 pmap_pte_release(pte); 2029 if ((tpte & PG_W) != 0) 2030 continue; 2031 KASSERT(tpte != 0, 2032 ("pmap_pv_reclaim: pmap %p va %x zero pte", 2033 pmap, va)); 2034 if ((tpte & PG_G) != 0) 2035 pmap_invalidate_page(pmap, va); 2036 m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); 2037 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2038 vm_page_dirty(m); 2039 if ((tpte & PG_A) != 0) 2040 vm_page_aflag_set(m, PGA_REFERENCED); 2041 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2042 if (TAILQ_EMPTY(&m->md.pv_list)) 2043 vm_page_aflag_clear(m, PGA_WRITEABLE); 2044 pc->pc_map[field] |= 1UL << bit; 2045 pmap_unuse_pt(pmap, va, &free); 2046 freed++; 2047 } 2048 } 2049 if (freed == 0) { 2050 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 2051 continue; 2052 } 2053 /* Every freed mapping is for a 4 KB page. */ 2054 pmap->pm_stats.resident_count -= freed; 2055 PV_STAT(pv_entry_frees += freed); 2056 PV_STAT(pv_entry_spare += freed); 2057 pv_entry_count -= freed; 2058 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2059 for (field = 0; field < _NPCM; field++) 2060 if (pc->pc_map[field] != pc_freemask[field]) { 2061 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 2062 pc_list); 2063 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 2064 2065 /* 2066 * One freed pv entry in locked_pmap is 2067 * sufficient. 2068 */ 2069 if (pmap == locked_pmap) 2070 goto out; 2071 break; 2072 } 2073 if (field == _NPCM) { 2074 PV_STAT(pv_entry_spare -= _NPCPV); 2075 PV_STAT(pc_chunk_count--); 2076 PV_STAT(pc_chunk_frees++); 2077 /* Entire chunk is free; return it. */ 2078 m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2079 pmap_qremove((vm_offset_t)pc, 1); 2080 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 2081 break; 2082 } 2083 } 2084out: 2085 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); 2086 if (pmap != NULL) { 2087 pmap_invalidate_all(pmap); 2088 if (pmap != locked_pmap) 2089 PMAP_UNLOCK(pmap); 2090 } 2091 if (m_pc == NULL && pv_vafree != 0 && free != NULL) { 2092 m_pc = free; 2093 free = m_pc->right; 2094 /* Recycle a freed page table page. */ 2095 m_pc->wire_count = 1; 2096 atomic_add_int(&cnt.v_wire_count, 1); 2097 } 2098 pmap_free_zero_pages(free); 2099 return (m_pc); 2100} 2101 2102/* 2103 * free the pv_entry back to the free list 2104 */ 2105static void 2106free_pv_entry(pmap_t pmap, pv_entry_t pv) 2107{ 2108 struct pv_chunk *pc; 2109 int idx, field, bit; 2110 2111 rw_assert(&pvh_global_lock, RA_WLOCKED); 2112 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2113 PV_STAT(pv_entry_frees++); 2114 PV_STAT(pv_entry_spare++); 2115 pv_entry_count--; 2116 pc = pv_to_chunk(pv); 2117 idx = pv - &pc->pc_pventry[0]; 2118 field = idx / 32; 2119 bit = idx % 32; 2120 pc->pc_map[field] |= 1ul << bit; 2121 for (idx = 0; idx < _NPCM; idx++) 2122 if (pc->pc_map[idx] != pc_freemask[idx]) { 2123 /* 2124 * 98% of the time, pc is already at the head of the 2125 * list. If it isn't already, move it to the head. 
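 * Keeping partially populated chunks at the head lets get_pv_entry()
 * find a free slot without walking the whole list.  As a worked
 * example of the index arithmetic above: with _NPCPV == 336, the last
 * entry has idx == 335, giving field == 10 and bit == 15, which is
 * exactly the highest free bit admitted by PC_FREE10 (0x0000ffff).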
2126 */ 2127 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != 2128 pc)) { 2129 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2130 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 2131 pc_list); 2132 } 2133 return; 2134 } 2135 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2136 free_pv_chunk(pc); 2137} 2138 2139static void 2140free_pv_chunk(struct pv_chunk *pc) 2141{ 2142 vm_page_t m; 2143 2144 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2145 PV_STAT(pv_entry_spare -= _NPCPV); 2146 PV_STAT(pc_chunk_count--); 2147 PV_STAT(pc_chunk_frees++); 2148 /* entire chunk is free, return it */ 2149 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2150 pmap_qremove((vm_offset_t)pc, 1); 2151 vm_page_unwire(m, 0); 2152 vm_page_free(m); 2153 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 2154} 2155 2156/* 2157 * get a new pv_entry, allocating a block from the system 2158 * when needed. 2159 */ 2160static pv_entry_t 2161get_pv_entry(pmap_t pmap, boolean_t try) 2162{ 2163 static const struct timeval printinterval = { 60, 0 }; 2164 static struct timeval lastprint; 2165 int bit, field; 2166 pv_entry_t pv; 2167 struct pv_chunk *pc; 2168 vm_page_t m; 2169 2170 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2171 rw_assert(&pvh_global_lock, RA_WLOCKED); 2172 PV_STAT(pv_entry_allocs++); 2173 pv_entry_count++; 2174 if (pv_entry_count > pv_entry_high_water) 2175 if (ratecheck(&lastprint, &printinterval)) 2176 printf("Approaching the limit on PV entries, consider " 2177 "increasing either the vm.pmap.shpgperproc or the " 2178 "vm.pmap.pv_entry_max tunable.\n"); 2179retry: 2180 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2181 if (pc != NULL) { 2182 for (field = 0; field < _NPCM; field++) { 2183 if (pc->pc_map[field]) { 2184 bit = bsfl(pc->pc_map[field]); 2185 break; 2186 } 2187 } 2188 if (field < _NPCM) { 2189 pv = &pc->pc_pventry[field * 32 + bit]; 2190 pc->pc_map[field] &= ~(1ul << bit); 2191 /* If this was the last item, move it to tail */ 2192 for (field = 0; field < _NPCM; field++) 2193 if (pc->pc_map[field] != 0) { 2194 PV_STAT(pv_entry_spare--); 2195 return (pv); /* not full, return */ 2196 } 2197 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2198 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2199 PV_STAT(pv_entry_spare--); 2200 return (pv); 2201 } 2202 } 2203 /* 2204 * Access to the ptelist "pv_vafree" is synchronized by the page 2205 * queues lock. If "pv_vafree" is currently non-empty, it will 2206 * remain non-empty until pmap_ptelist_alloc() completes. 
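 * (The "page queues lock" wording is historical; in this revision the
 * callers hold the pvh global lock for writing, as the rw_assert()
 * above requires.)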
2207 */ 2208 if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2209 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 2210 if (try) { 2211 pv_entry_count--; 2212 PV_STAT(pc_chunk_tryfail++); 2213 return (NULL); 2214 } 2215 m = pmap_pv_reclaim(pmap); 2216 if (m == NULL) 2217 goto retry; 2218 } 2219 PV_STAT(pc_chunk_count++); 2220 PV_STAT(pc_chunk_allocs++); 2221 pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); 2222 pmap_qenter((vm_offset_t)pc, &m, 1); 2223 if ((m->flags & PG_ZERO) == 0) 2224 pagezero(pc); 2225 pc->pc_pmap = pmap; 2226 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 2227 for (field = 1; field < _NPCM; field++) 2228 pc->pc_map[field] = pc_freemask[field]; 2229 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 2230 pv = &pc->pc_pventry[0]; 2231 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2232 PV_STAT(pv_entry_spare += _NPCPV - 1); 2233 return (pv); 2234} 2235 2236static __inline pv_entry_t 2237pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2238{ 2239 pv_entry_t pv; 2240 2241 rw_assert(&pvh_global_lock, RA_WLOCKED); 2242 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 2243 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 2244 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 2245 break; 2246 } 2247 } 2248 return (pv); 2249} 2250 2251static void 2252pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2253{ 2254 pv_entry_t pv; 2255 2256 pv = pmap_pvh_remove(pvh, pmap, va); 2257 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 2258 free_pv_entry(pmap, pv); 2259} 2260 2261static void 2262pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 2263{ 2264 2265 rw_assert(&pvh_global_lock, RA_WLOCKED); 2266 pmap_pvh_free(&m->md, pmap, va); 2267 if (TAILQ_EMPTY(&m->md.pv_list)) 2268 vm_page_aflag_clear(m, PGA_WRITEABLE); 2269} 2270 2271/* 2272 * Conditionally create a pv entry. 2273 */ 2274static boolean_t 2275pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 2276{ 2277 pv_entry_t pv; 2278 2279 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2280 rw_assert(&pvh_global_lock, RA_WLOCKED); 2281 if (pv_entry_count < pv_entry_high_water && 2282 (pv = get_pv_entry(pmap, TRUE)) != NULL) { 2283 pv->pv_va = va; 2284 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2285 return (TRUE); 2286 } else 2287 return (FALSE); 2288} 2289 2290/* 2291 * pmap_remove_pte: do the things to unmap a page in a process 2292 */ 2293static int 2294pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free) 2295{ 2296 pt_entry_t oldpte; 2297 vm_page_t m; 2298 2299 CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x", 2300 pmap, (u_long)*ptq, va); 2301 2302 rw_assert(&pvh_global_lock, RA_WLOCKED); 2303 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2304 oldpte = *ptq; 2305 PT_SET_VA_MA(ptq, 0, TRUE); 2306 KASSERT(oldpte != 0, 2307 ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va)); 2308 if (oldpte & PG_W) 2309 pmap->pm_stats.wired_count -= 1; 2310 /* 2311 * Machines that don't support invlpg, also don't support 2312 * PG_G. 
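 * A PG_G mapping survives the implicit TLB flush of a CR3 reload, so
 * when PG_G is set the entry is explicitly flushed below, and in
 * kernel_pmap, since only kernel mappings are created global.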
2313 */ 2314 if (oldpte & PG_G) 2315 pmap_invalidate_page(kernel_pmap, va); 2316 pmap->pm_stats.resident_count -= 1; 2317 if (oldpte & PG_MANAGED) { 2318 m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME); 2319 if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2320 vm_page_dirty(m); 2321 if (oldpte & PG_A) 2322 vm_page_aflag_set(m, PGA_REFERENCED); 2323 pmap_remove_entry(pmap, m, va); 2324 } 2325 return (pmap_unuse_pt(pmap, va, free)); 2326} 2327 2328/* 2329 * Remove a single page from a process address space 2330 */ 2331static void 2332pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free) 2333{ 2334 pt_entry_t *pte; 2335 2336 CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x", 2337 pmap, va); 2338 2339 rw_assert(&pvh_global_lock, RA_WLOCKED); 2340 KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 2341 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2342 if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0) 2343 return; 2344 pmap_remove_pte(pmap, pte, va, free); 2345 pmap_invalidate_page(pmap, va); 2346 if (*PMAP1) 2347 PT_SET_MA(PADDR1, 0); 2348 2349} 2350 2351/* 2352 * Remove the given range of addresses from the specified map. 2353 * 2354 * It is assumed that the start and end are properly 2355 * rounded to the page size. 2356 */ 2357void 2358pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2359{ 2360 vm_offset_t pdnxt; 2361 pd_entry_t ptpaddr; 2362 pt_entry_t *pte; 2363 vm_page_t free = NULL; 2364 int anyvalid; 2365 2366 CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x", 2367 pmap, sva, eva); 2368 2369 /* 2370 * Perform an unsynchronized read. This is, however, safe. 2371 */ 2372 if (pmap->pm_stats.resident_count == 0) 2373 return; 2374 2375 anyvalid = 0; 2376 2377 rw_wlock(&pvh_global_lock); 2378 sched_pin(); 2379 PMAP_LOCK(pmap); 2380 2381 /* 2382 * special handling of removing one page. a very 2383 * common operation and easy to short circuit some 2384 * code. 2385 */ 2386 if ((sva + PAGE_SIZE == eva) && 2387 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 2388 pmap_remove_page(pmap, sva, &free); 2389 goto out; 2390 } 2391 2392 for (; sva < eva; sva = pdnxt) { 2393 u_int pdirindex; 2394 2395 /* 2396 * Calculate index for next page table. 2397 */ 2398 pdnxt = (sva + NBPDR) & ~PDRMASK; 2399 if (pdnxt < sva) 2400 pdnxt = eva; 2401 if (pmap->pm_stats.resident_count == 0) 2402 break; 2403 2404 pdirindex = sva >> PDRSHIFT; 2405 ptpaddr = pmap->pm_pdir[pdirindex]; 2406 2407 /* 2408 * Weed out invalid mappings. Note: we assume that the page 2409 * directory table is always allocated, and in kernel virtual. 2410 */ 2411 if (ptpaddr == 0) 2412 continue; 2413 2414 /* 2415 * Check for large page. 2416 */ 2417 if ((ptpaddr & PG_PS) != 0) { 2418 PD_CLEAR_VA(pmap, pdirindex, TRUE); 2419 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2420 anyvalid = 1; 2421 continue; 2422 } 2423 2424 /* 2425 * Limit our scan to either the end of the va represented 2426 * by the current page table page, or to the end of the 2427 * range being removed. 2428 */ 2429 if (pdnxt > eva) 2430 pdnxt = eva; 2431 2432 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2433 sva += PAGE_SIZE) { 2434 if ((*pte & PG_V) == 0) 2435 continue; 2436 2437 /* 2438 * The TLB entry for a PG_G mapping is invalidated 2439 * by pmap_remove_pte(). 
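 * Consequently "anyvalid" is only set for non-global entries, and the
 * single pmap_invalidate_all() at "out:" below covers them all in one
 * flush.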
2440 */ 2441 if ((*pte & PG_G) == 0) 2442 anyvalid = 1; 2443 if (pmap_remove_pte(pmap, pte, sva, &free)) 2444 break; 2445 } 2446 } 2447 PT_UPDATES_FLUSH(); 2448 if (*PMAP1) 2449 PT_SET_VA_MA(PMAP1, 0, TRUE); 2450out: 2451 if (anyvalid) 2452 pmap_invalidate_all(pmap); 2453 sched_unpin(); 2454 rw_wunlock(&pvh_global_lock); 2455 PMAP_UNLOCK(pmap); 2456 pmap_free_zero_pages(free); 2457} 2458 2459/* 2460 * Routine: pmap_remove_all 2461 * Function: 2462 * Removes this physical page from 2463 * all physical maps in which it resides. 2464 * Reflects back modify bits to the pager. 2465 * 2466 * Notes: 2467 * Original versions of this routine were very 2468 * inefficient because they iteratively called 2469 * pmap_remove (slow...) 2470 */ 2471 2472void 2473pmap_remove_all(vm_page_t m) 2474{ 2475 pv_entry_t pv; 2476 pmap_t pmap; 2477 pt_entry_t *pte, tpte; 2478 vm_page_t free; 2479 2480 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2481 ("pmap_remove_all: page %p is not managed", m)); 2482 free = NULL; 2483 rw_wlock(&pvh_global_lock); 2484 sched_pin(); 2485 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2486 pmap = PV_PMAP(pv); 2487 PMAP_LOCK(pmap); 2488 pmap->pm_stats.resident_count--; 2489 pte = pmap_pte_quick(pmap, pv->pv_va); 2490 tpte = *pte; 2491 PT_SET_VA_MA(pte, 0, TRUE); 2492 KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte", 2493 pmap, pv->pv_va)); 2494 if (tpte & PG_W) 2495 pmap->pm_stats.wired_count--; 2496 if (tpte & PG_A) 2497 vm_page_aflag_set(m, PGA_REFERENCED); 2498 2499 /* 2500 * Update the vm_page_t clean and reference bits. 2501 */ 2502 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2503 vm_page_dirty(m); 2504 pmap_unuse_pt(pmap, pv->pv_va, &free); 2505 pmap_invalidate_page(pmap, pv->pv_va); 2506 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2507 free_pv_entry(pmap, pv); 2508 PMAP_UNLOCK(pmap); 2509 } 2510 vm_page_aflag_clear(m, PGA_WRITEABLE); 2511 PT_UPDATES_FLUSH(); 2512 if (*PMAP1) 2513 PT_SET_MA(PADDR1, 0); 2514 sched_unpin(); 2515 rw_wunlock(&pvh_global_lock); 2516 pmap_free_zero_pages(free); 2517} 2518 2519/* 2520 * Set the physical protection on the 2521 * specified range of this map as requested. 2522 */ 2523void 2524pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2525{ 2526 vm_offset_t pdnxt; 2527 pd_entry_t ptpaddr; 2528 pt_entry_t *pte; 2529 int anychanged; 2530 2531 CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x", 2532 pmap, sva, eva, prot); 2533 2534 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2535 pmap_remove(pmap, sva, eva); 2536 return; 2537 } 2538 2539#ifdef PAE 2540 if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == 2541 (VM_PROT_WRITE|VM_PROT_EXECUTE)) 2542 return; 2543#else 2544 if (prot & VM_PROT_WRITE) 2545 return; 2546#endif 2547 2548 anychanged = 0; 2549 2550 rw_wlock(&pvh_global_lock); 2551 sched_pin(); 2552 PMAP_LOCK(pmap); 2553 for (; sva < eva; sva = pdnxt) { 2554 pt_entry_t obits, pbits; 2555 u_int pdirindex; 2556 2557 pdnxt = (sva + NBPDR) & ~PDRMASK; 2558 if (pdnxt < sva) 2559 pdnxt = eva; 2560 2561 pdirindex = sva >> PDRSHIFT; 2562 ptpaddr = pmap->pm_pdir[pdirindex]; 2563 2564 /* 2565 * Weed out invalid mappings. Note: we assume that the page 2566 * directory table is always allocated, and in kernel virtual. 2567 */ 2568 if (ptpaddr == 0) 2569 continue; 2570 2571 /* 2572 * Check for large page. 
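 * A PG_PS entry maps a 2/4MB superpage directly from the page
 * directory, so the protection bits are adjusted on the PDE itself
 * rather than on 1024 (512 under PAE) individual PTEs.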
2573 */ 2574 if ((ptpaddr & PG_PS) != 0) { 2575 if ((prot & VM_PROT_WRITE) == 0) 2576 pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); 2577#ifdef PAE 2578 if ((prot & VM_PROT_EXECUTE) == 0) 2579 pmap->pm_pdir[pdirindex] |= pg_nx; 2580#endif 2581 anychanged = 1; 2582 continue; 2583 } 2584 2585 if (pdnxt > eva) 2586 pdnxt = eva; 2587 2588 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2589 sva += PAGE_SIZE) { 2590 vm_page_t m; 2591 2592retry: 2593 /* 2594 * Regardless of whether a pte is 32 or 64 bits in 2595 * size, PG_RW, PG_A, and PG_M are among the least 2596 * significant 32 bits. 2597 */ 2598 obits = pbits = *pte; 2599 if ((pbits & PG_V) == 0) 2600 continue; 2601 2602 if ((prot & VM_PROT_WRITE) == 0) { 2603 if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == 2604 (PG_MANAGED | PG_M | PG_RW)) { 2605 m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & 2606 PG_FRAME); 2607 vm_page_dirty(m); 2608 } 2609 pbits &= ~(PG_RW | PG_M); 2610 } 2611#ifdef PAE 2612 if ((prot & VM_PROT_EXECUTE) == 0) 2613 pbits |= pg_nx; 2614#endif 2615 2616 if (pbits != obits) { 2617 obits = *pte; 2618 PT_SET_VA_MA(pte, pbits, TRUE); 2619 if (*pte != pbits) 2620 goto retry; 2621 if (obits & PG_G) 2622 pmap_invalidate_page(pmap, sva); 2623 else 2624 anychanged = 1; 2625 } 2626 } 2627 } 2628 PT_UPDATES_FLUSH(); 2629 if (*PMAP1) 2630 PT_SET_VA_MA(PMAP1, 0, TRUE); 2631 if (anychanged) 2632 pmap_invalidate_all(pmap); 2633 sched_unpin(); 2634 rw_wunlock(&pvh_global_lock); 2635 PMAP_UNLOCK(pmap); 2636} 2637 2638/* 2639 * Insert the given physical page (p) at 2640 * the specified virtual address (v) in the 2641 * target physical map with the protection requested. 2642 * 2643 * If specified, the page will be wired down, meaning 2644 * that the related pte can not be reclaimed. 2645 * 2646 * NB: This is the only routine which MAY NOT lazy-evaluate 2647 * or lose information. That is, this routine must actually 2648 * insert this page into the given map NOW. 2649 */ 2650void 2651pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 2652 vm_prot_t prot, boolean_t wired) 2653{ 2654 pd_entry_t *pde; 2655 pt_entry_t *pte; 2656 pt_entry_t newpte, origpte; 2657 pv_entry_t pv; 2658 vm_paddr_t opa, pa; 2659 vm_page_t mpte, om; 2660 boolean_t invlva; 2661 2662 CTR6(KTR_PMAP, "pmap_enter: pmap=%08p va=0x%08x access=0x%x ma=0x%08x prot=0x%x wired=%d", 2663 pmap, va, access, VM_PAGE_TO_MACH(m), prot, wired); 2664 va = trunc_page(va); 2665 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 2666 KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, 2667 ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", 2668 va)); 2669 KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || 2670 VM_OBJECT_LOCKED(m->object), 2671 ("pmap_enter: page %p is not busy", m)); 2672 2673 mpte = NULL; 2674 2675 rw_wlock(&pvh_global_lock); 2676 PMAP_LOCK(pmap); 2677 sched_pin(); 2678 2679 /* 2680 * In the case that a page table page is not 2681 * resident, we are creating it here. 
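 * Only user addresses take this path; kernel page table pages are
 * provided up front by pmap_growkernel().  The M_WAITOK allocation
 * below may sleep.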
2682 */ 2683 if (va < VM_MAXUSER_ADDRESS) { 2684 mpte = pmap_allocpte(pmap, va, M_WAITOK); 2685 } 2686 2687 pde = pmap_pde(pmap, va); 2688 if ((*pde & PG_PS) != 0) 2689 panic("pmap_enter: attempted pmap_enter on 4MB page"); 2690 pte = pmap_pte_quick(pmap, va); 2691 2692 /* 2693 * Page Directory table entry not valid, we need a new PT page 2694 */ 2695 if (pte == NULL) { 2696 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", 2697 (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va); 2698 } 2699 2700 pa = VM_PAGE_TO_PHYS(m); 2701 om = NULL; 2702 opa = origpte = 0; 2703 2704#if 0 2705 KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx", 2706 pte, *pte)); 2707#endif 2708 origpte = *pte; 2709 if (origpte) 2710 origpte = xpmap_mtop(origpte); 2711 opa = origpte & PG_FRAME; 2712 2713 /* 2714 * Mapping has not changed, must be protection or wiring change. 2715 */ 2716 if (origpte && (opa == pa)) { 2717 /* 2718 * Wiring change, just update stats. We don't worry about 2719 * wiring PT pages as they remain resident as long as there 2720 * are valid mappings in them. Hence, if a user page is wired, 2721 * the PT page will be also. 2722 */ 2723 if (wired && ((origpte & PG_W) == 0)) 2724 pmap->pm_stats.wired_count++; 2725 else if (!wired && (origpte & PG_W)) 2726 pmap->pm_stats.wired_count--; 2727 2728 /* 2729 * Remove extra pte reference 2730 */ 2731 if (mpte) 2732 mpte->wire_count--; 2733 2734 if (origpte & PG_MANAGED) { 2735 om = m; 2736 pa |= PG_MANAGED; 2737 } 2738 goto validate; 2739 } 2740 2741 pv = NULL; 2742 2743 /* 2744 * Mapping has changed, invalidate old range and fall through to 2745 * handle validating new mapping. 2746 */ 2747 if (opa) { 2748 if (origpte & PG_W) 2749 pmap->pm_stats.wired_count--; 2750 if (origpte & PG_MANAGED) { 2751 om = PHYS_TO_VM_PAGE(opa); 2752 pv = pmap_pvh_remove(&om->md, pmap, va); 2753 } else if (va < VM_MAXUSER_ADDRESS) 2754 printf("va=0x%x is unmanaged :-( \n", va); 2755 2756 if (mpte != NULL) { 2757 mpte->wire_count--; 2758 KASSERT(mpte->wire_count > 0, 2759 ("pmap_enter: missing reference to page table page," 2760 " va: 0x%x", va)); 2761 } 2762 } else 2763 pmap->pm_stats.resident_count++; 2764 2765 /* 2766 * Enter on the PV list if part of our managed memory. 2767 */ 2768 if ((m->oflags & VPO_UNMANAGED) == 0) { 2769 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 2770 ("pmap_enter: managed mapping within the clean submap")); 2771 if (pv == NULL) 2772 pv = get_pv_entry(pmap, FALSE); 2773 pv->pv_va = va; 2774 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2775 pa |= PG_MANAGED; 2776 } else if (pv != NULL) 2777 free_pv_entry(pmap, pv); 2778 2779 /* 2780 * Increment counters 2781 */ 2782 if (wired) 2783 pmap->pm_stats.wired_count++; 2784 2785validate: 2786 /* 2787 * Now validate mapping with desired protection/wiring. 2788 */ 2789 newpte = (pt_entry_t)(pa | PG_V); 2790 if ((prot & VM_PROT_WRITE) != 0) { 2791 newpte |= PG_RW; 2792 if ((newpte & PG_MANAGED) != 0) 2793 vm_page_aflag_set(m, PGA_WRITEABLE); 2794 } 2795#ifdef PAE 2796 if ((prot & VM_PROT_EXECUTE) == 0) 2797 newpte |= pg_nx; 2798#endif 2799 if (wired) 2800 newpte |= PG_W; 2801 if (va < VM_MAXUSER_ADDRESS) 2802 newpte |= PG_U; 2803 if (pmap == kernel_pmap) 2804 newpte |= pgeflag; 2805 2806 critical_enter(); 2807 /* 2808 * if the mapping or permission bits are different, we need 2809 * to update the pte. 
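 * PG_M and PG_A are masked out of the comparison because the hardware
 * may set them asynchronously; a mapping that differs only in those
 * bits does not need to be rewritten.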
2810 */ 2811 if ((origpte & ~(PG_M|PG_A)) != newpte) { 2812 if (origpte) { 2813 invlva = FALSE; 2814 origpte = *pte; 2815 PT_SET_VA(pte, newpte | PG_A, FALSE); 2816 if (origpte & PG_A) { 2817 if (origpte & PG_MANAGED) 2818 vm_page_aflag_set(om, PGA_REFERENCED); 2819 if (opa != VM_PAGE_TO_PHYS(m)) 2820 invlva = TRUE; 2821#ifdef PAE 2822 if ((origpte & PG_NX) == 0 && 2823 (newpte & PG_NX) != 0) 2824 invlva = TRUE; 2825#endif 2826 } 2827 if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 2828 if ((origpte & PG_MANAGED) != 0) 2829 vm_page_dirty(om); 2830 if ((prot & VM_PROT_WRITE) == 0) 2831 invlva = TRUE; 2832 } 2833 if ((origpte & PG_MANAGED) != 0 && 2834 TAILQ_EMPTY(&om->md.pv_list)) 2835 vm_page_aflag_clear(om, PGA_WRITEABLE); 2836 if (invlva) 2837 pmap_invalidate_page(pmap, va); 2838 } else{ 2839 PT_SET_VA(pte, newpte | PG_A, FALSE); 2840 } 2841 2842 } 2843 PT_UPDATES_FLUSH(); 2844 critical_exit(); 2845 if (*PMAP1) 2846 PT_SET_VA_MA(PMAP1, 0, TRUE); 2847 sched_unpin(); 2848 rw_wunlock(&pvh_global_lock); 2849 PMAP_UNLOCK(pmap); 2850} 2851 2852/* 2853 * Maps a sequence of resident pages belonging to the same object. 2854 * The sequence begins with the given page m_start. This page is 2855 * mapped at the given virtual address start. Each subsequent page is 2856 * mapped at a virtual address that is offset from start by the same 2857 * amount as the page is offset from m_start within the object. The 2858 * last page in the sequence is the page with the largest offset from 2859 * m_start that can be mapped at a virtual address less than the given 2860 * virtual address end. Not every virtual page between start and end 2861 * is mapped; only those for which a resident page exists with the 2862 * corresponding offset from m_start are mapped. 2863 */ 2864void 2865pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2866 vm_page_t m_start, vm_prot_t prot) 2867{ 2868 vm_page_t m, mpte; 2869 vm_pindex_t diff, psize; 2870 multicall_entry_t mcl[16]; 2871 multicall_entry_t *mclp = mcl; 2872 int error, count = 0; 2873 2874 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 2875 psize = atop(end - start); 2876 mpte = NULL; 2877 m = m_start; 2878 rw_wlock(&pvh_global_lock); 2879 PMAP_LOCK(pmap); 2880 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2881 mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m, 2882 prot, mpte); 2883 m = TAILQ_NEXT(m, listq); 2884 if (count == 16) { 2885 error = HYPERVISOR_multicall(mcl, count); 2886 KASSERT(error == 0, ("bad multicall %d", error)); 2887 mclp = mcl; 2888 count = 0; 2889 } 2890 } 2891 if (count) { 2892 error = HYPERVISOR_multicall(mcl, count); 2893 KASSERT(error == 0, ("bad multicall %d", error)); 2894 } 2895 rw_wunlock(&pvh_global_lock); 2896 PMAP_UNLOCK(pmap); 2897} 2898 2899/* 2900 * this code makes some *MAJOR* assumptions: 2901 * 1. Current pmap & pmap exists. 2902 * 2. Not wired. 2903 * 3. Read access. 2904 * 4. No page table pages. 2905 * but is *MUCH* faster than pmap_enter... 
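 * Under Xen the PTE writes are batched as well: up to 16 updates are
 * queued in a multicall_entry_t array and issued with a single
 * HYPERVISOR_multicall(), amortizing the hypercall overhead (see
 * pmap_enter_object() above for the batching pattern).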
2906 */ 2907 2908void 2909pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2910{ 2911 multicall_entry_t mcl, *mclp; 2912 int count = 0; 2913 mclp = &mcl; 2914 2915 CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x", 2916 pmap, va, m, prot); 2917 2918 rw_wlock(&pvh_global_lock); 2919 PMAP_LOCK(pmap); 2920 (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL); 2921 if (count) 2922 HYPERVISOR_multicall(&mcl, count); 2923 rw_wunlock(&pvh_global_lock); 2924 PMAP_UNLOCK(pmap); 2925} 2926 2927#ifdef notyet 2928void 2929pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count) 2930{ 2931 int i, error, index = 0; 2932 multicall_entry_t mcl[16]; 2933 multicall_entry_t *mclp = mcl; 2934 2935 PMAP_LOCK(pmap); 2936 for (i = 0; i < count; i++, addrs++, pages++, prots++) { 2937 if (!pmap_is_prefaultable_locked(pmap, *addrs)) 2938 continue; 2939 2940 (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL); 2941 if (index == 16) { 2942 error = HYPERVISOR_multicall(mcl, index); 2943 mclp = mcl; 2944 index = 0; 2945 KASSERT(error == 0, ("bad multicall %d", error)); 2946 } 2947 } 2948 if (index) { 2949 error = HYPERVISOR_multicall(mcl, index); 2950 KASSERT(error == 0, ("bad multicall %d", error)); 2951 } 2952 2953 PMAP_UNLOCK(pmap); 2954} 2955#endif 2956 2957static vm_page_t 2958pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m, 2959 vm_prot_t prot, vm_page_t mpte) 2960{ 2961 pt_entry_t *pte; 2962 vm_paddr_t pa; 2963 vm_page_t free; 2964 multicall_entry_t *mcl = *mclpp; 2965 2966 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2967 (m->oflags & VPO_UNMANAGED) != 0, 2968 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2969 rw_assert(&pvh_global_lock, RA_WLOCKED); 2970 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2971 2972 /* 2973 * In the case that a page table page is not 2974 * resident, we are creating it here. 2975 */ 2976 if (va < VM_MAXUSER_ADDRESS) { 2977 u_int ptepindex; 2978 pd_entry_t ptema; 2979 2980 /* 2981 * Calculate pagetable page index 2982 */ 2983 ptepindex = va >> PDRSHIFT; 2984 if (mpte && (mpte->pindex == ptepindex)) { 2985 mpte->wire_count++; 2986 } else { 2987 /* 2988 * Get the page directory entry 2989 */ 2990 ptema = pmap->pm_pdir[ptepindex]; 2991 2992 /* 2993 * If the page table page is mapped, we just increment 2994 * the hold count, and activate it. 2995 */ 2996 if (ptema & PG_V) { 2997 if (ptema & PG_PS) 2998 panic("pmap_enter_quick: unexpected mapping into 4MB page"); 2999 mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME); 3000 mpte->wire_count++; 3001 } else { 3002 mpte = _pmap_allocpte(pmap, ptepindex, 3003 M_NOWAIT); 3004 if (mpte == NULL) 3005 return (mpte); 3006 } 3007 } 3008 } else { 3009 mpte = NULL; 3010 } 3011 3012 /* 3013 * This call to vtopte makes the assumption that we are 3014 * entering the page into the current pmap. In order to support 3015 * quick entry into any pmap, one would likely use pmap_pte_quick. 3016 * But that isn't as quick as vtopte. 3017 */ 3018 KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap")); 3019 pte = vtopte(va); 3020 if (*pte & PG_V) { 3021 if (mpte != NULL) { 3022 mpte->wire_count--; 3023 mpte = NULL; 3024 } 3025 return (mpte); 3026 } 3027 3028 /* 3029 * Enter on the PV list if part of our managed memory. 
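 * If a pv entry cannot be had without sleeping, the mapping is simply
 * abandoned and the page table page reference is dropped below; this
 * is safe because pmap_enter_quick() is only an optimization.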
3030 */ 3031 if ((m->oflags & VPO_UNMANAGED) == 0 && 3032 !pmap_try_insert_pv_entry(pmap, va, m)) { 3033 if (mpte != NULL) { 3034 free = NULL; 3035 if (pmap_unwire_ptp(pmap, mpte, &free)) { 3036 pmap_invalidate_page(pmap, va); 3037 pmap_free_zero_pages(free); 3038 } 3039 3040 mpte = NULL; 3041 } 3042 return (mpte); 3043 } 3044 3045 /* 3046 * Increment counters 3047 */ 3048 pmap->pm_stats.resident_count++; 3049 3050 pa = VM_PAGE_TO_PHYS(m); 3051#ifdef PAE 3052 if ((prot & VM_PROT_EXECUTE) == 0) 3053 pa |= pg_nx; 3054#endif 3055 3056#if 0 3057 /* 3058 * Now validate mapping with RO protection 3059 */ 3060 if ((m->oflags & VPO_UNMANAGED) != 0) 3061 pte_store(pte, pa | PG_V | PG_U); 3062 else 3063 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 3064#else 3065 /* 3066 * Now validate mapping with RO protection 3067 */ 3068 if ((m->oflags & VPO_UNMANAGED) != 0) 3069 pa = xpmap_ptom(pa | PG_V | PG_U); 3070 else 3071 pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED); 3072 3073 mcl->op = __HYPERVISOR_update_va_mapping; 3074 mcl->args[0] = va; 3075 mcl->args[1] = (uint32_t)(pa & 0xffffffff); 3076 mcl->args[2] = (uint32_t)(pa >> 32); 3077 mcl->args[3] = 0; 3078 *mclpp = mcl + 1; 3079 *count = *count + 1; 3080#endif 3081 return (mpte); 3082} 3083 3084/* 3085 * Make a temporary mapping for a physical address. This is only intended 3086 * to be used for panic dumps. 3087 */ 3088void * 3089pmap_kenter_temporary(vm_paddr_t pa, int i) 3090{ 3091 vm_offset_t va; 3092 vm_paddr_t ma = xpmap_ptom(pa); 3093 3094 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 3095 PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag); 3096 invlpg(va); 3097 return ((void *)crashdumpmap); 3098} 3099 3100/* 3101 * This code maps large physical mmap regions into the 3102 * processor address space. Note that some shortcuts 3103 * are taken, but the code works. 3104 */ 3105void 3106pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 3107 vm_pindex_t pindex, vm_size_t size) 3108{ 3109 pd_entry_t *pde; 3110 vm_paddr_t pa, ptepa; 3111 vm_page_t p; 3112 int pat_mode; 3113 3114 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 3115 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3116 ("pmap_object_init_pt: non-device object")); 3117 if (pseflag && 3118 (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { 3119 if (!vm_object_populate(object, pindex, pindex + atop(size))) 3120 return; 3121 p = vm_page_lookup(object, pindex); 3122 KASSERT(p->valid == VM_PAGE_BITS_ALL, 3123 ("pmap_object_init_pt: invalid page %p", p)); 3124 pat_mode = p->md.pat_mode; 3125 3126 /* 3127 * Abort the mapping if the first page is not physically 3128 * aligned to a 2/4MB page boundary. 3129 */ 3130 ptepa = VM_PAGE_TO_PHYS(p); 3131 if (ptepa & (NBPDR - 1)) 3132 return; 3133 3134 /* 3135 * Skip the first page. Abort the mapping if the rest of 3136 * the pages are not physically contiguous or have differing 3137 * memory attributes. 3138 */ 3139 p = TAILQ_NEXT(p, listq); 3140 for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; 3141 pa += PAGE_SIZE) { 3142 KASSERT(p->valid == VM_PAGE_BITS_ALL, 3143 ("pmap_object_init_pt: invalid page %p", p)); 3144 if (pa != VM_PAGE_TO_PHYS(p) || 3145 pat_mode != p->md.pat_mode) 3146 return; 3147 p = TAILQ_NEXT(p, listq); 3148 } 3149 3150 /* 3151 * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and 3152 * "size" is a multiple of 2/4M, adding the PAT setting to 3153 * "pa" will not affect the termination of this loop. 
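 * That is, the cache bits added by pmap_cache_bits() are far smaller
 * than NBPDR, so they can never carry "pa" across an extra NBPDR
 * stride.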
3154 */ 3155 PMAP_LOCK(pmap); 3156 for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + 3157 size; pa += NBPDR) { 3158 pde = pmap_pde(pmap, addr); 3159 if (*pde == 0) { 3160 pde_store(pde, pa | PG_PS | PG_M | PG_A | 3161 PG_U | PG_RW | PG_V); 3162 pmap->pm_stats.resident_count += NBPDR / 3163 PAGE_SIZE; 3164 pmap_pde_mappings++; 3165 } 3166 /* Else continue on if the PDE is already valid. */ 3167 addr += NBPDR; 3168 } 3169 PMAP_UNLOCK(pmap); 3170 } 3171} 3172 3173/* 3174 * Routine: pmap_change_wiring 3175 * Function: Change the wiring attribute for a map/virtual-address 3176 * pair. 3177 * In/out conditions: 3178 * The mapping must already exist in the pmap. 3179 */ 3180void 3181pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 3182{ 3183 pt_entry_t *pte; 3184 3185 rw_wlock(&pvh_global_lock); 3186 PMAP_LOCK(pmap); 3187 pte = pmap_pte(pmap, va); 3188 3189 if (wired && !pmap_pte_w(pte)) { 3190 PT_SET_VA_MA((pte), *(pte) | PG_W, TRUE); 3191 pmap->pm_stats.wired_count++; 3192 } else if (!wired && pmap_pte_w(pte)) { 3193 PT_SET_VA_MA((pte), *(pte) & ~PG_W, TRUE); 3194 pmap->pm_stats.wired_count--; 3195 } 3196 3197 /* 3198 * Wiring is not a hardware characteristic so there is no need to 3199 * invalidate TLB. 3200 */ 3201 pmap_pte_release(pte); 3202 PMAP_UNLOCK(pmap); 3203 rw_wunlock(&pvh_global_lock); 3204} 3205 3206 3207 3208/* 3209 * Copy the range specified by src_addr/len 3210 * from the source map to the range dst_addr/len 3211 * in the destination map. 3212 * 3213 * This routine is only advisory and need not do anything. 3214 */ 3215 3216void 3217pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3218 vm_offset_t src_addr) 3219{ 3220 vm_page_t free; 3221 vm_offset_t addr; 3222 vm_offset_t end_addr = src_addr + len; 3223 vm_offset_t pdnxt; 3224 3225 if (dst_addr != src_addr) 3226 return; 3227 3228 if (!pmap_is_current(src_pmap)) { 3229 CTR2(KTR_PMAP, 3230 "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx", 3231 (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME)); 3232 3233 return; 3234 } 3235 CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x", 3236 dst_pmap, src_pmap, dst_addr, len, src_addr); 3237 3238#ifdef HAMFISTED_LOCKING 3239 mtx_lock(&createdelete_lock); 3240#endif 3241 3242 rw_wlock(&pvh_global_lock); 3243 if (dst_pmap < src_pmap) { 3244 PMAP_LOCK(dst_pmap); 3245 PMAP_LOCK(src_pmap); 3246 } else { 3247 PMAP_LOCK(src_pmap); 3248 PMAP_LOCK(dst_pmap); 3249 } 3250 sched_pin(); 3251 for (addr = src_addr; addr < end_addr; addr = pdnxt) { 3252 pt_entry_t *src_pte, *dst_pte; 3253 vm_page_t dstmpte, srcmpte; 3254 pd_entry_t srcptepaddr; 3255 u_int ptepindex; 3256 3257 KASSERT(addr < UPT_MIN_ADDRESS, 3258 ("pmap_copy: invalid to pmap_copy page tables")); 3259 3260 pdnxt = (addr + NBPDR) & ~PDRMASK; 3261 if (pdnxt < addr) 3262 pdnxt = end_addr; 3263 ptepindex = addr >> PDRSHIFT; 3264 3265 srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]); 3266 if (srcptepaddr == 0) 3267 continue; 3268 3269 if (srcptepaddr & PG_PS) { 3270 if (dst_pmap->pm_pdir[ptepindex] == 0) { 3271 PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE); 3272 dst_pmap->pm_stats.resident_count += 3273 NBPDR / PAGE_SIZE; 3274 } 3275 continue; 3276 } 3277 3278 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); 3279 KASSERT(srcmpte->wire_count > 0, 3280 ("pmap_copy: source page table page is unused")); 3281 3282 if (pdnxt > end_addr) 3283 pdnxt = end_addr; 3284 3285 src_pte = vtopte(addr); 3286 while (addr < pdnxt) { 
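			/*
			 * The source PTEs are read through vtopte(), which
			 * is only valid because src_pmap is the current
			 * pmap (checked at the top of this function).
			 */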
3287 pt_entry_t ptetemp; 3288 ptetemp = *src_pte; 3289 /* 3290 * we only virtual copy managed pages 3291 */ 3292 if ((ptetemp & PG_MANAGED) != 0) { 3293 dstmpte = pmap_allocpte(dst_pmap, addr, 3294 M_NOWAIT); 3295 if (dstmpte == NULL) 3296 goto out; 3297 dst_pte = pmap_pte_quick(dst_pmap, addr); 3298 if (*dst_pte == 0 && 3299 pmap_try_insert_pv_entry(dst_pmap, addr, 3300 PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) { 3301 /* 3302 * Clear the wired, modified, and 3303 * accessed (referenced) bits 3304 * during the copy. 3305 */ 3306 KASSERT(ptetemp != 0, ("src_pte not set")); 3307 PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */); 3308 KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)), 3309 ("no pmap copy expected: 0x%jx saw: 0x%jx", 3310 ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte)); 3311 dst_pmap->pm_stats.resident_count++; 3312 } else { 3313 free = NULL; 3314 if (pmap_unwire_ptp(dst_pmap, dstmpte, 3315 &free)) { 3316 pmap_invalidate_page(dst_pmap, 3317 addr); 3318 pmap_free_zero_pages(free); 3319 } 3320 goto out; 3321 } 3322 if (dstmpte->wire_count >= srcmpte->wire_count) 3323 break; 3324 } 3325 addr += PAGE_SIZE; 3326 src_pte++; 3327 } 3328 } 3329out: 3330 PT_UPDATES_FLUSH(); 3331 sched_unpin(); 3332 rw_wunlock(&pvh_global_lock); 3333 PMAP_UNLOCK(src_pmap); 3334 PMAP_UNLOCK(dst_pmap); 3335 3336#ifdef HAMFISTED_LOCKING 3337 mtx_unlock(&createdelete_lock); 3338#endif 3339} 3340 3341static __inline void 3342pagezero(void *page) 3343{ 3344#if defined(I686_CPU) 3345 if (cpu_class == CPUCLASS_686) { 3346#if defined(CPU_ENABLE_SSE) 3347 if (cpu_feature & CPUID_SSE2) 3348 sse2_pagezero(page); 3349 else 3350#endif 3351 i686_pagezero(page); 3352 } else 3353#endif 3354 bzero(page, PAGE_SIZE); 3355} 3356 3357/* 3358 * pmap_zero_page zeros the specified hardware page by mapping 3359 * the page into KVM and using bzero to clear its contents. 3360 */ 3361void 3362pmap_zero_page(vm_page_t m) 3363{ 3364 struct sysmaps *sysmaps; 3365 3366 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3367 mtx_lock(&sysmaps->lock); 3368 if (*sysmaps->CMAP2) 3369 panic("pmap_zero_page: CMAP2 busy"); 3370 sched_pin(); 3371 PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); 3372 pagezero(sysmaps->CADDR2); 3373 PT_SET_MA(sysmaps->CADDR2, 0); 3374 sched_unpin(); 3375 mtx_unlock(&sysmaps->lock); 3376} 3377 3378/* 3379 * pmap_zero_page_area zeros the specified hardware page by mapping 3380 * the page into KVM and using bzero to clear its contents. 3381 * 3382 * off and size may not cover an area beyond a single hardware page. 3383 */ 3384void 3385pmap_zero_page_area(vm_page_t m, int off, int size) 3386{ 3387 struct sysmaps *sysmaps; 3388 3389 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3390 mtx_lock(&sysmaps->lock); 3391 if (*sysmaps->CMAP2) 3392 panic("pmap_zero_page_area: CMAP2 busy"); 3393 sched_pin(); 3394 PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); 3395 3396 if (off == 0 && size == PAGE_SIZE) 3397 pagezero(sysmaps->CADDR2); 3398 else 3399 bzero((char *)sysmaps->CADDR2 + off, size); 3400 PT_SET_MA(sysmaps->CADDR2, 0); 3401 sched_unpin(); 3402 mtx_unlock(&sysmaps->lock); 3403} 3404 3405/* 3406 * pmap_zero_page_idle zeros the specified hardware page by mapping 3407 * the page into KVM and using bzero to clear its contents. This 3408 * is intended to be called from the vm_pagezero process only and 3409 * outside of Giant. 
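 * Unlike pmap_zero_page(), it uses the dedicated CMAP3/CADDR3 window
 * and takes no sysmaps lock; exclusivity relies on there being a
 * single page-zeroing thread.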
3410 */ 3411void 3412pmap_zero_page_idle(vm_page_t m) 3413{ 3414 3415 if (*CMAP3) 3416 panic("pmap_zero_page_idle: CMAP3 busy"); 3417 sched_pin(); 3418 PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); 3419 pagezero(CADDR3); 3420 PT_SET_MA(CADDR3, 0); 3421 sched_unpin(); 3422} 3423 3424/* 3425 * pmap_copy_page copies the specified (machine independent) 3426 * page by mapping the page into virtual memory and using 3427 * bcopy to copy the page, one machine dependent page at a 3428 * time. 3429 */ 3430void 3431pmap_copy_page(vm_page_t src, vm_page_t dst) 3432{ 3433 struct sysmaps *sysmaps; 3434 3435 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3436 mtx_lock(&sysmaps->lock); 3437 if (*sysmaps->CMAP1) 3438 panic("pmap_copy_page: CMAP1 busy"); 3439 if (*sysmaps->CMAP2) 3440 panic("pmap_copy_page: CMAP2 busy"); 3441 sched_pin(); 3442 PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(src) | PG_A); 3443 PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(dst) | PG_A | PG_M); 3444 bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); 3445 PT_SET_MA(sysmaps->CADDR1, 0); 3446 PT_SET_MA(sysmaps->CADDR2, 0); 3447 sched_unpin(); 3448 mtx_unlock(&sysmaps->lock); 3449} 3450 3451/* 3452 * Returns true if the pmap's pv is one of the first 3453 * 16 pvs linked to from this page. This count may 3454 * be changed upwards or downwards in the future; it 3455 * is only necessary that true be returned for a small 3456 * subset of pmaps for proper page aging. 3457 */ 3458boolean_t 3459pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 3460{ 3461 pv_entry_t pv; 3462 int loops = 0; 3463 boolean_t rv; 3464 3465 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3466 ("pmap_page_exists_quick: page %p is not managed", m)); 3467 rv = FALSE; 3468 rw_wlock(&pvh_global_lock); 3469 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3470 if (PV_PMAP(pv) == pmap) { 3471 rv = TRUE; 3472 break; 3473 } 3474 loops++; 3475 if (loops >= 16) 3476 break; 3477 } 3478 rw_wunlock(&pvh_global_lock); 3479 return (rv); 3480} 3481 3482/* 3483 * pmap_page_wired_mappings: 3484 * 3485 * Return the number of managed mappings to the given physical page 3486 * that are wired. 3487 */ 3488int 3489pmap_page_wired_mappings(vm_page_t m) 3490{ 3491 pv_entry_t pv; 3492 pt_entry_t *pte; 3493 pmap_t pmap; 3494 int count; 3495 3496 count = 0; 3497 if ((m->oflags & VPO_UNMANAGED) != 0) 3498 return (count); 3499 rw_wlock(&pvh_global_lock); 3500 sched_pin(); 3501 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3502 pmap = PV_PMAP(pv); 3503 PMAP_LOCK(pmap); 3504 pte = pmap_pte_quick(pmap, pv->pv_va); 3505 if ((*pte & PG_W) != 0) 3506 count++; 3507 PMAP_UNLOCK(pmap); 3508 } 3509 sched_unpin(); 3510 rw_wunlock(&pvh_global_lock); 3511 return (count); 3512} 3513 3514/* 3515 * Returns TRUE if the given page is mapped. Otherwise, returns FALSE. 3516 */ 3517boolean_t 3518pmap_page_is_mapped(vm_page_t m) 3519{ 3520 3521 if ((m->oflags & VPO_UNMANAGED) != 0) 3522 return (FALSE); 3523 return (!TAILQ_EMPTY(&m->md.pv_list)); 3524} 3525 3526/* 3527 * Remove all pages from specified address space 3528 * this aids process exit speeds. Also, this code 3529 * is special cased for current process only, but 3530 * can have the more generic (and slightly slower) 3531 * mode enabled. This is much faster than pmap_remove 3532 * in the case of running down an entire address space. 
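 * The speedup comes from walking the pmap's pv chunks instead of the
 * virtual address range, so the cost scales with the number of
 * resident mappings rather than with the size of the address space.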
3533 */ 3534void 3535pmap_remove_pages(pmap_t pmap) 3536{ 3537 pt_entry_t *pte, tpte; 3538 vm_page_t m, free = NULL; 3539 pv_entry_t pv; 3540 struct pv_chunk *pc, *npc; 3541 int field, idx; 3542 int32_t bit; 3543 uint32_t inuse, bitmask; 3544 int allfree; 3545 3546 CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap); 3547 3548 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 3549 printf("warning: pmap_remove_pages called with non-current pmap\n"); 3550 return; 3551 } 3552 rw_wlock(&pvh_global_lock); 3553 KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap")); 3554 PMAP_LOCK(pmap); 3555 sched_pin(); 3556 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 3557 KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap, 3558 pc->pc_pmap)); 3559 allfree = 1; 3560 for (field = 0; field < _NPCM; field++) { 3561 inuse = ~pc->pc_map[field] & pc_freemask[field]; 3562 while (inuse != 0) { 3563 bit = bsfl(inuse); 3564 bitmask = 1UL << bit; 3565 idx = field * 32 + bit; 3566 pv = &pc->pc_pventry[idx]; 3567 inuse &= ~bitmask; 3568 3569 pte = vtopte(pv->pv_va); 3570 tpte = *pte ? xpmap_mtop(*pte) : 0; 3571 3572 if (tpte == 0) { 3573 printf( 3574 "TPTE at %p IS ZERO @ VA %08x\n", 3575 pte, pv->pv_va); 3576 panic("bad pte"); 3577 } 3578 3579/* 3580 * We cannot remove wired pages from a process' mapping at this time 3581 */ 3582 if (tpte & PG_W) { 3583 allfree = 0; 3584 continue; 3585 } 3586 3587 m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); 3588 KASSERT(m->phys_addr == (tpte & PG_FRAME), 3589 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 3590 m, (uintmax_t)m->phys_addr, 3591 (uintmax_t)tpte)); 3592 3593 KASSERT(m < &vm_page_array[vm_page_array_size], 3594 ("pmap_remove_pages: bad tpte %#jx", 3595 (uintmax_t)tpte)); 3596 3597 3598 PT_CLEAR_VA(pte, FALSE); 3599 3600 /* 3601 * Update the vm_page_t clean/reference bits. 3602 */ 3603 if (tpte & PG_M) 3604 vm_page_dirty(m); 3605 3606 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3607 if (TAILQ_EMPTY(&m->md.pv_list)) 3608 vm_page_aflag_clear(m, PGA_WRITEABLE); 3609 3610 pmap_unuse_pt(pmap, pv->pv_va, &free); 3611 3612 /* Mark free */ 3613 PV_STAT(pv_entry_frees++); 3614 PV_STAT(pv_entry_spare++); 3615 pv_entry_count--; 3616 pc->pc_map[field] |= bitmask; 3617 pmap->pm_stats.resident_count--; 3618 } 3619 } 3620 PT_UPDATES_FLUSH(); 3621 if (allfree) { 3622 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 3623 free_pv_chunk(pc); 3624 } 3625 } 3626 PT_UPDATES_FLUSH(); 3627 if (*PMAP1) 3628 PT_SET_MA(PADDR1, 0); 3629 3630 sched_unpin(); 3631 pmap_invalidate_all(pmap); 3632 rw_wunlock(&pvh_global_lock); 3633 PMAP_UNLOCK(pmap); 3634 pmap_free_zero_pages(free); 3635} 3636 3637/* 3638 * pmap_is_modified: 3639 * 3640 * Return whether or not the specified physical page was modified 3641 * in any physical maps. 3642 */ 3643boolean_t 3644pmap_is_modified(vm_page_t m) 3645{ 3646 pv_entry_t pv; 3647 pt_entry_t *pte; 3648 pmap_t pmap; 3649 boolean_t rv; 3650 3651 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3652 ("pmap_is_modified: page %p is not managed", m)); 3653 rv = FALSE; 3654 3655 /* 3656 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be 3657 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 3658 * is clear, no PTEs can have PG_M set. 
3659 */ 3660 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 3661 if ((m->oflags & VPO_BUSY) == 0 && 3662 (m->aflags & PGA_WRITEABLE) == 0) 3663 return (rv); 3664 rw_wlock(&pvh_global_lock); 3665 sched_pin(); 3666 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3667 pmap = PV_PMAP(pv); 3668 PMAP_LOCK(pmap); 3669 pte = pmap_pte_quick(pmap, pv->pv_va); 3670 rv = (*pte & PG_M) != 0; 3671 PMAP_UNLOCK(pmap); 3672 if (rv) 3673 break; 3674 } 3675 if (*PMAP1) 3676 PT_SET_MA(PADDR1, 0); 3677 sched_unpin(); 3678 rw_wunlock(&pvh_global_lock); 3679 return (rv); 3680} 3681 3682/* 3683 * pmap_is_prefaultable: 3684 * 3685 * Return whether or not the specified virtual address is eligible 3686 * for prefault. 3687 */ 3688static boolean_t 3689pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr) 3690{ 3691 pt_entry_t *pte; 3692 boolean_t rv = FALSE; 3693 /* XXX: the early return below disables prefaulting; the code after it is unreachable. */ 3694 return (rv); 3695 3696 if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) { 3697 pte = vtopte(addr); 3698 rv = (*pte == 0); 3699 } 3700 return (rv); 3701} 3702 3703boolean_t 3704pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 3705{ 3706 boolean_t rv; 3707 3708 PMAP_LOCK(pmap); 3709 rv = pmap_is_prefaultable_locked(pmap, addr); 3710 PMAP_UNLOCK(pmap); 3711 return (rv); 3712} 3713 3714boolean_t 3715pmap_is_referenced(vm_page_t m) 3716{ 3717 pv_entry_t pv; 3718 pt_entry_t *pte; 3719 pmap_t pmap; 3720 boolean_t rv; 3721 3722 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3723 ("pmap_is_referenced: page %p is not managed", m)); 3724 rv = FALSE; 3725 rw_wlock(&pvh_global_lock); 3726 sched_pin(); 3727 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3728 pmap = PV_PMAP(pv); 3729 PMAP_LOCK(pmap); 3730 pte = pmap_pte_quick(pmap, pv->pv_va); 3731 rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); 3732 PMAP_UNLOCK(pmap); 3733 if (rv) 3734 break; 3735 } 3736 if (*PMAP1) 3737 PT_SET_MA(PADDR1, 0); 3738 sched_unpin(); 3739 rw_wunlock(&pvh_global_lock); 3740 return (rv); 3741} 3742 3743void 3744pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) 3745{ 3746 int i, npages = round_page(len) >> PAGE_SHIFT; 3747 for (i = 0; i < npages; i++) { 3748 pt_entry_t *pte; 3749 pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); 3750 rw_wlock(&pvh_global_lock); 3751 pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M))); 3752 rw_wunlock(&pvh_global_lock); 3753 PMAP_MARK_PRIV(xpmap_mtop(*pte)); 3754 pmap_pte_release(pte); 3755 } 3756} 3757 3758void 3759pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len) 3760{ 3761 int i, npages = round_page(len) >> PAGE_SHIFT; 3762 for (i = 0; i < npages; i++) { 3763 pt_entry_t *pte; 3764 pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); 3765 PMAP_MARK_UNPRIV(xpmap_mtop(*pte)); 3766 rw_wlock(&pvh_global_lock); 3767 pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M)); 3768 rw_wunlock(&pvh_global_lock); 3769 pmap_pte_release(pte); 3770 } 3771} 3772 3773/* 3774 * Clear the write and modified bits in each of the given page's mappings. 3775 */ 3776void 3777pmap_remove_write(vm_page_t m) 3778{ 3779 pv_entry_t pv; 3780 pmap_t pmap; 3781 pt_entry_t oldpte, *pte; 3782 3783 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3784 ("pmap_remove_write: page %p is not managed", m)); 3785 3786 /* 3787 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by 3788 * another thread while the object is locked. Thus, if PGA_WRITEABLE 3789 * is clear, no page table entries need updating.
3790 */ 3791 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 3792 if ((m->oflags & VPO_BUSY) == 0 && 3793 (m->aflags & PGA_WRITEABLE) == 0) 3794 return; 3795 rw_wlock(&pvh_global_lock); 3796 sched_pin(); 3797 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3798 pmap = PV_PMAP(pv); 3799 PMAP_LOCK(pmap); 3800 pte = pmap_pte_quick(pmap, pv->pv_va); 3801retry: 3802 oldpte = *pte; 3803 if ((oldpte & PG_RW) != 0) { 3804 vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M); 3805 3806 /* 3807 * Regardless of whether a pte is 32 or 64 bits 3808 * in size, PG_RW and PG_M are among the least 3809 * significant 32 bits. 3810 */ 3811 PT_SET_VA_MA(pte, newpte, TRUE); 3812 if (*pte != newpte) 3813 goto retry; 3814 3815 if ((oldpte & PG_M) != 0) 3816 vm_page_dirty(m); 3817 pmap_invalidate_page(pmap, pv->pv_va); 3818 } 3819 PMAP_UNLOCK(pmap); 3820 } 3821 vm_page_aflag_clear(m, PGA_WRITEABLE); 3822 PT_UPDATES_FLUSH(); 3823 if (*PMAP1) 3824 PT_SET_MA(PADDR1, 0); 3825 sched_unpin(); 3826 rw_wunlock(&pvh_global_lock); 3827} 3828 3829/* 3830 * pmap_ts_referenced: 3831 * 3832 * Return a count of reference bits for a page, clearing those bits. 3833 * It is not necessary for every reference bit to be cleared, but it 3834 * is necessary that 0 only be returned when there are truly no 3835 * reference bits set. 3836 * 3837 * XXX: The exact number of bits to check and clear is a matter that 3838 * should be tested and standardized at some point in the future for 3839 * optimal aging of shared pages. 3840 */ 3841int 3842pmap_ts_referenced(vm_page_t m) 3843{ 3844 pv_entry_t pv, pvf, pvn; 3845 pmap_t pmap; 3846 pt_entry_t *pte; 3847 int rtval = 0; 3848 3849 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3850 ("pmap_ts_referenced: page %p is not managed", m)); 3851 rw_wlock(&pvh_global_lock); 3852 sched_pin(); 3853 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3854 pvf = pv; 3855 do { 3856 pvn = TAILQ_NEXT(pv, pv_list); 3857 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3858 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3859 pmap = PV_PMAP(pv); 3860 PMAP_LOCK(pmap); 3861 pte = pmap_pte_quick(pmap, pv->pv_va); 3862 if ((*pte & PG_A) != 0) { 3863 PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); 3864 pmap_invalidate_page(pmap, pv->pv_va); 3865 rtval++; 3866 if (rtval > 4) 3867 pvn = NULL; 3868 } 3869 PMAP_UNLOCK(pmap); 3870 } while ((pv = pvn) != NULL && pv != pvf); 3871 } 3872 PT_UPDATES_FLUSH(); 3873 if (*PMAP1) 3874 PT_SET_MA(PADDR1, 0); 3875 sched_unpin(); 3876 rw_wunlock(&pvh_global_lock); 3877 return (rtval); 3878} 3879 3880/* 3881 * Clear the modify bits on the specified physical page. 3882 */ 3883void 3884pmap_clear_modify(vm_page_t m) 3885{ 3886 pv_entry_t pv; 3887 pmap_t pmap; 3888 pt_entry_t *pte; 3889 3890 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3891 ("pmap_clear_modify: page %p is not managed", m)); 3892 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 3893 KASSERT((m->oflags & VPO_BUSY) == 0, 3894 ("pmap_clear_modify: page %p is busy", m)); 3895 3896 /* 3897 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 3898 * If the object containing the page is locked and the page is not 3899 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. 
3900 */ 3901 if ((m->aflags & PGA_WRITEABLE) == 0) 3902 return; 3903 rw_wlock(&pvh_global_lock); 3904 sched_pin(); 3905 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3906 pmap = PV_PMAP(pv); 3907 PMAP_LOCK(pmap); 3908 pte = pmap_pte_quick(pmap, pv->pv_va); 3909 if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 3910 /* 3911 * Regardless of whether a pte is 32 or 64 bits 3912 * in size, PG_M is among the least significant 3913 * 32 bits. 3914 */ 3915 PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE); 3916 pmap_invalidate_page(pmap, pv->pv_va); 3917 } 3918 PMAP_UNLOCK(pmap); 3919 } 3920 sched_unpin(); 3921 rw_wunlock(&pvh_global_lock); 3922} 3923 3924/* 3925 * pmap_clear_reference: 3926 * 3927 * Clear the reference bit on the specified physical page. 3928 */ 3929void 3930pmap_clear_reference(vm_page_t m) 3931{ 3932 pv_entry_t pv; 3933 pmap_t pmap; 3934 pt_entry_t *pte; 3935 3936 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3937 ("pmap_clear_reference: page %p is not managed", m)); 3938 rw_wlock(&pvh_global_lock); 3939 sched_pin(); 3940 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3941 pmap = PV_PMAP(pv); 3942 PMAP_LOCK(pmap); 3943 pte = pmap_pte_quick(pmap, pv->pv_va); 3944 if ((*pte & PG_A) != 0) { 3945 /* 3946 * Regardless of whether a pte is 32 or 64 bits 3947 * in size, PG_A is among the least significant 3948 * 32 bits. 3949 */ 3950 PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); 3951 pmap_invalidate_page(pmap, pv->pv_va); 3952 } 3953 PMAP_UNLOCK(pmap); 3954 } 3955 sched_unpin(); 3956 rw_wunlock(&pvh_global_lock); 3957} 3958 3959/* 3960 * Miscellaneous support routines follow 3961 */ 3962 3963/* 3964 * Map a set of physical memory pages into the kernel virtual 3965 * address space. Return a pointer to where it is mapped. This 3966 * routine is intended to be used for mapping device memory, 3967 * NOT real memory. 3968 */ 3969void * 3970pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) 3971{ 3972 vm_offset_t va, offset; 3973 vm_size_t tmpsize; 3974 3975 offset = pa & PAGE_MASK; 3976 size = round_page(offset + size); 3977 pa = pa & PG_FRAME; 3978 3979 if (pa < KERNLOAD && pa + size <= KERNLOAD) 3980 va = KERNBASE + pa; 3981 else 3982 va = kmem_alloc_nofault(kernel_map, size); 3983 if (!va) 3984 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 3985 3986 for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) 3987 pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); 3988 pmap_invalidate_range(kernel_pmap, va, va + tmpsize); 3989 pmap_invalidate_cache_range(va, va + size); 3990 return ((void *)(va + offset)); 3991} 3992 3993void * 3994pmap_mapdev(vm_paddr_t pa, vm_size_t size) 3995{ 3996 3997 return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); 3998} 3999 4000void * 4001pmap_mapbios(vm_paddr_t pa, vm_size_t size) 4002{ 4003 4004 return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); 4005} 4006 4007void 4008pmap_unmapdev(vm_offset_t va, vm_size_t size) 4009{ 4010 vm_offset_t base, offset; 4011 4012 if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) 4013 return; 4014 base = trunc_page(va); 4015 offset = va & PAGE_MASK; 4016 size = round_page(offset + size); 4017 kmem_free(kernel_map, base, size); 4018} 4019 4020/* 4021 * Sets the memory attribute for the specified page. 4022 */ 4023void 4024pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 4025{ 4026 4027 m->md.pat_mode = ma; 4028 if ((m->flags & PG_FICTITIOUS) != 0) 4029 return; 4030 4031 /* 4032 * If "m" is a normal page, flush it from the cache. 4033 * See pmap_invalidate_cache_range(). 
4034 * 4035 * First, try to find an existing mapping of the page by sf 4036 * buffer. sf_buf_invalidate_cache() modifies mapping and 4037 * flushes the cache. 4038 */ 4039 if (sf_buf_invalidate_cache(m)) 4040 return; 4041 4042 /* 4043 * If page is not mapped by sf buffer, but CPU does not 4044 * support self snoop, map the page transient and do 4045 * invalidation. In the worst case, whole cache is flushed by 4046 * pmap_invalidate_cache_range(). 4047 */ 4048 if ((cpu_feature & CPUID_SS) == 0) 4049 pmap_flush_page(m); 4050} 4051 4052static void 4053pmap_flush_page(vm_page_t m) 4054{ 4055 struct sysmaps *sysmaps; 4056 vm_offset_t sva, eva; 4057 4058 if ((cpu_feature & CPUID_CLFSH) != 0) { 4059 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 4060 mtx_lock(&sysmaps->lock); 4061 if (*sysmaps->CMAP2) 4062 panic("pmap_flush_page: CMAP2 busy"); 4063 sched_pin(); 4064 PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | 4065 VM_PAGE_TO_MACH(m) | PG_A | PG_M | 4066 pmap_cache_bits(m->md.pat_mode, 0)); 4067 invlcaddr(sysmaps->CADDR2); 4068 sva = (vm_offset_t)sysmaps->CADDR2; 4069 eva = sva + PAGE_SIZE; 4070 4071 /* 4072 * Use mfence despite the ordering implied by 4073 * mtx_{un,}lock() because clflush is not guaranteed 4074 * to be ordered by any other instruction. 4075 */ 4076 mfence(); 4077 for (; sva < eva; sva += cpu_clflush_line_size) 4078 clflush(sva); 4079 mfence(); 4080 PT_SET_MA(sysmaps->CADDR2, 0); 4081 sched_unpin(); 4082 mtx_unlock(&sysmaps->lock); 4083 } else 4084 pmap_invalidate_cache(); 4085} 4086 4087/* 4088 * Changes the specified virtual address range's memory type to that given by 4089 * the parameter "mode". The specified virtual address range must be 4090 * completely contained within the kernel map. 4091 * 4092 * Returns zero if the change completed successfully, and either EINVAL or 4093 * ENOMEM if the change failed. Specifically, EINVAL is returned if some part 4094 * of the virtual address range was not mapped, and ENOMEM is returned if 4095 * there was insufficient memory available to complete the change. 4096 */ 4097int 4098pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) 4099{ 4100 vm_offset_t base, offset, tmpva; 4101 pt_entry_t *pte; 4102 u_int opte, npte; 4103 pd_entry_t *pde; 4104 boolean_t changed; 4105 4106 base = trunc_page(va); 4107 offset = va & PAGE_MASK; 4108 size = round_page(offset + size); 4109 4110 /* Only supported on kernel virtual addresses. */ 4111 if (base <= VM_MAXUSER_ADDRESS) 4112 return (EINVAL); 4113 4114 /* 4MB pages and pages that aren't mapped aren't supported. */ 4115 for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { 4116 pde = pmap_pde(kernel_pmap, tmpva); 4117 if (*pde & PG_PS) 4118 return (EINVAL); 4119 if ((*pde & PG_V) == 0) 4120 return (EINVAL); 4121 pte = vtopte(tmpva); 4122 if ((*pte & PG_V) == 0) 4123 return (EINVAL); 4124 } 4125 4126 changed = FALSE; 4127 4128 /* 4129 * Ok, all the pages exist and are 4k, so run through them updating 4130 * their cache mode. 4131 */ 4132 for (tmpva = base; size > 0; ) { 4133 pte = vtopte(tmpva); 4134 4135 /* 4136 * The cache mode bits are all in the low 32-bits of the 4137 * PTE, so we can just spin on updating the low 32-bits.
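 * Under PAE the PTE is 64 bits wide, but PG_PTE_PAT, PG_NC_PCD, and
 * PG_NC_PWT all sit in the low word, so re-reading *pte as a u_int
 * and retrying until the stored value sticks is sufficient.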

/*
 * Changes the specified virtual address range's memory type to that given by
 * the parameter "mode".  The specified virtual address range must be
 * completely contained within the kernel map.
 *
 * Returns zero if the change completed successfully, and either EINVAL or
 * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
 * of the virtual address range was not mapped, and ENOMEM is returned if
 * there was insufficient memory available to complete the change.
 */
int
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
	vm_offset_t base, offset, tmpva;
	pt_entry_t *pte;
	u_int opte, npte;
	pd_entry_t *pde;
	boolean_t changed;

	base = trunc_page(va);
	offset = va & PAGE_MASK;
	size = round_page(offset + size);

	/* Only supported on kernel virtual addresses. */
	if (base <= VM_MAXUSER_ADDRESS)
		return (EINVAL);

	/* 4MB pages and pages that aren't mapped aren't supported. */
	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
		pde = pmap_pde(kernel_pmap, tmpva);
		if (*pde & PG_PS)
			return (EINVAL);
		if ((*pde & PG_V) == 0)
			return (EINVAL);
		pte = vtopte(tmpva);
		if ((*pte & PG_V) == 0)
			return (EINVAL);
	}

	changed = FALSE;

	/*
	 * Ok, all the pages exist and are 4k, so run through them updating
	 * their cache mode.
	 */
	for (tmpva = base; size > 0; ) {
		pte = vtopte(tmpva);

		/*
		 * The cache mode bits are all in the low 32-bits of the
		 * PTE, so we can just spin on updating the low 32-bits.
		 */
		do {
			opte = *(u_int *)pte;
			npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
			npte |= pmap_cache_bits(mode, 0);
			PT_SET_VA_MA(pte, npte, TRUE);
		} while (npte != opte && (*pte != npte));
		if (npte != opte)
			changed = TRUE;
		tmpva += PAGE_SIZE;
		size -= PAGE_SIZE;
	}

	/*
	 * Flush CPU caches to make sure any data isn't cached that
	 * shouldn't be, etc.
	 */
	if (changed) {
		pmap_invalidate_range(kernel_pmap, base, tmpva);
		pmap_invalidate_cache_range(base, tmpva);
	}
	return (0);
}
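
/*
 * Usage sketch (hypothetical names and addresses, for illustration
 * only): a graphics driver that has already mapped a framebuffer at
 * "fb_va" might switch the range to write-combining and check for
 * failure:
 *
 *	if (pmap_change_attr(fb_va, fb_len, PAT_WRITE_COMBINING) != 0)
 *		... fall back to uncacheable access ...
 *
 * Whether PAT_WRITE_COMBINING is actually honored depends on the PAT
 * support that pmap_cache_bits() was initialized with.
 */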

/*
 * Perform the pmap work for mincore.
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pt_entry_t *ptep, pte;
	vm_paddr_t pa;
	int val;

	PMAP_LOCK(pmap);
retry:
	ptep = pmap_pte(pmap, addr);
	pte = (ptep != NULL) ? PT_GET(ptep) : 0;
	pmap_pte_release(ptep);
	val = 0;
	if ((pte & PG_V) != 0) {
		val |= MINCORE_INCORE;
		if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if ((pte & PG_A) != 0)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	}
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
	    (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
		pa = pte & PG_FRAME;
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);
	return (val);
}

void
pmap_activate(struct thread *td)
{
	pmap_t pmap, oldpmap;
	u_int cpuid;
	u_int32_t cr3;

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	oldpmap = PCPU_GET(curpmap);
	cpuid = PCPU_GET(cpuid);
#if defined(SMP)
	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
#else
	CPU_CLR(cpuid, &oldpmap->pm_active);
	CPU_SET(cpuid, &pmap->pm_active);
#endif
#ifdef PAE
	cr3 = vtophys(pmap->pm_pdpt);
#else
	cr3 = vtophys(pmap->pm_pdir);
#endif
	/*
	 * pmap_activate is for the current thread on the current cpu.
	 */
	td->td_pcb->pcb_cr3 = cr3;
	PT_UPDATES_FLUSH();
	load_cr3(cr3);
	PCPU_SET(curpmap, pmap);
	critical_exit();
}

void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
	vm_offset_t superpage_offset;

	if (size < NBPDR)
		return;
	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
		offset += ptoa(object->pg_color);
	superpage_offset = offset & PDRMASK;
	if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
	    (*addr & PDRMASK) == superpage_offset)
		return;
	if ((*addr & PDRMASK) < superpage_offset)
		*addr = (*addr & ~PDRMASK) + superpage_offset;
	else
		*addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
}

void
pmap_suspend(void)
{
	pmap_t pmap;
	int i, pdir, offset;
	vm_paddr_t pdirma;
	mmu_update_t mu[4];

	/*
	 * We need to remove the recursive mapping structure from all
	 * our pmaps so that Xen doesn't get confused when it restores
	 * the page tables.  The recursive map lives at page directory
	 * index PTDPTDI.  We assume that the suspend code has stopped
	 * the other vcpus (if any).
	 */
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		for (i = 0; i < 4; i++) {
			/*
			 * Figure out which page directory (L2) page
			 * contains this bit of the recursive map and
			 * the offset within that page of the map
			 * entry.
			 */
			pdir = (PTDPTDI + i) / NPDEPG;
			offset = (PTDPTDI + i) % NPDEPG;
			pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
			mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
			mu[i].val = 0;
		}
		HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
	}
}
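
/*
 * A worked example of the index arithmetic above (the concrete value
 * of PTDPTDI is illustrative; the real one depends on the kernel
 * configuration): with PAE, NPDEPG == 512 entries per L2 page, so if
 * PTDPTDI were 1532, recursive-map entry i == 2 would land in L2 page
 * pdir == (1532 + 2) / 512 == 2 at slot
 * offset == (1532 + 2) % 512 == 510, i.e. at machine address
 * (pm_pdpt[2] & PG_FRAME) + 510 * sizeof(pd_entry_t).  The same
 * computation is repeated in pmap_resume() below.
 */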
4303 */ 4304 LIST_FOREACH(pmap, &allpmaps, pm_list) { 4305 for (i = 0; i < 4; i++) { 4306 /* 4307 * Figure out which page directory (L2) page 4308 * contains this bit of the recursive map and 4309 * the offset within that page of the map 4310 * entry 4311 */ 4312 pdir = (PTDPTDI + i) / NPDEPG; 4313 offset = (PTDPTDI + i) % NPDEPG; 4314 pdirma = pmap->pm_pdpt[pdir] & PG_FRAME; 4315 mu[i].ptr = pdirma + offset * sizeof(pd_entry_t); 4316 mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V; 4317 } 4318 HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF); 4319 } 4320} 4321 4322#if defined(PMAP_DEBUG) 4323pmap_pid_dump(int pid) 4324{ 4325 pmap_t pmap; 4326 struct proc *p; 4327 int npte = 0; 4328 int index; 4329 4330 sx_slock(&allproc_lock); 4331 FOREACH_PROC_IN_SYSTEM(p) { 4332 if (p->p_pid != pid) 4333 continue; 4334 4335 if (p->p_vmspace) { 4336 int i,j; 4337 index = 0; 4338 pmap = vmspace_pmap(p->p_vmspace); 4339 for (i = 0; i < NPDEPTD; i++) { 4340 pd_entry_t *pde; 4341 pt_entry_t *pte; 4342 vm_offset_t base = i << PDRSHIFT; 4343 4344 pde = &pmap->pm_pdir[i]; 4345 if (pde && pmap_pde_v(pde)) { 4346 for (j = 0; j < NPTEPG; j++) { 4347 vm_offset_t va = base + (j << PAGE_SHIFT); 4348 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 4349 if (index) { 4350 index = 0; 4351 printf("\n"); 4352 } 4353 sx_sunlock(&allproc_lock); 4354 return (npte); 4355 } 4356 pte = pmap_pte(pmap, va); 4357 if (pte && pmap_pte_v(pte)) { 4358 pt_entry_t pa; 4359 vm_page_t m; 4360 pa = PT_GET(pte); 4361 m = PHYS_TO_VM_PAGE(pa & PG_FRAME); 4362 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 4363 va, pa, m->hold_count, m->wire_count, m->flags); 4364 npte++; 4365 index++; 4366 if (index >= 2) { 4367 index = 0; 4368 printf("\n"); 4369 } else { 4370 printf(" "); 4371 } 4372 } 4373 } 4374 } 4375 } 4376 } 4377 } 4378 sx_sunlock(&allproc_lock); 4379 return (npte); 4380} 4381#endif 4382 4383#if defined(DEBUG) 4384 4385static void pads(pmap_t pm); 4386void pmap_pvdump(vm_paddr_t pa); 4387 4388/* print address space of pmap*/ 4389static void 4390pads(pmap_t pm) 4391{ 4392 int i, j; 4393 vm_paddr_t va; 4394 pt_entry_t *ptep; 4395 4396 if (pm == kernel_pmap) 4397 return; 4398 for (i = 0; i < NPDEPTD; i++) 4399 if (pm->pm_pdir[i]) 4400 for (j = 0; j < NPTEPG; j++) { 4401 va = (i << PDRSHIFT) + (j << PAGE_SHIFT); 4402 if (pm == kernel_pmap && va < KERNBASE) 4403 continue; 4404 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 4405 continue; 4406 ptep = pmap_pte(pm, va); 4407 if (pmap_pte_v(ptep)) 4408 printf("%x:%x ", va, *ptep); 4409 }; 4410 4411} 4412 4413void 4414pmap_pvdump(vm_paddr_t pa) 4415{ 4416 pv_entry_t pv; 4417 pmap_t pmap; 4418 vm_page_t m; 4419 4420 printf("pa %x", pa); 4421 m = PHYS_TO_VM_PAGE(pa); 4422 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4423 pmap = PV_PMAP(pv); 4424 printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); 4425 pads(pmap); 4426 } 4427 printf(" "); 4428} 4429#endif 4430