pmap.c revision 209048
/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/i386/xen/pmap.c 209048 2010-06-11 15:49:39Z alc $");

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidation or protection-reduction
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_msgbuf.h"
#include "opt_smp.h"
#include "opt_xbox.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sf_buf.h>
#include <sys/sx.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#ifdef XBOX
#include <machine/xbox.h>
#endif

#include <xen/interface/xen.h>
#include <xen/hypervisor.h>
#include <machine/xen/hypercall.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>

#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#define DIAGNOSTIC

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

#define PV_STATS
#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define	pa_index(pa)	((pa) >> PDRSHIFT)
#define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)

#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
int pgeflag = 0;		/* PG_G or-in */
int pseflag = 0;		/* PG_PS or-in */

int nkpt;
vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;

#ifdef PAE
pt_entry_t pg_nx;
#if !defined(XEN)
static uma_zone_t pdptzone;
#endif
#endif

static int pat_works;		/* Is page attribute table sane? */

/*
 * Data for the pv entry allocation mechanism
 */
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static struct md_page *pv_table;
static int shpgperproc = PMAP_SHPGPERPROC;

struct pv_chunk *pv_chunkbase;	/* KVA block for pv_chunks */
int pv_maxchunks;		/* How many chunks we have KVA for */
vm_offset_t pv_vafree;		/* freelist stored in the PTE */

/*
 * All those kernel PT submaps that BSD is so fond of
 */
struct sysmaps {
	struct mtx lock;
	pt_entry_t *CMAP1;
	pt_entry_t *CMAP2;
	caddr_t	CADDR1;
	caddr_t	CADDR2;
};
static struct sysmaps sysmaps_pcpu[MAXCPU];
static pt_entry_t *CMAP3;
caddr_t ptvmmap = 0;
static caddr_t CADDR3;
struct msgbuf *msgbufp = 0;

/*
 * Crashdump maps.
 */
static caddr_t crashdumpmap;
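
/*
 * Example (illustrative sketch, not compiled): how the macros above carve
 * up an address.  On i386, PDRSHIFT is 22, so bits 31-22 of a VA select
 * the page directory entry, and pa_index() groups physical addresses by
 * 4MB superpage.  The pmap, VA, and PA below are hypothetical.
 */
#if 0
static void
pmap_macro_example(pmap_t pmap)
{
	vm_offset_t va = 0xc0402123;		/* hypothetical kernel VA */
	pd_entry_t *pde = pmap_pde(pmap, va);	/* &pm_pdir[va >> PDRSHIFT] */
	int valid = pmap_pde_v(pde);		/* is PG_V set in the PDE? */
	vm_paddr_t pa = 0x00801000;		/* hypothetical PA */
	struct md_page *pvh = pa_to_pvh(pa);	/* pv head for pa's 4MB run */
}
#endif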

static pt_entry_t *PMAP1 = 0, *PMAP2;
static pt_entry_t *PADDR1 = 0, *PADDR2;
#ifdef SMP
static int PMAP1cpu;
static int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
	   &PMAP1changedcpu, 0,
	   "Number of times pmap_pte_quick changed CPU with same PMAP1");
#endif
static int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
	   &PMAP1changed, 0,
	   "Number of times pmap_pte_quick changed PMAP1");
static int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
	   &PMAP1unchanged, 0,
	   "Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;

SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
static int pg_ps_enabled;
SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
    "Are large page mappings enabled?");

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
	"Max number of PV entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
	"Page share factor per proc");
SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
    "2/4MB page mapping counters");

static u_long pmap_pde_mappings;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
    &pmap_pde_mappings, 0, "2/4MB page mappings");

static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);

static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count,
    pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
    vm_page_t *free);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
    vm_page_t *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
    vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);

static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);

static __inline void pagezero(void *page);

#if defined(PAE) && !defined(XEN)
static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
#endif
#ifndef XEN
static void pmap_set_pg(void);
#endif

CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));

/*
 * If you get an error here, then you set KVA_PAGES wrong!  See the
 * description of KVA_PAGES in sys/i386/include/pmap.h.  It must be a
 * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
 */
CTASSERT(KERNBASE % (1 << 24) == 0);

void
pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
{
	vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]);

	switch (type) {
	case SH_PD_SET_VA:
#if 0
		xen_queue_pt_update(shadow_pdir_ma,
				    xpmap_ptom(val & ~(PG_RW)));
#endif
		xen_queue_pt_update(pdir_ma,
				    xpmap_ptom(val));
		break;
	case SH_PD_SET_VA_MA:
#if 0
		xen_queue_pt_update(shadow_pdir_ma,
				    val & ~(PG_RW));
#endif
		xen_queue_pt_update(pdir_ma, val);
		break;
	case SH_PD_SET_VA_CLEAR:
#if 0
		xen_queue_pt_update(shadow_pdir_ma, 0);
#endif
		xen_queue_pt_update(pdir_ma, 0);
		break;
	}
}
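
/*
 * Sketch of the queued-update pattern that pd_set() above relies on
 * (illustrative only, not compiled): under Xen, pinned page tables are
 * read-only to the guest, so writes are queued against the machine
 * address of the slot and later issued as a batch rather than stored
 * directly.  The names idx and newpde are hypothetical.
 */
#if 0
static void
pd_set_example(pmap_t pmap, int idx, pd_entry_t newpde)
{
	vm_paddr_t ma = vtomach(&pmap->pm_pdir[idx]);	/* machine address */

	xen_queue_pt_update(ma, newpde);	/* queue the PDE write */
	PT_UPDATES_FLUSH();			/* issue queued updates */
}
#endif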

/*
 * Move the kernel virtual free pointer to the next
 * 4MB.  This is used to help improve performance
 * by using a large (4MB) page for much of the kernel
 * (.text, .data, .bss)
 */
static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
	vm_offset_t newaddr = addr;

#ifndef DISABLE_PSE
	if (cpu_feature & CPUID_PSE)
		newaddr = (addr + PDRMASK) & ~PDRMASK;
#endif
	return newaddr;
}

/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * On the i386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t firstaddr)
{
	vm_offset_t va;
	pt_entry_t *pte, *unused;
	struct sysmaps *sysmaps;
	int i;

	/*
	 * XXX The calculation of virtual_avail is wrong.  It's NKPT*PAGE_SIZE
	 * too large.  It should instead be correctly calculated in locore.s
	 * and not based on 'first' (which is a physical address, not a
	 * virtual address, for the start of unused physical memory).  The
	 * kernel page tables are NOT double mapped and thus should not be
	 * included in this calculation.
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
	virtual_avail = pmap_kmem_choose(virtual_avail);

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
#ifdef PAE
	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
	LIST_INIT(&allpmaps);
	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
	if (nkpt == 0)
		nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

	va = virtual_avail;
	pte = vtopte(va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 * CMAP3 is used for the idle process page zeroing.
	 */
	for (i = 0; i < MAXCPU; i++) {
		sysmaps = &sysmaps_pcpu[i];
		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
		SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
		SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
		PT_SET_MA(sysmaps->CADDR1, 0);
		PT_SET_MA(sysmaps->CADDR2, 0);
	}
	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
	PT_SET_MA(CADDR3, 0);

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)

	/*
	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
	 */
	SYSMAP(caddr_t, unused, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 */
	SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))

	/*
	 * ptemap is used for pmap_pte_quick
	 */
	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);

	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);

	virtual_avail = va;

	/*
	 * Leave in place an identity mapping (virt == phys) for the low 1 MB
	 * physical memory region that is used by the ACPI wakeup code.  This
	 * mapping must not have PG_G set.
	 */
#ifndef XEN
	/*
	 * leave here deliberately to show that this is not supported
	 */
#ifdef XBOX
	/* FIXME: This is gross, but needed for the XBOX.  Since we are at
	 * such an early stage, we cannot yet neatly map video memory ... :-(
	 * Better fixes are very welcome! */
	if (!arch_i386_is_xbox)
#endif
		for (i = 1; i < NKPT; i++)
			PTD[i] = 0;

	/* Initialize the PAT MSR if present. */
	pmap_init_pat();

	/* Turn on PG_G on kernel page(s) */
	pmap_set_pg();
#endif
}

/*
 * Setup the PAT MSR.
 */
void
pmap_init_pat(void)
{
	uint64_t pat_msr;

	/* Bail if this CPU doesn't implement PAT. */
	if (!(cpu_feature & CPUID_PAT))
		return;

	if (cpu_vendor_id != CPU_VENDOR_INTEL ||
	    (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
		/*
		 * Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
		 * Program 4 and 5 as WP and WC.
		 * Leave 6 and 7 as UC and UC-.
		 */
		pat_msr = rdmsr(MSR_PAT);
		pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
		pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
		    PAT_VALUE(5, PAT_WRITE_COMBINING);
		pat_works = 1;
	} else {
		/*
		 * Due to some Intel errata, we can only safely use the lower 4
		 * PAT entries.  Thus, just replace PAT Index 2 with WC instead
		 * of UC-.
		 *
		 *   Intel Pentium III Processor Specification Update
		 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
		 * or Mode C Paging)
		 *
		 *   Intel Pentium IV Processor Specification Update
		 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
		 */
		pat_msr = rdmsr(MSR_PAT);
		pat_msr &= ~PAT_MASK(2);
		pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
		pat_works = 0;
	}
	wrmsr(MSR_PAT, pat_msr);
}
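
/*
 * The PAT MSR holds eight memory types, one per byte; PAT_MASK(i) selects
 * byte i and PAT_VALUE(i, v) places type v there.  A sketch of the update
 * done above for the pat_works case, against the architectural power-on
 * values:
 *
 *	index:   0    1    2    3    4    5    6    7
 *	boot:    WB   WT   UC-  UC   WB   WT   UC-  UC
 *	after:   WB   WT   UC-  UC   WP   WC   UC-  UC
 *
 * Entries 0-3 keep their boot values, so existing PTEs that encode an
 * index below 4 are unaffected by the reprogramming.
 */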
#ifndef XEN
/*
 * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
 */
static void
pmap_set_pg(void)
{
	pd_entry_t pdir;
	pt_entry_t *pte;
	vm_offset_t va, endva;
	int i;

	if (pgeflag == 0)
		return;

	i = KERNLOAD/NBPDR;
	endva = KERNBASE + KERNend;

	if (pseflag) {
		va = KERNBASE + KERNLOAD;
		while (va < endva) {
			pdir = kernel_pmap->pm_pdir[KPTDI+i];
			pdir |= pgeflag;
			kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
			invltlb();	/* Play it safe, invltlb() every time */
			i++;
			va += NBPDR;
		}
	} else {
		va = (vm_offset_t)btext;
		while (va < endva) {
			pte = vtopte(va);
			if (*pte & PG_V)
				*pte |= pgeflag;
			invltlb();	/* Play it safe, invltlb() every time */
			va += PAGE_SIZE;
		}
	}
}
#endif

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pat_mode = PAT_WRITE_BACK;
}

#if defined(PAE) && !defined(XEN)
static void *
pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{

	/* Inform UMA that this allocator uses kernel_map/object. */
	*flags = UMA_SLAB_KERNEL;
	return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL,
	    0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT));
}
#endif

/*
 * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
 * Requirements:
 *  - Must deal with pages in order to ensure that none of the PG_* bits
 *    are ever set, PG_V in particular.
 *  - Assumes we can write to ptes without pte_store() atomic ops, even
 *    on PAE systems.  This should be ok.
 *  - Assumes nothing will ever test these addresses for 0 to indicate
 *    no mapping instead of correctly checking PG_V.
 *  - Assumes a vm_offset_t will fit in a pte (true for i386).
 * Because PG_V is never set, there can be no mappings to invalidate.
 */
static int ptelist_count = 0;
static vm_offset_t
pmap_ptelist_alloc(vm_offset_t *head)
{
	vm_offset_t va;
	vm_offset_t *phead = (vm_offset_t *)*head;

	if (ptelist_count == 0) {
		printf("out of memory!!!!!!\n");
		return (0);	/* Out of memory */
	}
	ptelist_count--;
	va = phead[ptelist_count];
	return (va);
}

static void
pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
{
	vm_offset_t *phead = (vm_offset_t *)*head;

	phead[ptelist_count++] = va;
}

static void
pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
{
	int i, nstackpages;
	vm_offset_t va;
	vm_page_t m;

	nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1)/
	    (PAGE_SIZE/sizeof(vm_offset_t));
	for (i = 0; i < nstackpages; i++) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		m = vm_page_alloc(NULL, i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		pmap_qenter(va, &m, 1);
	}

	*head = (vm_offset_t)base;
	for (i = npages - 1; i >= nstackpages; i--) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		pmap_ptelist_free(head, va);
	}
}
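
/*
 * Illustrative use of the freelist above (sketch, not compiled; the real
 * consumers are the pv chunk routines later in this file): pop one page
 * of unmapped KVA from pv_vafree, and push it back when done.
 */
#if 0
	vm_offset_t va;

	va = pmap_ptelist_alloc(&pv_vafree);	/* 0 means the list is empty */
	if (va != 0) {
		/* map something at va with pmap_qenter(), use it, then: */
		pmap_ptelist_free(&pv_vafree, va);
	}
#endif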

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	vm_page_t mpte;
	vm_size_t s;
	int i, pv_npg;

	/*
	 * Initialize the vm page array entries for the kernel pmap's
	 * page table pages.
	 */
	for (i = 0; i < nkpt; i++) {
		mpte = PHYS_TO_VM_PAGE(xpmap_mtop(PTD[i + KPTDI] & PG_FRAME));
		KASSERT(mpte >= vm_page_array &&
		    mpte < &vm_page_array[vm_page_array_size],
		    ("pmap_init: page table page is out of range"));
		mpte->pindex = i + KPTDI;
		mpte->phys_addr = xpmap_mtop(PTD[i + KPTDI] & PG_FRAME);
	}

	/*
	 * Initialize the address space (zone) for the pv entries.  Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_max = roundup(pv_entry_max, _NPCPV);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	/*
	 * Are large page mappings enabled?
	 */
	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);

	/*
	 * Calculate the size of the pv head table for superpages.
	 */
	for (i = 0; phys_avail[i + 1]; i += 2);
	pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;

	/*
	 * Allocate memory for the pv head table for superpages.
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);

	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
	    PAGE_SIZE * pv_maxchunks);
	if (pv_chunkbase == NULL)
		panic("pmap_init: not enough kvm for pv chunks");
	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
#if defined(PAE) && !defined(XEN)
	pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
	    NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
	uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
#endif
}

/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * Determine the appropriate bits to set in a PTE or PDE for a specified
 * caching mode.
 */
int
pmap_cache_bits(int mode, boolean_t is_pde)
{
	int pat_flag, pat_index, cache_bits;

	/* The PAT bit is different for PTE's and PDE's. */
	pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;

	/* If we don't support PAT, map extended modes to older ones. */
	if (!(cpu_feature & CPUID_PAT)) {
		switch (mode) {
		case PAT_UNCACHEABLE:
		case PAT_WRITE_THROUGH:
		case PAT_WRITE_BACK:
			break;
		case PAT_UNCACHED:
		case PAT_WRITE_COMBINING:
		case PAT_WRITE_PROTECTED:
			mode = PAT_UNCACHEABLE;
			break;
		}
	}

	/* Map the caching mode to a PAT index. */
	if (pat_works) {
		switch (mode) {
		case PAT_UNCACHEABLE:
			pat_index = 3;
			break;
		case PAT_WRITE_THROUGH:
			pat_index = 1;
			break;
		case PAT_WRITE_BACK:
			pat_index = 0;
			break;
		case PAT_UNCACHED:
			pat_index = 2;
			break;
		case PAT_WRITE_COMBINING:
			pat_index = 5;
			break;
		case PAT_WRITE_PROTECTED:
			pat_index = 4;
			break;
		default:
			panic("Unknown caching mode %d\n", mode);
		}
	} else {
		switch (mode) {
		case PAT_UNCACHED:
		case PAT_UNCACHEABLE:
		case PAT_WRITE_PROTECTED:
			pat_index = 3;
			break;
		case PAT_WRITE_THROUGH:
			pat_index = 1;
			break;
		case PAT_WRITE_BACK:
			pat_index = 0;
			break;
		case PAT_WRITE_COMBINING:
			pat_index = 2;
			break;
		default:
			panic("Unknown caching mode %d\n", mode);
		}
	}

	/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
	cache_bits = 0;
	if (pat_index & 0x4)
		cache_bits |= pat_flag;
	if (pat_index & 0x2)
		cache_bits |= PG_NC_PCD;
	if (pat_index & 0x1)
		cache_bits |= PG_NC_PWT;
	return (cache_bits);
}
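
/*
 * Example (sketch, not compiled): composing a PTE for a write-combining
 * mapping, e.g. of a frame buffer.  pmap_kenter_attr() later in this file
 * does essentially this internally; fb_va and fb_pa are hypothetical.
 */
#if 0
	pt_entry_t npte;

	npte = fb_pa | PG_RW | PG_V | pmap_cache_bits(PAT_WRITE_COMBINING, 0);
	pte_store(vtopte(fb_va), npte);
#endif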
#ifdef SMP
/*
 * For SMP, these functions have to use the IPI mechanism for coherence.
 *
 * N.B.: Before calling any of the following TLB invalidation functions,
 * the calling processor must ensure that all stores updating a non-
 * kernel page table are globally performed.  Otherwise, another
 * processor could cache an old, pre-update entry without being
 * invalidated.  This can happen one of two ways: (1) The pmap becomes
 * active on another processor after its pm_active field is checked by
 * one of the following functions but before a store updating the page
 * table is globally performed. (2) The pmap becomes active on another
 * processor before its pm_active field is checked but due to
 * speculative loads one of the following functions still reads the
 * pmap as inactive on the other processor.
 *
 * The kernel page table is exempt because its pm_active field is
 * immutable.  The kernel page table is always active on every
 * processor.
 */
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	u_int cpumask;
	u_int other_cpus;

	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
	    pmap, va);

	sched_pin();
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		invlpg(va);
		smp_invlpg(va);
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			invlpg(va);
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
	}
	sched_unpin();
	PT_UPDATES_FLUSH();
}

void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	u_int cpumask;
	u_int other_cpus;
	vm_offset_t addr;

	CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
	    pmap, sva, eva);

	sched_pin();
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
		smp_invlpg_range(sva, eva);
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			for (addr = sva; addr < eva; addr += PAGE_SIZE)
				invlpg(addr);
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
			    sva, eva);
	}
	sched_unpin();
	PT_UPDATES_FLUSH();
}

void
pmap_invalidate_all(pmap_t pmap)
{
	u_int cpumask;
	u_int other_cpus;

	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);

	sched_pin();
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		invltlb();
		smp_invltlb();
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			invltlb();
		if (pmap->pm_active & other_cpus)
			smp_masked_invltlb(pmap->pm_active & other_cpus);
	}
	sched_unpin();
}

void
pmap_invalidate_cache(void)
{

	sched_pin();
	wbinvd();
	smp_cache_flush();
	sched_unpin();
}
#else /* !SMP */
/*
 * Normal, non-SMP, 486+ invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
	    pmap, va);

	if (pmap == kernel_pmap || pmap->pm_active)
		invlpg(va);
	PT_UPDATES_FLUSH();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	if (eva - sva > PAGE_SIZE)
		CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
		    pmap, sva, eva);

	if (pmap == kernel_pmap || pmap->pm_active)
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
	PT_UPDATES_FLUSH();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);

	if (pmap == kernel_pmap || pmap->pm_active)
		invltlb();
}

PMAP_INLINE void
pmap_invalidate_cache(void)
{

	wbinvd();
}
#endif /* !SMP */
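
/*
 * The usual calling pattern for the functions above (sketch, not
 * compiled): update the page table first, so the store is globally
 * performed, then shoot down any stale TLB entries on every processor
 * that may have the pmap active.
 */
#if 0
	PT_SET_VA(pte, newpte, TRUE);		/* update the PTE */
	pmap_invalidate_page(pmap, va);		/* then invalidate the TLB */
#endif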

void
pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{

	KASSERT((sva & PAGE_MASK) == 0,
	    ("pmap_invalidate_cache_range: sva not page-aligned"));
	KASSERT((eva & PAGE_MASK) == 0,
	    ("pmap_invalidate_cache_range: eva not page-aligned"));

	if (cpu_feature & CPUID_SS)
		; /* If "Self Snoop" is supported, do nothing. */
	else if (cpu_feature & CPUID_CLFSH) {

		/*
		 * Otherwise, do per-cache line flush.  Use the mfence
		 * instruction to ensure that previous stores are
		 * included in the write-back.  The processor
		 * propagates the flush to other processors in the cache
		 * coherence domain.
		 */
		mfence();
		for (; sva < eva; sva += cpu_clflush_line_size)
			clflush(sva);
		mfence();
	} else {

		/*
		 * No targeted cache flush methods are supported by the CPU;
		 * globally invalidate the cache as a last resort.
		 */
		pmap_invalidate_cache();
	}
}

/*
 * Are we current address space or kernel?  N.B. We return FALSE when
 * a pmap's page table is in use because a kernel thread is borrowing
 * it.  The borrowed page table can change spontaneously, making any
 * dependence on its continued use subject to a race condition.
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

	return (pmap == kernel_pmap ||
	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
}

/*
 * If the given pmap is not the current or kernel pmap, the returned pte must
 * be released by passing it to pmap_pte_release().
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_lock(&PMAP2mutex);
		newpf = *pde & PG_FRAME;
		if ((*PMAP2 & PG_FRAME) != newpf) {
			vm_page_lock_queues();
			PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
			vm_page_unlock_queues();
			CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
			    pmap, va, (*PMAP2 & 0xffffffff));
		}

		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}

/*
 * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
 * being NULL.
 */
static __inline void
pmap_pte_release(pt_entry_t *pte)
{

	if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {
		CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
		    *PMAP2);
		PT_SET_VA(PMAP2, 0, TRUE);
		mtx_unlock(&PMAP2mutex);
	}
}

static __inline void
invlcaddr(void *caddr)
{

	invlpg((u_int)caddr);
	PT_UPDATES_FLUSH();
}
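
/*
 * Typical pairing for pmap_pte()/pmap_pte_release() (sketch, not
 * compiled, hypothetical caller): because a foreign pmap is accessed
 * through the shared PMAP2/PADDR2 window, the returned pointer must be
 * released so that PMAP2mutex is dropped.
 */
#if 0
	pt_entry_t *pte;

	pte = pmap_pte(pmap, va);
	if (pte != NULL) {
		pt_entry_t val = *pte;	/* examine or modify the entry */
		pmap_pte_release(pte);	/* unmaps PADDR2, drops PMAP2mutex */
	}
#endif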

/*
 * Super fast pmap_pte routine best used when scanning
 * the pv lists.  This eliminates many coarse-grained
 * invltlb calls.  Note that many of the pv list
 * scans are across different pmaps.  It is very wasteful
 * to do an entire invltlb for checking a single mapping.
 *
 * If the given pmap is not the current pmap, vm_page_queue_mtx
 * must be held and curthread pinned to a CPU.
 */
static pt_entry_t *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
		newpf = *pde & PG_FRAME;
		if ((*PMAP1 & PG_FRAME) != newpf) {
			PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
			CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
			    pmap, va, (u_long)*PMAP1);

#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			PMAP1cpu = PCPU_GET(cpuid);
			invlcaddr(PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;
	pt_entry_t pteval;

	rtval = 0;
	PMAP_LOCK(pmap);
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0) {
			rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
			PMAP_UNLOCK(pmap);
			return rtval;
		}
		pte = pmap_pte(pmap, va);
		pteval = *pte ? xpmap_mtop(*pte) : 0;
		rtval = (pteval & PG_FRAME) | (va & PAGE_MASK);
		pmap_pte_release(pte);
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}

/*
 * Routine:	pmap_extract_ma
 * Function:
 *	Like pmap_extract, but returns machine address
 */
vm_paddr_t
pmap_extract_ma(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;

	rtval = 0;
	PMAP_LOCK(pmap);
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0) {
			rtval = (pde & ~PDRMASK) | (va & PDRMASK);
			PMAP_UNLOCK(pmap);
			return rtval;
		}
		pte = pmap_pte(pmap, va);
		rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
		pmap_pte_release(pte);
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}
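
/*
 * Under Xen the two extract routines differ only in address space: for
 * the same mapping, pmap_extract() returns the pseudo-physical address
 * and pmap_extract_ma() the machine address, related through the
 * physical-to-machine table (sketch, not compiled, hypothetical va):
 */
#if 0
	vm_paddr_t pa = pmap_extract(pmap, va);		/* pseudo-physical */
	vm_paddr_t ma = pmap_extract_ma(pmap, va);	/* machine */

	KASSERT(xpmap_ptom(pa & PG_FRAME) == (ma & PG_FRAME),
	    ("PA<->MA translation mismatch"));
#endif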

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pd_entry_t pde;
	pt_entry_t pte;
	vm_page_t m;
	vm_paddr_t pa;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pde = PT_GET(pmap_pde(pmap, va));
	if (pde != 0) {
		if (pde & PG_PS) {
			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
				if (vm_page_pa_tryrelock(pmap, (pde &
				    PG_PS_FRAME) | (va & PDRMASK), &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
				    (va & PDRMASK));
				vm_page_hold(m);
			}
		} else {
			sched_pin();
			pte = PT_GET(pmap_pte_quick(pmap, va));
			if (*PMAP1)
				PT_SET_MA(PADDR1, 0);
			if ((pte & PG_V) &&
			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
				    &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
				vm_page_hold(m);
			}
			sched_unpin();
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Add a wired page to the kva.
 * Note: not SMP coherent.
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{

	PT_SET_MA(va, xpmap_ptom(pa) | PG_RW | PG_V | pgeflag);
}

void
pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
}

static __inline void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
{

	PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	PT_CLEAR_VA(pte, FALSE);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping.  Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged.  Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	vm_offset_t va, sva;

	va = sva = *virt;
	CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
	    va, start, end, prot);
	while (start < end) {
		pmap_kenter(va, start);
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
	*virt = va;
	return (sva);
}
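
/*
 * Minimal sketch of a temporary kernel mapping with the routines above
 * (not compiled; va and pa are hypothetical and the caller owns the KVA
 * page at va).  pmap_kenter()/pmap_kremove() are not SMP coherent on
 * their own, hence the explicit invalidation.
 */
#if 0
	pmap_kenter(va, pa);			/* map one wired page */
	/* ... access the page through va ... */
	pmap_kremove(va);			/* tear the mapping down */
	pmap_invalidate_page(kernel_pmap, va);	/* flush stale TLB entries */
#endif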

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pt_entry_t *endpte, *pte;
	vm_paddr_t pa;
	vm_offset_t va = sva;
	int mclcount = 0;
	multicall_entry_t mcl[16];
	multicall_entry_t *mclp = mcl;
	int error;

	CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
	pte = vtopte(sva);
	endpte = pte + count;
	while (pte < endpte) {
		pa = xpmap_ptom(VM_PAGE_TO_PHYS(*ma)) | pgeflag | PG_RW |
		    PG_V | PG_M | PG_A;

		mclp->op = __HYPERVISOR_update_va_mapping;
		mclp->args[0] = va;
		mclp->args[1] = (uint32_t)(pa & 0xffffffff);
		mclp->args[2] = (uint32_t)(pa >> 32);
		mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;

		va += PAGE_SIZE;
		pte++;
		ma++;
		mclp++;
		mclcount++;
		if (mclcount == 16) {
			error = HYPERVISOR_multicall(mcl, mclcount);
			mclp = mcl;
			mclcount = 0;
			KASSERT(error == 0, ("bad multicall %d", error));
		}
	}
	if (mclcount) {
		error = HYPERVISOR_multicall(mcl, mclcount);
		KASSERT(error == 0, ("bad multicall %d", error));
	}

#ifdef INVARIANTS
	for (pte = vtopte(sva), mclcount = 0; mclcount < count;
	    mclcount++, pte++)
		KASSERT(*pte, ("pte not set for va=0x%x",
		    sva + mclcount*PAGE_SIZE));
#endif
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
	va = sva;
	vm_page_lock_queues();
	critical_enter();
	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
	critical_exit();
	vm_page_unlock_queues();
}
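
/*
 * pmap_qenter() above batches its PTE stores sixteen at a time through
 * HYPERVISOR_multicall(), trading one hypercall per page for one per
 * sixteen pages.  The same pattern works for any repeated hypercall
 * (sketch, not compiled; the fill step mirrors the loop in pmap_qenter()):
 */
#if 0
	multicall_entry_t mcl[16];
	int n = 0;

	/* fill mcl[0..n-1] with ops, flushing whenever n reaches 16 */
	if (n != 0 && HYPERVISOR_multicall(mcl, n) != 0)
		panic("multicall failed");
#endif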

/***************************************************
 * Page table page management routines.....
 ***************************************************/
static __inline void
pmap_free_zero_pages(vm_page_t free)
{
	vm_page_t m;

	while (free != NULL) {
		m = free;
		free = m->right;
		vm_page_free_zero(m);
	}
}

/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
 */
static __inline int
pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
{

	--m->wire_count;
	if (m->wire_count == 0)
		return _pmap_unwire_pte_hold(pmap, m, free);
	else
		return 0;
}

static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
{
	vm_offset_t pteva;

	PT_UPDATES_FLUSH();
	/*
	 * unmap the page table page
	 */
	xen_pt_unpin(pmap->pm_pdir[m->pindex]);
	/*
	 * page *might* contain residual mapping :-/
	 */
	PD_CLEAR_VA(pmap, m->pindex, TRUE);
	pmap_zero_page(m);
	--pmap->pm_stats.resident_count;

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&cnt.v_wire_count, 1);

	/*
	 * Do an invltlb to make the invalidated mapping
	 * take effect immediately.
	 */
	pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
	pmap_invalidate_page(pmap, pteva);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	m->right = *free;
	*free = m;

	return 1;
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
{
	pd_entry_t ptepde;
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return 0;
	ptepde = PT_GET(pmap_pde(pmap, va));
	mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
	return pmap_unwire_pte_hold(pmap, mpte, free);
}

void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
#ifdef PAE
	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
	pmap->pm_active = 0;
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(pmap_t pmap)
{
	vm_page_t m, ptdpg[NPGPTD + 1];
	int npgptd = NPGPTD + 1;
	static int color;
	int i;

	PMAP_LOCK_INIT(pmap);

	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	if (pmap->pm_pdir == NULL) {
		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
		    NBPTD);
		if (pmap->pm_pdir == NULL) {
			PMAP_LOCK_DESTROY(pmap);
			return (0);
		}
#if defined(XEN) && defined(PAE)
		pmap->pm_pdpt = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1);
#endif

#if defined(PAE) && !defined(XEN)
		pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
		KASSERT(((vm_offset_t)pmap->pm_pdpt &
		    ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
		    ("pmap_pinit: pdpt misaligned"));
		KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
		    ("pmap_pinit: pdpt above 4g"));
#endif
	}

	/*
	 * allocate the page directory page(s)
	 */
	for (i = 0; i < npgptd;) {
		m = vm_page_alloc(NULL, color++,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (m == NULL)
			VM_WAIT;
		else {
			ptdpg[i++] = m;
		}
	}
	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
	for (i = 0; i < NPGPTD; i++) {
		if ((ptdpg[i]->flags & PG_ZERO) == 0)
			pagezero(&pmap->pm_pdir[i*NPTEPG]);
	}

	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
	/* Wire in kernel global address entries. */
	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
#ifdef PAE
#ifdef XEN
	pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
	if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
		bzero(pmap->pm_pdpt, PAGE_SIZE);
#endif
	for (i = 0; i < NPGPTD; i++) {
		vm_paddr_t ma;

		ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
		pmap->pm_pdpt[i] = ma | PG_V;

	}
#endif
#ifdef XEN
	for (i = 0; i < NPGPTD; i++) {
		pt_entry_t *pd;
		vm_paddr_t ma;

		ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
		pd = pmap->pm_pdir + (i * NPDEPG);
		PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
#if 0
		xen_pgd_pin(ma);
#endif
	}

#ifdef PAE
	PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
#endif
	vm_page_lock_queues();
	xen_flush_queue();
	xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD])));
	for (i = 0; i < NPGPTD; i++) {
		vm_paddr_t ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
		PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
	}
	xen_flush_queue();
	vm_page_unlock_queues();
#endif
	pmap->pm_active = 0;
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);

	return (1);
}

/*
 * This routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags)
{
	vm_paddr_t ptema;
	vm_page_t m;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		if (flags & M_WAITOK) {
			PMAP_UNLOCK(pmap);
			vm_page_unlock_queues();
			VM_WAIT;
			vm_page_lock_queues();
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */
	pmap->pm_stats.resident_count++;

	ptema = xpmap_ptom(VM_PAGE_TO_PHYS(m));
	xen_pt_pin(ptema);
	PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
		(ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);

	KASSERT(pmap->pm_pdir[ptepindex],
	    ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
	return (m);
}
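
/*
 * _pmap_allocpte() returns NULL both when it slept (M_WAITOK) and when a
 * nonblocking allocation failed (M_NOWAIT); the caller decides whether
 * to retry.  pmap_allocpte() below does exactly this (sketch, not
 * compiled):
 */
#if 0
retry:
	m = _pmap_allocpte(pmap, ptepindex, flags);
	if (m == NULL && (flags & M_WAITOK))
		goto retry;	/* the PD entry is re-read on each pass */
#endif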

static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
{
	unsigned ptepindex;
	pd_entry_t ptema;
	vm_page_t m;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = va >> PDRSHIFT;
retry:
	/*
	 * Get the page directory entry
	 */
	ptema = pmap->pm_pdir[ptepindex];

	/*
	 * This supports switching from a 4MB page to a
	 * normal 4K page.
	 */
	if (ptema & PG_PS) {
		/*
		 * XXX
		 */
		pmap->pm_pdir[ptepindex] = 0;
		ptema = 0;
		pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
		pmap_invalidate_all(kernel_pmap);
	}

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (ptema & PG_V) {
		m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has
		 * been deallocated.
		 */
		CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
		    pmap, va, flags);
		m = _pmap_allocpte(pmap, ptepindex, flags);
		if (m == NULL && (flags & M_WAITOK))
			goto retry;

		KASSERT(pmap->pm_pdir[ptepindex],
		    ("ptepindex=%d did not get mapped", ptepindex));
	}
	return (m);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

#ifdef SMP
/*
 * Deal with a SMP shootdown of other users of the pmap that we are
 * trying to dispose of.  This can be a bit hairy.
 */
static cpumask_t *lazymask;
static u_int lazyptd;
static volatile u_int lazywait;

void pmap_lazyfix_action(void);

void
pmap_lazyfix_action(void)
{
	cpumask_t mymask = PCPU_GET(cpumask);

#ifdef COUNT_IPIS
	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
#endif
	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	atomic_clear_int(lazymask, mymask);
	atomic_store_rel_int(&lazywait, 1);
}

static void
pmap_lazyfix_self(cpumask_t mymask)
{

	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	atomic_clear_int(lazymask, mymask);
}

static void
pmap_lazyfix(pmap_t pmap)
{
	cpumask_t mymask, mask;
	u_int spins;

	while ((mask = pmap->pm_active) != 0) {
		spins = 50000000;
		mask = mask & -mask;	/* Find least significant set bit */
		mtx_lock_spin(&smp_ipi_mtx);
#ifdef PAE
		lazyptd = vtophys(pmap->pm_pdpt);
#else
		lazyptd = vtophys(pmap->pm_pdir);
#endif
		mymask = PCPU_GET(cpumask);
		if (mask == mymask) {
			lazymask = &pmap->pm_active;
			pmap_lazyfix_self(mymask);
		} else {
			atomic_store_rel_int((u_int *)&lazymask,
			    (u_int)&pmap->pm_active);
			atomic_store_rel_int(&lazywait, 0);
			ipi_selected(mask, IPI_LAZYPMAP);
			while (lazywait == 0) {
				ia32_pause();
				if (--spins == 0)
					break;
			}
		}
		mtx_unlock_spin(&smp_ipi_mtx);
		if (spins == 0)
			printf("pmap_lazyfix: spun for 50000000\n");
	}
}

#else /* SMP */

/*
 * Cleaning up on uniprocessor is easy.  For various reasons, we're
 * unlikely to have to even execute this code, including the fact
 * that the cleanup is deferred until the parent does a wait(2), which
 * means that another userland process has run.
 */
static void
pmap_lazyfix(pmap_t pmap)
{
	u_int cr3;

	cr3 = vtophys(pmap->pm_pdir);
	if (cr3 == rcr3()) {
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
		pmap->pm_active &= ~(PCPU_GET(cpumask));
	}
}
#endif /* SMP */
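
/*
 * pmap_lazyfix() above peels CPUs off pm_active one at a time using the
 * two's complement identity (mask & -mask), which isolates the least
 * significant set bit.  For example, with mask = 0x0c (CPUs 2 and 3):
 *
 *	0x0c & -0x0c == 0x0c & 0xfffffff4 == 0x04
 *
 * so CPU 2 is handled first, and the loop then re-reads pm_active.
 */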

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m, ptdpg[2*NPGPTD+1];
	vm_paddr_t ma;
	int i;
#ifdef XEN
#ifdef PAE
	int npgptd = NPGPTD + 1;
#else
	int npgptd = NPGPTD;
#endif
#else
	int npgptd = NPGPTD;
#endif

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));
	PT_UPDATES_FLUSH();

	pmap_lazyfix(pmap);
	mtx_lock_spin(&allpmaps_lock);
	LIST_REMOVE(pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	for (i = 0; i < NPGPTD; i++)
		ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir +
		    (i*NPDEPG)) & PG_FRAME);
	pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
#if defined(PAE) && defined(XEN)
	ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
#endif

	for (i = 0; i < npgptd; i++) {
		m = ptdpg[i];
		ma = xpmap_ptom(VM_PAGE_TO_PHYS(m));
		/* unpinning L1 and L2 treated the same */
		xen_pgd_unpin(ma);
#ifdef PAE
		KASSERT(xpmap_ptom(VM_PAGE_TO_PHYS(m)) == (pmap->pm_pdpt[i] & PG_FRAME),
		    ("pmap_release: got wrong ptd page"));
#endif
		m->wire_count--;
		atomic_subtract_int(&cnt.v_wire_count, 1);
		vm_page_free(m);
	}
	PMAP_LOCK_DESTROY(pmap);
}

static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");
&allpmaps, pm_list) 1967 PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE); 1968 1969 mtx_unlock_spin(&allpmaps_lock); 1970 vm_page_unlock_queues(); 1971 1972 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1973 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1974 kernel_vm_end = kernel_map->max_offset; 1975 break; 1976 } 1977 } 1978} 1979 1980 1981/*************************************************** 1982 * page management routines. 1983 ***************************************************/ 1984 1985CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1986CTASSERT(_NPCM == 11); 1987 1988static __inline struct pv_chunk * 1989pv_to_chunk(pv_entry_t pv) 1990{ 1991 1992 return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK); 1993} 1994 1995#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1996 1997#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ 1998#define PC_FREE10 0x0000fffful /* Free values for index 10 */ 1999 2000static uint32_t pc_freemask[11] = { 2001 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 2002 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 2003 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 2004 PC_FREE0_9, PC_FREE10 2005}; 2006 2007SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 2008 "Current number of pv entries"); 2009 2010#ifdef PV_STATS 2011static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 2012 2013SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 2014 "Current number of pv entry chunks"); 2015SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 2016 "Total number of pv entry chunks allocated"); 2017SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 2018 "Total number of pv entry chunks freed"); 2019SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 2020 "Number of times a pv chunk page could not be allocated"); 2021 2022static long pv_entry_frees, pv_entry_allocs; 2023static int pv_entry_spare; 2024 2025SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 2026 "Total number of pv entries freed"); 2027SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 2028 "Total number of pv entries allocated"); 2029SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 2030 "Current number of spare pv entries"); 2031 2032static int pmap_collect_inactive, pmap_collect_active; 2033 2034SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0, 2035 "Number of times pmap_collect was called on the inactive queue"); 2036SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0, 2037 "Number of times pmap_collect was called on the active queue"); 2038#endif 2039 2040/* 2041 * We are in a serious low memory condition. Resort to 2042 * drastic measures to free some pages so we can allocate 2043 * another pv entry chunk. This is normally called to 2044 * unmap inactive pages, and if necessary, active pages.
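 *
 * Each pmap found on a page's pv list must be locked here while
 * the caller already holds locked_pmap. Deadlock is avoided by
 * acquiring pmap locks in a single global order, namely by the
 * address of the pmap: a pmap that compares greater than
 * locked_pmap is locked unconditionally, while any other pmap is
 * only try-locked and skipped if its lock is unavailable.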
2045 */ 2046static void 2047pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq) 2048{ 2049 pmap_t pmap; 2050 pt_entry_t *pte, tpte; 2051 pv_entry_t next_pv, pv; 2052 vm_offset_t va; 2053 vm_page_t m, free; 2054 2055 sched_pin(); 2056 TAILQ_FOREACH(m, &vpq->pl, pageq) { 2057 if (m->hold_count || m->busy) 2058 continue; 2059 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { 2060 va = pv->pv_va; 2061 pmap = PV_PMAP(pv); 2062 /* Avoid deadlock and lock recursion. */ 2063 if (pmap > locked_pmap) 2064 PMAP_LOCK(pmap); 2065 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) 2066 continue; 2067 pmap->pm_stats.resident_count--; 2068 pte = pmap_pte_quick(pmap, va); 2069 tpte = pte_load_clear(pte); 2070 KASSERT((tpte & PG_W) == 0, 2071 ("pmap_collect: wired pte %#jx", (uintmax_t)tpte)); 2072 if (tpte & PG_A) 2073 vm_page_flag_set(m, PG_REFERENCED); 2074 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2075 vm_page_dirty(m); 2076 free = NULL; 2077 pmap_unuse_pt(pmap, va, &free); 2078 pmap_invalidate_page(pmap, va); 2079 pmap_free_zero_pages(free); 2080 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2081 free_pv_entry(pmap, pv); 2082 if (pmap != locked_pmap) 2083 PMAP_UNLOCK(pmap); 2084 } 2085 if (TAILQ_EMPTY(&m->md.pv_list)) 2086 vm_page_flag_clear(m, PG_WRITEABLE); 2087 } 2088 sched_unpin(); 2089} 2090 2091 2092/* 2093 * free the pv_entry back to the free list 2094 */ 2095static void 2096free_pv_entry(pmap_t pmap, pv_entry_t pv) 2097{ 2098 vm_page_t m; 2099 struct pv_chunk *pc; 2100 int idx, field, bit; 2101 2102 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2103 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2104 PV_STAT(pv_entry_frees++); 2105 PV_STAT(pv_entry_spare++); 2106 pv_entry_count--; 2107 pc = pv_to_chunk(pv); 2108 idx = pv - &pc->pc_pventry[0]; 2109 field = idx / 32; 2110 bit = idx % 32; 2111 pc->pc_map[field] |= 1ul << bit; 2112 /* move to head of list */ 2113 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2114 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2115 for (idx = 0; idx < _NPCM; idx++) 2116 if (pc->pc_map[idx] != pc_freemask[idx]) 2117 return; 2118 PV_STAT(pv_entry_spare -= _NPCPV); 2119 PV_STAT(pc_chunk_count--); 2120 PV_STAT(pc_chunk_frees++); 2121 /* entire chunk is free, return it */ 2122 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2123 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2124 pmap_qremove((vm_offset_t)pc, 1); 2125 vm_page_unwire(m, 0); 2126 vm_page_free(m); 2127 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 2128} 2129 2130/* 2131 * get a new pv_entry, allocating a block from the system 2132 * when needed. 
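 *
 * pv entries are carved out of pv_chunk pages. Each chunk is a
 * single wired page holding _NPCPV entries and an _NPCM-word bitmap
 * (pc_map) in which a set bit marks a free slot. As a worked
 * example, slot 75 of a chunk corresponds to pc_map word 75 / 32 = 2,
 * bit 75 % 32 = 11: allocating it clears bit 11 of pc_map[2], and
 * freeing it sets that bit again.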
2133 */ 2134static pv_entry_t 2135get_pv_entry(pmap_t pmap, int try) 2136{ 2137 static const struct timeval printinterval = { 60, 0 }; 2138 static struct timeval lastprint; 2139 static vm_pindex_t colour; 2140 struct vpgqueues *pq; 2141 int bit, field; 2142 pv_entry_t pv; 2143 struct pv_chunk *pc; 2144 vm_page_t m; 2145 2146 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2147 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2148 PV_STAT(pv_entry_allocs++); 2149 pv_entry_count++; 2150 if (pv_entry_count > pv_entry_high_water) 2151 if (ratecheck(&lastprint, &printinterval)) 2152 printf("Approaching the limit on PV entries, consider " 2153 "increasing either the vm.pmap.shpgperproc or the " 2154 "vm.pmap.pv_entry_max tunable.\n"); 2155 pq = NULL; 2156retry: 2157 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2158 if (pc != NULL) { 2159 for (field = 0; field < _NPCM; field++) { 2160 if (pc->pc_map[field]) { 2161 bit = bsfl(pc->pc_map[field]); 2162 break; 2163 } 2164 } 2165 if (field < _NPCM) { 2166 pv = &pc->pc_pventry[field * 32 + bit]; 2167 pc->pc_map[field] &= ~(1ul << bit); 2168 /* If this was the last item, move it to tail */ 2169 for (field = 0; field < _NPCM; field++) 2170 if (pc->pc_map[field] != 0) { 2171 PV_STAT(pv_entry_spare--); 2172 return (pv); /* not full, return */ 2173 } 2174 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2175 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2176 PV_STAT(pv_entry_spare--); 2177 return (pv); 2178 } 2179 } 2180 /* 2181 * Access to the ptelist "pv_vafree" is synchronized by the page 2182 * queues lock. If "pv_vafree" is currently non-empty, it will 2183 * remain non-empty until pmap_ptelist_alloc() completes. 2184 */ 2185 if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq == 2186 &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | 2187 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 2188 if (try) { 2189 pv_entry_count--; 2190 PV_STAT(pc_chunk_tryfail++); 2191 return (NULL); 2192 } 2193 /* 2194 * Reclaim pv entries: At first, destroy mappings to 2195 * inactive pages. After that, if a pv chunk entry 2196 * is still needed, destroy mappings to active pages. 
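 *
 * The reclaim escalates one queue at a time: first mappings of
 * inactive pages are destroyed, then mappings of active pages, and
 * if a pv entry still cannot be had, the system panics with a
 * request to raise the vm.pmap.shpgperproc tunable.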
2197 */ 2198 if (pq == NULL) { 2199 PV_STAT(pmap_collect_inactive++); 2200 pq = &vm_page_queues[PQ_INACTIVE]; 2201 } else if (pq == &vm_page_queues[PQ_INACTIVE]) { 2202 PV_STAT(pmap_collect_active++); 2203 pq = &vm_page_queues[PQ_ACTIVE]; 2204 } else 2205 panic("get_pv_entry: increase vm.pmap.shpgperproc"); 2206 pmap_collect(pmap, pq); 2207 goto retry; 2208 } 2209 PV_STAT(pc_chunk_count++); 2210 PV_STAT(pc_chunk_allocs++); 2211 colour++; 2212 pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); 2213 pmap_qenter((vm_offset_t)pc, &m, 1); 2214 if ((m->flags & PG_ZERO) == 0) 2215 pagezero(pc); 2216 pc->pc_pmap = pmap; 2217 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 2218 for (field = 1; field < _NPCM; field++) 2219 pc->pc_map[field] = pc_freemask[field]; 2220 pv = &pc->pc_pventry[0]; 2221 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2222 PV_STAT(pv_entry_spare += _NPCPV - 1); 2223 return (pv); 2224} 2225 2226static __inline pv_entry_t 2227pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2228{ 2229 pv_entry_t pv; 2230 2231 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2232 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 2233 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 2234 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 2235 break; 2236 } 2237 } 2238 return (pv); 2239} 2240 2241static void 2242pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2243{ 2244 pv_entry_t pv; 2245 2246 pv = pmap_pvh_remove(pvh, pmap, va); 2247 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 2248 free_pv_entry(pmap, pv); 2249} 2250 2251static void 2252pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 2253{ 2254 2255 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2256 pmap_pvh_free(&m->md, pmap, va); 2257 if (TAILQ_EMPTY(&m->md.pv_list)) 2258 vm_page_flag_clear(m, PG_WRITEABLE); 2259} 2260 2261/* 2262 * Conditionally create a pv entry. 2263 */ 2264static boolean_t 2265pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 2266{ 2267 pv_entry_t pv; 2268 2269 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2270 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2271 if (pv_entry_count < pv_entry_high_water && 2272 (pv = get_pv_entry(pmap, TRUE)) != NULL) { 2273 pv->pv_va = va; 2274 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2275 return (TRUE); 2276 } else 2277 return (FALSE); 2278} 2279 2280/* 2281 * pmap_remove_pte: do the things to unmap a page in a process 2282 */ 2283static int 2284pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free) 2285{ 2286 pt_entry_t oldpte; 2287 vm_page_t m; 2288 2289 CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x", 2290 pmap, (u_long)*ptq, va); 2291 2292 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2293 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2294 oldpte = *ptq; 2295 PT_SET_VA_MA(ptq, 0, TRUE); 2296 if (oldpte & PG_W) 2297 pmap->pm_stats.wired_count -= 1; 2298 /* 2299 * Machines that don't support invlpg also don't support 2300 * PG_G. 2301 */ 2302 if (oldpte & PG_G) 2303 pmap_invalidate_page(kernel_pmap, va); 2304 pmap->pm_stats.resident_count -= 1; 2305 /* 2306 * XXX This is not strictly correct, but somewhere along the line 2307 * we are losing the managed bit on some pages. It is unclear to me 2308 * why, but I think the most likely explanation is that Xen's writable 2309 * page table implementation doesn't respect the unused bits.
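 *
 * Note also that under Xen the PTE holds a machine address, so
 * xpmap_mtop() must be applied before the frame can be handed to
 * PHYS_TO_VM_PAGE().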
2310 */ 2311 if ((oldpte & PG_MANAGED) || ((oldpte & PG_V) && (va < VM_MAXUSER_ADDRESS)) 2312 ) { 2313 m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME); 2314 2315 if (!(oldpte & PG_MANAGED)) 2316 printf("va=0x%x is unmanaged :-( pte=0x%llx\n", va, oldpte); 2317 2318 if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2319 vm_page_dirty(m); 2320 if (oldpte & PG_A) 2321 vm_page_flag_set(m, PG_REFERENCED); 2322 pmap_remove_entry(pmap, m, va); 2323 } else if ((va < VM_MAXUSER_ADDRESS) && (oldpte & PG_V)) 2324 printf("va=0x%x is unmanaged :-( pte=0x%llx\n", va, oldpte); 2325 2326 return (pmap_unuse_pt(pmap, va, free)); 2327} 2328 2329/* 2330 * Remove a single page from a process address space 2331 */ 2332static void 2333pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free) 2334{ 2335 pt_entry_t *pte; 2336 2337 CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x", 2338 pmap, va); 2339 2340 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2341 KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 2342 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2343 if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0) 2344 return; 2345 pmap_remove_pte(pmap, pte, va, free); 2346 pmap_invalidate_page(pmap, va); 2347 if (*PMAP1) 2348 PT_SET_MA(PADDR1, 0); 2349 2350} 2351 2352/* 2353 * Remove the given range of addresses from the specified map. 2354 * 2355 * It is assumed that the start and end are properly 2356 * rounded to the page size. 2357 */ 2358void 2359pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2360{ 2361 vm_offset_t pdnxt; 2362 pd_entry_t ptpaddr; 2363 pt_entry_t *pte; 2364 vm_page_t free = NULL; 2365 int anyvalid; 2366 2367 CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x", 2368 pmap, sva, eva); 2369 2370 /* 2371 * Perform an unsynchronized read. This is, however, safe. 2372 */ 2373 if (pmap->pm_stats.resident_count == 0) 2374 return; 2375 2376 anyvalid = 0; 2377 2378 vm_page_lock_queues(); 2379 sched_pin(); 2380 PMAP_LOCK(pmap); 2381 2382 /* 2383 * special handling of removing one page. a very 2384 * common operation and easy to short circuit some 2385 * code. 2386 */ 2387 if ((sva + PAGE_SIZE == eva) && 2388 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 2389 pmap_remove_page(pmap, sva, &free); 2390 goto out; 2391 } 2392 2393 for (; sva < eva; sva = pdnxt) { 2394 unsigned pdirindex; 2395 2396 /* 2397 * Calculate index for next page table. 2398 */ 2399 pdnxt = (sva + NBPDR) & ~PDRMASK; 2400 if (pmap->pm_stats.resident_count == 0) 2401 break; 2402 2403 pdirindex = sva >> PDRSHIFT; 2404 ptpaddr = pmap->pm_pdir[pdirindex]; 2405 2406 /* 2407 * Weed out invalid mappings. Note: we assume that the page 2408 * directory table is always allocated, and in kernel virtual. 2409 */ 2410 if (ptpaddr == 0) 2411 continue; 2412 2413 /* 2414 * Check for large page. 2415 */ 2416 if ((ptpaddr & PG_PS) != 0) { 2417 PD_CLEAR_VA(pmap, pdirindex, TRUE); 2418 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2419 anyvalid = 1; 2420 continue; 2421 } 2422 2423 /* 2424 * Limit our scan to either the end of the va represented 2425 * by the current page table page, or to the end of the 2426 * range being removed. 2427 */ 2428 if (pdnxt > eva) 2429 pdnxt = eva; 2430 2431 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2432 sva += PAGE_SIZE) { 2433 if ((*pte & PG_V) == 0) 2434 continue; 2435 2436 /* 2437 * The TLB entry for a PG_G mapping is invalidated 2438 * by pmap_remove_pte(). 
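 *
 * Consequently, "anyvalid" need only be set for non-global
 * mappings; it decides whether a single pmap_invalidate_all() is
 * issued after the loop completes.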
2439 */ 2440 if ((*pte & PG_G) == 0) 2441 anyvalid = 1; 2442 if (pmap_remove_pte(pmap, pte, sva, &free)) 2443 break; 2444 } 2445 } 2446 PT_UPDATES_FLUSH(); 2447 if (*PMAP1) 2448 PT_SET_VA_MA(PMAP1, 0, TRUE); 2449out: 2450 if (anyvalid) 2451 pmap_invalidate_all(pmap); 2452 sched_unpin(); 2453 vm_page_unlock_queues(); 2454 PMAP_UNLOCK(pmap); 2455 pmap_free_zero_pages(free); 2456} 2457 2458/* 2459 * Routine: pmap_remove_all 2460 * Function: 2461 * Removes this physical page from 2462 * all physical maps in which it resides. 2463 * Reflects back modify bits to the pager. 2464 * 2465 * Notes: 2466 * Original versions of this routine were very 2467 * inefficient because they iteratively called 2468 * pmap_remove (slow...) 2469 */ 2470 2471void 2472pmap_remove_all(vm_page_t m) 2473{ 2474 pv_entry_t pv; 2475 pmap_t pmap; 2476 pt_entry_t *pte, tpte; 2477 vm_page_t free; 2478 2479 KASSERT((m->flags & PG_FICTITIOUS) == 0, 2480 ("pmap_remove_all: page %p is fictitious", m)); 2481 free = NULL; 2482 vm_page_lock_queues(); 2483 sched_pin(); 2484 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2485 pmap = PV_PMAP(pv); 2486 PMAP_LOCK(pmap); 2487 pmap->pm_stats.resident_count--; 2488 pte = pmap_pte_quick(pmap, pv->pv_va); 2489 2490 tpte = *pte; 2491 PT_SET_VA_MA(pte, 0, TRUE); 2492 if (tpte & PG_W) 2493 pmap->pm_stats.wired_count--; 2494 if (tpte & PG_A) 2495 vm_page_flag_set(m, PG_REFERENCED); 2496 2497 /* 2498 * Update the vm_page_t clean and reference bits. 2499 */ 2500 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2501 vm_page_dirty(m); 2502 pmap_unuse_pt(pmap, pv->pv_va, &free); 2503 pmap_invalidate_page(pmap, pv->pv_va); 2504 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2505 free_pv_entry(pmap, pv); 2506 PMAP_UNLOCK(pmap); 2507 } 2508 vm_page_flag_clear(m, PG_WRITEABLE); 2509 PT_UPDATES_FLUSH(); 2510 if (*PMAP1) 2511 PT_SET_MA(PADDR1, 0); 2512 sched_unpin(); 2513 vm_page_unlock_queues(); 2514 pmap_free_zero_pages(free); 2515} 2516 2517/* 2518 * Set the physical protection on the 2519 * specified range of this map as requested. 2520 */ 2521void 2522pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2523{ 2524 vm_offset_t pdnxt; 2525 pd_entry_t ptpaddr; 2526 pt_entry_t *pte; 2527 int anychanged; 2528 2529 CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x", 2530 pmap, sva, eva, prot); 2531 2532 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2533 pmap_remove(pmap, sva, eva); 2534 return; 2535 } 2536 2537#ifdef PAE 2538 if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == 2539 (VM_PROT_WRITE|VM_PROT_EXECUTE)) 2540 return; 2541#else 2542 if (prot & VM_PROT_WRITE) 2543 return; 2544#endif 2545 2546 anychanged = 0; 2547 2548 vm_page_lock_queues(); 2549 sched_pin(); 2550 PMAP_LOCK(pmap); 2551 for (; sva < eva; sva = pdnxt) { 2552 pt_entry_t obits, pbits; 2553 unsigned pdirindex; 2554 2555 pdnxt = (sva + NBPDR) & ~PDRMASK; 2556 2557 pdirindex = sva >> PDRSHIFT; 2558 ptpaddr = pmap->pm_pdir[pdirindex]; 2559 2560 /* 2561 * Weed out invalid mappings. Note: we assume that the page 2562 * directory table is always allocated, and in kernel virtual. 2563 */ 2564 if (ptpaddr == 0) 2565 continue; 2566 2567 /* 2568 * Check for large page. 
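 * A 2/4MB mapping is write-protected (and, under PAE, marked
 * no-execute) by rewriting its single PDE in place rather than by
 * visiting each constituent 4KB page.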
2569 */ 2570 if ((ptpaddr & PG_PS) != 0) { 2571 if ((prot & VM_PROT_WRITE) == 0) 2572 pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); 2573#ifdef PAE 2574 if ((prot & VM_PROT_EXECUTE) == 0) 2575 pmap->pm_pdir[pdirindex] |= pg_nx; 2576#endif 2577 anychanged = 1; 2578 continue; 2579 } 2580 2581 if (pdnxt > eva) 2582 pdnxt = eva; 2583 2584 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2585 sva += PAGE_SIZE) { 2586 vm_page_t m; 2587 2588retry: 2589 /* 2590 * Regardless of whether a pte is 32 or 64 bits in 2591 * size, PG_RW, PG_A, and PG_M are among the least 2592 * significant 32 bits. 2593 */ 2594 obits = pbits = *pte; 2595 if ((pbits & PG_V) == 0) 2596 continue; 2597 2598 if ((prot & VM_PROT_WRITE) == 0) { 2599 if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == 2600 (PG_MANAGED | PG_M | PG_RW)) { 2601 m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & 2602 PG_FRAME); 2603 vm_page_dirty(m); 2604 } 2605 pbits &= ~(PG_RW | PG_M); 2606 } 2607#ifdef PAE 2608 if ((prot & VM_PROT_EXECUTE) == 0) 2609 pbits |= pg_nx; 2610#endif 2611 2612 if (pbits != obits) { 2613#ifdef XEN 2614 obits = *pte; 2615 PT_SET_VA_MA(pte, pbits, TRUE); 2616 if (*pte != pbits) 2617 goto retry; 2618#else 2619#ifdef PAE 2620 if (!atomic_cmpset_64(pte, obits, pbits)) 2621 goto retry; 2622#else 2623 if (!atomic_cmpset_int((u_int *)pte, obits, 2624 pbits)) 2625 goto retry; 2626#endif 2627#endif 2628 if (obits & PG_G) 2629 pmap_invalidate_page(pmap, sva); 2630 else 2631 anychanged = 1; 2632 } 2633 } 2634 } 2635 PT_UPDATES_FLUSH(); 2636 if (*PMAP1) 2637 PT_SET_VA_MA(PMAP1, 0, TRUE); 2638 if (anychanged) 2639 pmap_invalidate_all(pmap); 2640 sched_unpin(); 2641 vm_page_unlock_queues(); 2642 PMAP_UNLOCK(pmap); 2643} 2644 2645/* 2646 * Insert the given physical page (p) at 2647 * the specified virtual address (v) in the 2648 * target physical map with the protection requested. 2649 * 2650 * If specified, the page will be wired down, meaning 2651 * that the related pte can not be reclaimed. 2652 * 2653 * NB: This is the only routine which MAY NOT lazy-evaluate 2654 * or lose information. That is, this routine must actually 2655 * insert this page into the given map NOW. 2656 */ 2657void 2658pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 2659 vm_prot_t prot, boolean_t wired) 2660{ 2661 pd_entry_t *pde; 2662 pt_entry_t *pte; 2663 pt_entry_t newpte, origpte; 2664 pv_entry_t pv; 2665 vm_paddr_t opa, pa; 2666 vm_page_t mpte, om; 2667 boolean_t invlva; 2668 2669 CTR6(KTR_PMAP, "pmap_enter: pmap=%08p va=0x%08x access=0x%x ma=0x%08x prot=0x%x wired=%d", 2670 pmap, va, access, xpmap_ptom(VM_PAGE_TO_PHYS(m)), prot, wired); 2671 va = trunc_page(va); 2672 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 2673 KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, 2674 ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", 2675 va)); 2676 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || 2677 (m->oflags & VPO_BUSY) != 0, 2678 ("pmap_enter: page %p is not busy", m)); 2679 2680 mpte = NULL; 2681 2682 vm_page_lock_queues(); 2683 PMAP_LOCK(pmap); 2684 sched_pin(); 2685 2686 /* 2687 * In the case that a page table page is not 2688 * resident, we are creating it here. 
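 * Because pmap_enter() is not allowed to fail, the page table
 * page is allocated with M_WAITOK, so this call may sleep until
 * memory becomes available.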
2689 */ 2690 if (va < VM_MAXUSER_ADDRESS) { 2691 mpte = pmap_allocpte(pmap, va, M_WAITOK); 2692 } 2693 2694 pde = pmap_pde(pmap, va); 2695 if ((*pde & PG_PS) != 0) 2696 panic("pmap_enter: attempted pmap_enter on 4MB page"); 2697 pte = pmap_pte_quick(pmap, va); 2698 2699 /* 2700 * Page Directory table entry not valid, we need a new PT page 2701 */ 2702 if (pte == NULL) { 2703 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", 2704 (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va); 2705 } 2706 2707 pa = VM_PAGE_TO_PHYS(m); 2708 om = NULL; 2709 opa = origpte = 0; 2710 2711#if 0 2712 KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx", 2713 pte, *pte)); 2714#endif 2715 origpte = *pte; 2716 if (origpte) 2717 origpte = xpmap_mtop(origpte); 2718 opa = origpte & PG_FRAME; 2719 2720 /* 2721 * Mapping has not changed, must be protection or wiring change. 2722 */ 2723 if (origpte && (opa == pa)) { 2724 /* 2725 * Wiring change, just update stats. We don't worry about 2726 * wiring PT pages as they remain resident as long as there 2727 * are valid mappings in them. Hence, if a user page is wired, 2728 * the PT page will be also. 2729 */ 2730 if (wired && ((origpte & PG_W) == 0)) 2731 pmap->pm_stats.wired_count++; 2732 else if (!wired && (origpte & PG_W)) 2733 pmap->pm_stats.wired_count--; 2734 2735 /* 2736 * Remove extra pte reference 2737 */ 2738 if (mpte) 2739 mpte->wire_count--; 2740 2741 if (origpte & PG_MANAGED) { 2742 om = m; 2743 pa |= PG_MANAGED; 2744 } 2745 goto validate; 2746 } 2747 2748 pv = NULL; 2749 2750 /* 2751 * Mapping has changed, invalidate old range and fall through to 2752 * handle validating new mapping. 2753 */ 2754 if (opa) { 2755 if (origpte & PG_W) 2756 pmap->pm_stats.wired_count--; 2757 if (origpte & PG_MANAGED) { 2758 om = PHYS_TO_VM_PAGE(opa); 2759 pv = pmap_pvh_remove(&om->md, pmap, va); 2760 } else if (va < VM_MAXUSER_ADDRESS) 2761 printf("va=0x%x is unmanaged :-( \n", va); 2762 2763 if (mpte != NULL) { 2764 mpte->wire_count--; 2765 KASSERT(mpte->wire_count > 0, 2766 ("pmap_enter: missing reference to page table page," 2767 " va: 0x%x", va)); 2768 } 2769 } else 2770 pmap->pm_stats.resident_count++; 2771 2772 /* 2773 * Enter on the PV list if part of our managed memory. 2774 */ 2775 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 2776 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 2777 ("pmap_enter: managed mapping within the clean submap")); 2778 if (pv == NULL) 2779 pv = get_pv_entry(pmap, FALSE); 2780 pv->pv_va = va; 2781 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2782 pa |= PG_MANAGED; 2783 } else if (pv != NULL) 2784 free_pv_entry(pmap, pv); 2785 2786 /* 2787 * Increment counters 2788 */ 2789 if (wired) 2790 pmap->pm_stats.wired_count++; 2791 2792validate: 2793 /* 2794 * Now validate mapping with desired protection/wiring. 2795 */ 2796 newpte = (pt_entry_t)(pa | PG_V); 2797 if ((prot & VM_PROT_WRITE) != 0) { 2798 newpte |= PG_RW; 2799 if ((newpte & PG_MANAGED) != 0) 2800 vm_page_flag_set(m, PG_WRITEABLE); 2801 } 2802#ifdef PAE 2803 if ((prot & VM_PROT_EXECUTE) == 0) 2804 newpte |= pg_nx; 2805#endif 2806 if (wired) 2807 newpte |= PG_W; 2808 if (va < VM_MAXUSER_ADDRESS) 2809 newpte |= PG_U; 2810 if (pmap == kernel_pmap) 2811 newpte |= pgeflag; 2812 2813 critical_enter(); 2814 /* 2815 * if the mapping or permission bits are different, we need 2816 * to update the pte. 
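 * When the old mapping was valid, the TLB is flushed only if a
 * stale entry could actually be exercised: the old PTE had PG_A
 * set and either the physical page changed, execute permission was
 * revoked (under PAE), or a dirty, writable mapping lost its write
 * permission.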
2817 */ 2818 if ((origpte & ~(PG_M|PG_A)) != newpte) { 2819 if (origpte) { 2820 invlva = FALSE; 2821 origpte = *pte; 2822 PT_SET_VA(pte, newpte | PG_A, FALSE); 2823 if (origpte & PG_A) { 2824 if (origpte & PG_MANAGED) 2825 vm_page_flag_set(om, PG_REFERENCED); 2826 if (opa != VM_PAGE_TO_PHYS(m)) 2827 invlva = TRUE; 2828#ifdef PAE 2829 if ((origpte & PG_NX) == 0 && 2830 (newpte & PG_NX) != 0) 2831 invlva = TRUE; 2832#endif 2833 } 2834 if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 2835 if ((origpte & PG_MANAGED) != 0) 2836 vm_page_dirty(om); 2837 if ((prot & VM_PROT_WRITE) == 0) 2838 invlva = TRUE; 2839 } 2840 if ((origpte & PG_MANAGED) != 0 && 2841 TAILQ_EMPTY(&om->md.pv_list)) 2842 vm_page_flag_clear(om, PG_WRITEABLE); 2843 if (invlva) 2844 pmap_invalidate_page(pmap, va); 2845 } else{ 2846 PT_SET_VA(pte, newpte | PG_A, FALSE); 2847 } 2848 2849 } 2850 PT_UPDATES_FLUSH(); 2851 critical_exit(); 2852 if (*PMAP1) 2853 PT_SET_VA_MA(PMAP1, 0, TRUE); 2854 sched_unpin(); 2855 vm_page_unlock_queues(); 2856 PMAP_UNLOCK(pmap); 2857} 2858 2859/* 2860 * Maps a sequence of resident pages belonging to the same object. 2861 * The sequence begins with the given page m_start. This page is 2862 * mapped at the given virtual address start. Each subsequent page is 2863 * mapped at a virtual address that is offset from start by the same 2864 * amount as the page is offset from m_start within the object. The 2865 * last page in the sequence is the page with the largest offset from 2866 * m_start that can be mapped at a virtual address less than the given 2867 * virtual address end. Not every virtual page between start and end 2868 * is mapped; only those for which a resident page exists with the 2869 * corresponding offset from m_start are mapped. 2870 */ 2871void 2872pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2873 vm_page_t m_start, vm_prot_t prot) 2874{ 2875 vm_page_t m, mpte; 2876 vm_pindex_t diff, psize; 2877 multicall_entry_t mcl[16]; 2878 multicall_entry_t *mclp = mcl; 2879 int error, count = 0; 2880 2881 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 2882 psize = atop(end - start); 2883 2884 mpte = NULL; 2885 m = m_start; 2886 vm_page_lock_queues(); 2887 PMAP_LOCK(pmap); 2888 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2889 mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m, 2890 prot, mpte); 2891 m = TAILQ_NEXT(m, listq); 2892 if (count == 16) { 2893 error = HYPERVISOR_multicall(mcl, count); 2894 KASSERT(error == 0, ("bad multicall %d", error)); 2895 mclp = mcl; 2896 count = 0; 2897 } 2898 } 2899 if (count) { 2900 error = HYPERVISOR_multicall(mcl, count); 2901 KASSERT(error == 0, ("bad multicall %d", error)); 2902 } 2903 vm_page_unlock_queues(); 2904 PMAP_UNLOCK(pmap); 2905} 2906 2907/* 2908 * this code makes some *MAJOR* assumptions: 2909 * 1. Current pmap & pmap exists. 2910 * 2. Not wired. 2911 * 3. Read access. 2912 * 4. No page table pages. 2913 * but is *MUCH* faster than pmap_enter... 
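 *
 * Under Xen each PTE write would otherwise be a separate
 * hypercall, so the quick-entry paths batch their updates: every
 * mapping is queued as an __HYPERVISOR_update_va_mapping multicall
 * entry and the queue is flushed through HYPERVISOR_multicall()
 * sixteen entries at a time. In outline, with "ma" the
 * machine-address form of the new PTE value (see
 * pmap_enter_quick_locked() below):
 *
 *	mcl->op = __HYPERVISOR_update_va_mapping;
 *	mcl->args[0] = va;
 *	mcl->args[1] = (uint32_t)(ma & 0xffffffff);
 *	mcl->args[2] = (uint32_t)(ma >> 32);
 *	mcl->args[3] = 0;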
2914 */ 2915 2916void 2917pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2918{ 2919 multicall_entry_t mcl, *mclp; 2920 int count = 0; 2921 mclp = &mcl; 2922 2923 CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x", 2924 pmap, va, m, prot); 2925 2926 vm_page_lock_queues(); 2927 PMAP_LOCK(pmap); 2928 (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL); 2929 if (count) 2930 HYPERVISOR_multicall(&mcl, count); 2931 vm_page_unlock_queues(); 2932 PMAP_UNLOCK(pmap); 2933} 2934 2935#ifdef notyet 2936void 2937pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count) 2938{ 2939 int i, error, index = 0; 2940 multicall_entry_t mcl[16]; 2941 multicall_entry_t *mclp = mcl; 2942 2943 PMAP_LOCK(pmap); 2944 for (i = 0; i < count; i++, addrs++, pages++, prots++) { 2945 if (!pmap_is_prefaultable_locked(pmap, *addrs)) 2946 continue; 2947 2948 (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL); 2949 if (index == 16) { 2950 error = HYPERVISOR_multicall(mcl, index); 2951 mclp = mcl; 2952 index = 0; 2953 KASSERT(error == 0, ("bad multicall %d", error)); 2954 } 2955 } 2956 if (index) { 2957 error = HYPERVISOR_multicall(mcl, index); 2958 KASSERT(error == 0, ("bad multicall %d", error)); 2959 } 2960 2961 PMAP_UNLOCK(pmap); 2962} 2963#endif 2964 2965static vm_page_t 2966pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m, 2967 vm_prot_t prot, vm_page_t mpte) 2968{ 2969 pt_entry_t *pte; 2970 vm_paddr_t pa; 2971 vm_page_t free; 2972 multicall_entry_t *mcl = *mclpp; 2973 2974 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2975 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 2976 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2977 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2978 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2979 2980 /* 2981 * In the case that a page table page is not 2982 * resident, we are creating it here. 2983 */ 2984 if (va < VM_MAXUSER_ADDRESS) { 2985 unsigned ptepindex; 2986 pd_entry_t ptema; 2987 2988 /* 2989 * Calculate pagetable page index 2990 */ 2991 ptepindex = va >> PDRSHIFT; 2992 if (mpte && (mpte->pindex == ptepindex)) { 2993 mpte->wire_count++; 2994 } else { 2995 /* 2996 * Get the page directory entry 2997 */ 2998 ptema = pmap->pm_pdir[ptepindex]; 2999 3000 /* 3001 * If the page table page is mapped, we just increment 3002 * the hold count, and activate it. 3003 */ 3004 if (ptema & PG_V) { 3005 if (ptema & PG_PS) 3006 panic("pmap_enter_quick: unexpected mapping into 4MB page"); 3007 mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME); 3008 mpte->wire_count++; 3009 } else { 3010 mpte = _pmap_allocpte(pmap, ptepindex, 3011 M_NOWAIT); 3012 if (mpte == NULL) 3013 return (mpte); 3014 } 3015 } 3016 } else { 3017 mpte = NULL; 3018 } 3019 3020 /* 3021 * This call to vtopte makes the assumption that we are 3022 * entering the page into the current pmap. In order to support 3023 * quick entry into any pmap, one would likely use pmap_pte_quick. 3024 * But that isn't as quick as vtopte. 3025 */ 3026 KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap")); 3027 pte = vtopte(va); 3028 if (*pte & PG_V) { 3029 if (mpte != NULL) { 3030 mpte->wire_count--; 3031 mpte = NULL; 3032 } 3033 return (mpte); 3034 } 3035 3036 /* 3037 * Enter on the PV list if part of our managed memory. 
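 * If a pv entry cannot be allocated without sleeping, the mapping
 * is simply not entered: the page table page reference taken above
 * is dropped and NULL is returned, leaving any fault to be handled
 * by the full pmap_enter() path later.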
3038 */ 3039 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && 3040 !pmap_try_insert_pv_entry(pmap, va, m)) { 3041 if (mpte != NULL) { 3042 free = NULL; 3043 if (pmap_unwire_pte_hold(pmap, mpte, &free)) { 3044 pmap_invalidate_page(pmap, va); 3045 pmap_free_zero_pages(free); 3046 } 3047 3048 mpte = NULL; 3049 } 3050 return (mpte); 3051 } 3052 3053 /* 3054 * Increment counters 3055 */ 3056 pmap->pm_stats.resident_count++; 3057 3058 pa = VM_PAGE_TO_PHYS(m); 3059#ifdef PAE 3060 if ((prot & VM_PROT_EXECUTE) == 0) 3061 pa |= pg_nx; 3062#endif 3063 3064#if 0 3065 /* 3066 * Now validate mapping with RO protection 3067 */ 3068 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 3069 pte_store(pte, pa | PG_V | PG_U); 3070 else 3071 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 3072#else 3073 /* 3074 * Now validate mapping with RO protection 3075 */ 3076 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 3077 pa = xpmap_ptom(pa | PG_V | PG_U); 3078 else 3079 pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED); 3080 3081 mcl->op = __HYPERVISOR_update_va_mapping; 3082 mcl->args[0] = va; 3083 mcl->args[1] = (uint32_t)(pa & 0xffffffff); 3084 mcl->args[2] = (uint32_t)(pa >> 32); 3085 mcl->args[3] = 0; 3086 *mclpp = mcl + 1; 3087 *count = *count + 1; 3088#endif 3089 return mpte; 3090} 3091 3092/* 3093 * Make a temporary mapping for a physical address. This is only intended 3094 * to be used for panic dumps. 3095 */ 3096void * 3097pmap_kenter_temporary(vm_paddr_t pa, int i) 3098{ 3099 vm_offset_t va; 3100 vm_paddr_t ma = xpmap_ptom(pa); 3101 3102 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 3103 PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag); 3104 invlpg(va); 3105 return ((void *)crashdumpmap); 3106} 3107 3108/* 3109 * This code maps large physical mmap regions into the 3110 * processor address space. Note that some shortcuts 3111 * are taken, but the code works. 3112 */ 3113void 3114pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 3115 vm_object_t object, vm_pindex_t pindex, 3116 vm_size_t size) 3117{ 3118 pd_entry_t *pde; 3119 vm_paddr_t pa, ptepa; 3120 vm_page_t p; 3121 int pat_mode; 3122 3123 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 3124 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3125 ("pmap_object_init_pt: non-device object")); 3126 if (pseflag && 3127 (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { 3128 if (!vm_object_populate(object, pindex, pindex + atop(size))) 3129 return; 3130 p = vm_page_lookup(object, pindex); 3131 KASSERT(p->valid == VM_PAGE_BITS_ALL, 3132 ("pmap_object_init_pt: invalid page %p", p)); 3133 pat_mode = p->md.pat_mode; 3134 /* 3135 * Abort the mapping if the first page is not physically 3136 * aligned to a 2/4MB page boundary. 3137 */ 3138 ptepa = VM_PAGE_TO_PHYS(p); 3139 if (ptepa & (NBPDR - 1)) 3140 return; 3141 /* 3142 * Skip the first page. Abort the mapping if the rest of 3143 * the pages are not physically contiguous or have differing 3144 * memory attributes. 3145 */ 3146 p = TAILQ_NEXT(p, listq); 3147 for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; 3148 pa += PAGE_SIZE) { 3149 KASSERT(p->valid == VM_PAGE_BITS_ALL, 3150 ("pmap_object_init_pt: invalid page %p", p)); 3151 if (pa != VM_PAGE_TO_PHYS(p) || 3152 pat_mode != p->md.pat_mode) 3153 return; 3154 p = TAILQ_NEXT(p, listq); 3155 } 3156 /* Map using 2/4MB pages. 
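 * Each aligned 2/4MB run is entered as a single PDE with PG_PS
 * set, so no page table page is consumed; PDEs that are already
 * valid are skipped rather than overwritten.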
*/ 3157 PMAP_LOCK(pmap); 3158 for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + 3159 size; pa += NBPDR) { 3160 pde = pmap_pde(pmap, addr); 3161 if (*pde == 0) { 3162 pde_store(pde, pa | PG_PS | PG_M | PG_A | 3163 PG_U | PG_RW | PG_V); 3164 pmap->pm_stats.resident_count += NBPDR / 3165 PAGE_SIZE; 3166 pmap_pde_mappings++; 3167 } 3168 /* Else continue on if the PDE is already valid. */ 3169 addr += NBPDR; 3170 } 3171 PMAP_UNLOCK(pmap); 3172 } 3173} 3174 3175/* 3176 * Routine: pmap_change_wiring 3177 * Function: Change the wiring attribute for a map/virtual-address 3178 * pair. 3179 * In/out conditions: 3180 * The mapping must already exist in the pmap. 3181 */ 3182void 3183pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 3184{ 3185 pt_entry_t *pte; 3186 3187 vm_page_lock_queues(); 3188 PMAP_LOCK(pmap); 3189 pte = pmap_pte(pmap, va); 3190 3191 if (wired && !pmap_pte_w(pte)) { 3192 PT_SET_VA_MA((pte), *(pte) | PG_W, TRUE); 3193 pmap->pm_stats.wired_count++; 3194 } else if (!wired && pmap_pte_w(pte)) { 3195 PT_SET_VA_MA((pte), *(pte) & ~PG_W, TRUE); 3196 pmap->pm_stats.wired_count--; 3197 } 3198 3199 /* 3200 * Wiring is not a hardware characteristic so there is no need to 3201 * invalidate TLB. 3202 */ 3203 pmap_pte_release(pte); 3204 PMAP_UNLOCK(pmap); 3205 vm_page_unlock_queues(); 3206} 3207 3208 3209 3210/* 3211 * Copy the range specified by src_addr/len 3212 * from the source map to the range dst_addr/len 3213 * in the destination map. 3214 * 3215 * This routine is only advisory and need not do anything. 3216 */ 3217 3218void 3219pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3220 vm_offset_t src_addr) 3221{ 3222 vm_page_t free; 3223 vm_offset_t addr; 3224 vm_offset_t end_addr = src_addr + len; 3225 vm_offset_t pdnxt; 3226 3227 if (dst_addr != src_addr) 3228 return; 3229 3230 if (!pmap_is_current(src_pmap)) { 3231 CTR2(KTR_PMAP, 3232 "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx", 3233 (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME)); 3234 3235 return; 3236 } 3237 CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x", 3238 dst_pmap, src_pmap, dst_addr, len, src_addr); 3239 3240 vm_page_lock_queues(); 3241 if (dst_pmap < src_pmap) { 3242 PMAP_LOCK(dst_pmap); 3243 PMAP_LOCK(src_pmap); 3244 } else { 3245 PMAP_LOCK(src_pmap); 3246 PMAP_LOCK(dst_pmap); 3247 } 3248 sched_pin(); 3249 for (addr = src_addr; addr < end_addr; addr = pdnxt) { 3250 pt_entry_t *src_pte, *dst_pte; 3251 vm_page_t dstmpte, srcmpte; 3252 pd_entry_t srcptepaddr; 3253 unsigned ptepindex; 3254 3255 KASSERT(addr < UPT_MIN_ADDRESS, 3256 ("pmap_copy: invalid to pmap_copy page tables")); 3257 3258 pdnxt = (addr + NBPDR) & ~PDRMASK; 3259 ptepindex = addr >> PDRSHIFT; 3260 3261 srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]); 3262 if (srcptepaddr == 0) 3263 continue; 3264 3265 if (srcptepaddr & PG_PS) { 3266 if (dst_pmap->pm_pdir[ptepindex] == 0) { 3267 PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE); 3268 dst_pmap->pm_stats.resident_count += 3269 NBPDR / PAGE_SIZE; 3270 } 3271 continue; 3272 } 3273 3274 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); 3275 KASSERT(srcmpte->wire_count > 0, 3276 ("pmap_copy: source page table page is unused")); 3277 3278 if (pdnxt > end_addr) 3279 pdnxt = end_addr; 3280 3281 src_pte = vtopte(addr); 3282 while (addr < pdnxt) { 3283 pt_entry_t ptetemp; 3284 ptetemp = *src_pte; 3285 /* 3286 * we only virtual copy managed pages 3287 */ 3288 if ((ptetemp & PG_MANAGED) != 
0) { 3289 dstmpte = pmap_allocpte(dst_pmap, addr, 3290 M_NOWAIT); 3291 if (dstmpte == NULL) 3292 break; 3293 dst_pte = pmap_pte_quick(dst_pmap, addr); 3294 if (*dst_pte == 0 && 3295 pmap_try_insert_pv_entry(dst_pmap, addr, 3296 PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) { 3297 /* 3298 * Clear the wired, modified, and 3299 * accessed (referenced) bits 3300 * during the copy. 3301 */ 3302 KASSERT(ptetemp != 0, ("src_pte not set")); 3303 PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */); 3304 KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)), 3305 ("no pmap copy expected: 0x%jx saw: 0x%jx", 3306 ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte)); 3307 dst_pmap->pm_stats.resident_count++; 3308 } else { 3309 free = NULL; 3310 if (pmap_unwire_pte_hold(dst_pmap, 3311 dstmpte, &free)) { 3312 pmap_invalidate_page(dst_pmap, 3313 addr); 3314 pmap_free_zero_pages(free); 3315 } 3316 } 3317 if (dstmpte->wire_count >= srcmpte->wire_count) 3318 break; 3319 } 3320 addr += PAGE_SIZE; 3321 src_pte++; 3322 } 3323 } 3324 PT_UPDATES_FLUSH(); 3325 sched_unpin(); 3326 vm_page_unlock_queues(); 3327 PMAP_UNLOCK(src_pmap); 3328 PMAP_UNLOCK(dst_pmap); 3329} 3330 3331static __inline void 3332pagezero(void *page) 3333{ 3334#if defined(I686_CPU) 3335 if (cpu_class == CPUCLASS_686) { 3336#if defined(CPU_ENABLE_SSE) 3337 if (cpu_feature & CPUID_SSE2) 3338 sse2_pagezero(page); 3339 else 3340#endif 3341 i686_pagezero(page); 3342 } else 3343#endif 3344 bzero(page, PAGE_SIZE); 3345} 3346 3347/* 3348 * pmap_zero_page zeros the specified hardware page by mapping 3349 * the page into KVM and using bzero to clear its contents. 3350 */ 3351void 3352pmap_zero_page(vm_page_t m) 3353{ 3354 struct sysmaps *sysmaps; 3355 3356 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3357 mtx_lock(&sysmaps->lock); 3358 if (*sysmaps->CMAP2) 3359 panic("pmap_zero_page: CMAP2 busy"); 3360 sched_pin(); 3361 PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M); 3362 pagezero(sysmaps->CADDR2); 3363 PT_SET_MA(sysmaps->CADDR2, 0); 3364 sched_unpin(); 3365 mtx_unlock(&sysmaps->lock); 3366} 3367 3368/* 3369 * pmap_zero_page_area zeros the specified hardware page by mapping 3370 * the page into KVM and using bzero to clear its contents. 3371 * 3372 * off and size may not cover an area beyond a single hardware page. 3373 */ 3374void 3375pmap_zero_page_area(vm_page_t m, int off, int size) 3376{ 3377 struct sysmaps *sysmaps; 3378 3379 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3380 mtx_lock(&sysmaps->lock); 3381 if (*sysmaps->CMAP2) 3382 panic("pmap_zero_page: CMAP2 busy"); 3383 sched_pin(); 3384 PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M); 3385 3386 if (off == 0 && size == PAGE_SIZE) 3387 pagezero(sysmaps->CADDR2); 3388 else 3389 bzero((char *)sysmaps->CADDR2 + off, size); 3390 PT_SET_MA(sysmaps->CADDR2, 0); 3391 sched_unpin(); 3392 mtx_unlock(&sysmaps->lock); 3393} 3394 3395/* 3396 * pmap_zero_page_idle zeros the specified hardware page by mapping 3397 * the page into KVM and using bzero to clear its contents. This 3398 * is intended to be called from the vm_pagezero process only and 3399 * outside of Giant. 
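 * Because CMAP3/CADDR3 are reserved for this single caller, no
 * mutex is taken; sched_pin() is still required so that the
 * transient mapping is only used from one CPU.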
3400 */ 3401void 3402pmap_zero_page_idle(vm_page_t m) 3403{ 3404 3405 if (*CMAP3) 3406 panic("pmap_zero_page: CMAP3 busy"); 3407 sched_pin(); 3408 PT_SET_MA(CADDR3, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M); 3409 pagezero(CADDR3); 3410 PT_SET_MA(CADDR3, 0); 3411 sched_unpin(); 3412} 3413 3414/* 3415 * pmap_copy_page copies the specified (machine independent) 3416 * page by mapping the page into virtual memory and using 3417 * bcopy to copy the page, one machine dependent page at a 3418 * time. 3419 */ 3420void 3421pmap_copy_page(vm_page_t src, vm_page_t dst) 3422{ 3423 struct sysmaps *sysmaps; 3424 3425 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3426 mtx_lock(&sysmaps->lock); 3427 if (*sysmaps->CMAP1) 3428 panic("pmap_copy_page: CMAP1 busy"); 3429 if (*sysmaps->CMAP2) 3430 panic("pmap_copy_page: CMAP2 busy"); 3431 sched_pin(); 3432 PT_SET_MA(sysmaps->CADDR1, PG_V | xpmap_ptom(VM_PAGE_TO_PHYS(src)) | PG_A); 3433 PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(dst)) | PG_A | PG_M); 3434 bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); 3435 PT_SET_MA(sysmaps->CADDR1, 0); 3436 PT_SET_MA(sysmaps->CADDR2, 0); 3437 sched_unpin(); 3438 mtx_unlock(&sysmaps->lock); 3439} 3440 3441/* 3442 * Returns true if the pmap's pv is one of the first 3443 * 16 pvs linked to from this page. This count may 3444 * be changed upwards or downwards in the future; it 3445 * is only necessary that true be returned for a small 3446 * subset of pmaps for proper page aging. 3447 */ 3448boolean_t 3449pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 3450{ 3451 pv_entry_t pv; 3452 int loops = 0; 3453 boolean_t rv; 3454 3455 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3456 ("pmap_page_exists_quick: page %p is not managed", m)); 3457 rv = FALSE; 3458 vm_page_lock_queues(); 3459 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3460 if (PV_PMAP(pv) == pmap) { 3461 rv = TRUE; 3462 break; 3463 } 3464 loops++; 3465 if (loops >= 16) 3466 break; 3467 } 3468 vm_page_unlock_queues(); 3469 return (rv); 3470} 3471 3472/* 3473 * pmap_page_wired_mappings: 3474 * 3475 * Return the number of managed mappings to the given physical page 3476 * that are wired. 3477 */ 3478int 3479pmap_page_wired_mappings(vm_page_t m) 3480{ 3481 pv_entry_t pv; 3482 pt_entry_t *pte; 3483 pmap_t pmap; 3484 int count; 3485 3486 count = 0; 3487 if ((m->flags & PG_FICTITIOUS) != 0) 3488 return (count); 3489 vm_page_lock_queues(); 3490 sched_pin(); 3491 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3492 pmap = PV_PMAP(pv); 3493 PMAP_LOCK(pmap); 3494 pte = pmap_pte_quick(pmap, pv->pv_va); 3495 if ((*pte & PG_W) != 0) 3496 count++; 3497 PMAP_UNLOCK(pmap); 3498 } 3499 sched_unpin(); 3500 vm_page_unlock_queues(); 3501 return (count); 3502} 3503 3504/* 3505 * Returns TRUE if the given page is mapped individually or as part of 3506 * a 4mpage. Otherwise, returns FALSE. 3507 */ 3508boolean_t 3509pmap_page_is_mapped(vm_page_t m) 3510{ 3511 boolean_t rv; 3512 3513 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) 3514 return (FALSE); 3515 vm_page_lock_queues(); 3516 rv = !TAILQ_EMPTY(&m->md.pv_list) || 3517 !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); 3518 vm_page_unlock_queues(); 3519 return (rv); 3520} 3521 3522/* 3523 * Remove all pages from specified address space 3524 * this aids process exit speeds. Also, this code 3525 * is special cased for current process only, but 3526 * can have the more generic (and slightly slower) 3527 * mode enabled. 
This is much faster than pmap_remove 3528 * in the case of running down an entire address space. 3529 */ 3530void 3531pmap_remove_pages(pmap_t pmap) 3532{ 3533 pt_entry_t *pte, tpte; 3534 vm_page_t m, free = NULL; 3535 pv_entry_t pv; 3536 struct pv_chunk *pc, *npc; 3537 int field, idx; 3538 int32_t bit; 3539 uint32_t inuse, bitmask; 3540 int allfree; 3541 3542 CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap); 3543 3544 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 3545 printf("warning: pmap_remove_pages called with non-current pmap\n"); 3546 return; 3547 } 3548 vm_page_lock_queues(); 3549 KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap")); 3550 PMAP_LOCK(pmap); 3551 sched_pin(); 3552 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 3553 allfree = 1; 3554 for (field = 0; field < _NPCM; field++) { 3555 inuse = (~(pc->pc_map[field])) & pc_freemask[field]; 3556 while (inuse != 0) { 3557 bit = bsfl(inuse); 3558 bitmask = 1UL << bit; 3559 idx = field * 32 + bit; 3560 pv = &pc->pc_pventry[idx]; 3561 inuse &= ~bitmask; 3562 3563 pte = vtopte(pv->pv_va); 3564 tpte = *pte ? xpmap_mtop(*pte) : 0; 3565 3566 if (tpte == 0) { 3567 printf( 3568 "TPTE at %p IS ZERO @ VA %08x\n", 3569 pte, pv->pv_va); 3570 panic("bad pte"); 3571 } 3572 3573/* 3574 * We cannot remove wired pages from a process' mapping at this time 3575 */ 3576 if (tpte & PG_W) { 3577 allfree = 0; 3578 continue; 3579 } 3580 3581 m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); 3582 KASSERT(m->phys_addr == (tpte & PG_FRAME), 3583 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 3584 m, (uintmax_t)m->phys_addr, 3585 (uintmax_t)tpte)); 3586 3587 KASSERT(m < &vm_page_array[vm_page_array_size], 3588 ("pmap_remove_pages: bad tpte %#jx", 3589 (uintmax_t)tpte)); 3590 3591 3592 PT_CLEAR_VA(pte, FALSE); 3593 3594 /* 3595 * Update the vm_page_t clean/reference bits. 3596 */ 3597 if (tpte & PG_M) 3598 vm_page_dirty(m); 3599 3600 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3601 if (TAILQ_EMPTY(&m->md.pv_list)) 3602 vm_page_flag_clear(m, PG_WRITEABLE); 3603 3604 pmap_unuse_pt(pmap, pv->pv_va, &free); 3605 3606 /* Mark free */ 3607 PV_STAT(pv_entry_frees++); 3608 PV_STAT(pv_entry_spare++); 3609 pv_entry_count--; 3610 pc->pc_map[field] |= bitmask; 3611 pmap->pm_stats.resident_count--; 3612 } 3613 } 3614 PT_UPDATES_FLUSH(); 3615 if (allfree) { 3616 PV_STAT(pv_entry_spare -= _NPCPV); 3617 PV_STAT(pc_chunk_count--); 3618 PV_STAT(pc_chunk_frees++); 3619 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 3620 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 3621 pmap_qremove((vm_offset_t)pc, 1); 3622 vm_page_unwire(m, 0); 3623 vm_page_free(m); 3624 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 3625 } 3626 } 3627 PT_UPDATES_FLUSH(); 3628 if (*PMAP1) 3629 PT_SET_MA(PADDR1, 0); 3630 3631 sched_unpin(); 3632 pmap_invalidate_all(pmap); 3633 vm_page_unlock_queues(); 3634 PMAP_UNLOCK(pmap); 3635 pmap_free_zero_pages(free); 3636} 3637 3638/* 3639 * pmap_is_modified: 3640 * 3641 * Return whether or not the specified physical page was modified 3642 * in any physical maps. 3643 */ 3644boolean_t 3645pmap_is_modified(vm_page_t m) 3646{ 3647 pv_entry_t pv; 3648 pt_entry_t *pte; 3649 pmap_t pmap; 3650 boolean_t rv; 3651 3652 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3653 ("pmap_is_modified: page %p is not managed", m)); 3654 rv = FALSE; 3655 3656 /* 3657 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be 3658 * concurrently set while the object is locked. 
Thus, if PG_WRITEABLE 3659 * is clear, no PTEs can have PG_M set. 3660 */ 3661 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 3662 if ((m->oflags & VPO_BUSY) == 0 && 3663 (m->flags & PG_WRITEABLE) == 0) 3664 return (rv); 3665 vm_page_lock_queues(); 3666 sched_pin(); 3667 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3668 pmap = PV_PMAP(pv); 3669 PMAP_LOCK(pmap); 3670 pte = pmap_pte_quick(pmap, pv->pv_va); 3671 rv = (*pte & PG_M) != 0; 3672 PMAP_UNLOCK(pmap); 3673 if (rv) 3674 break; 3675 } 3676 if (*PMAP1) 3677 PT_SET_MA(PADDR1, 0); 3678 sched_unpin(); 3679 vm_page_unlock_queues(); 3680 return (rv); 3681} 3682 3683/* 3684 * pmap_is_prefaultable: 3685 * 3686 * Return whether or not the specified virtual address is eligible 3687 * for prefault. 3688 */ 3689static boolean_t 3690pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr) 3691{ 3692 pt_entry_t *pte; 3693 boolean_t rv = FALSE; 3694 3695 return (rv); /* XXX prefaulting is disabled; the code below is never reached */ 3696 3697 if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) { 3698 pte = vtopte(addr); 3699 rv = (*pte == 0); 3700 } 3701 return (rv); 3702} 3703 3704boolean_t 3705pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 3706{ 3707 boolean_t rv; 3708 3709 PMAP_LOCK(pmap); 3710 rv = pmap_is_prefaultable_locked(pmap, addr); 3711 PMAP_UNLOCK(pmap); 3712 return (rv); 3713} 3714 3715boolean_t 3716pmap_is_referenced(vm_page_t m) 3717{ 3718 pv_entry_t pv; 3719 pt_entry_t *pte; 3720 pmap_t pmap; 3721 boolean_t rv; 3722 3723 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3724 ("pmap_is_referenced: page %p is not managed", m)); 3725 rv = FALSE; 3726 vm_page_lock_queues(); 3727 sched_pin(); 3728 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3729 pmap = PV_PMAP(pv); 3730 PMAP_LOCK(pmap); 3731 pte = pmap_pte_quick(pmap, pv->pv_va); 3732 rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); 3733 PMAP_UNLOCK(pmap); 3734 if (rv) 3735 break; 3736 } 3737 if (*PMAP1) 3738 PT_SET_MA(PADDR1, 0); 3739 sched_unpin(); 3740 vm_page_unlock_queues(); 3741 return (rv); 3742} 3743 3744void 3745pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) 3746{ 3747 int i, npages = round_page(len) >> PAGE_SHIFT; 3748 for (i = 0; i < npages; i++) { 3749 pt_entry_t *pte; 3750 pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); 3751 pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M))); 3752 PMAP_MARK_PRIV(xpmap_mtop(*pte)); 3753 pmap_pte_release(pte); 3754 } 3755} 3756 3757void 3758pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len) 3759{ 3760 int i, npages = round_page(len) >> PAGE_SHIFT; 3761 for (i = 0; i < npages; i++) { 3762 pt_entry_t *pte; 3763 pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); 3764 PMAP_MARK_UNPRIV(xpmap_mtop(*pte)); 3765 pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M)); 3766 pmap_pte_release(pte); 3767 } 3768} 3769 3770/* 3771 * Clear the write and modified bits in each of the given page's mappings. 3772 */ 3773void 3774pmap_remove_write(vm_page_t m) 3775{ 3776 pv_entry_t pv; 3777 pmap_t pmap; 3778 pt_entry_t oldpte, *pte; 3779 3780 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3781 ("pmap_remove_write: page %p is not managed", m)); 3782 3783 /* 3784 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by 3785 * another thread while the object is locked. Thus, if PG_WRITEABLE 3786 * is clear, no page table entries need updating.
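 *
 * Because PT_SET_VA_MA() is not an atomic compare-and-swap, the
 * loop below re-reads the PTE after the update and retries if the
 * entry changed underneath it, e.g. because the hardware set PG_M
 * concurrently.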
3787 */ 3788 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 3789 if ((m->oflags & VPO_BUSY) == 0 && 3790 (m->flags & PG_WRITEABLE) == 0) 3791 return; 3792 vm_page_lock_queues(); 3793 sched_pin(); 3794 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3795 pmap = PV_PMAP(pv); 3796 PMAP_LOCK(pmap); 3797 pte = pmap_pte_quick(pmap, pv->pv_va); 3798retry: 3799 oldpte = *pte; 3800 if ((oldpte & PG_RW) != 0) { 3801 vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M); 3802 3803 /* 3804 * Regardless of whether a pte is 32 or 64 bits 3805 * in size, PG_RW and PG_M are among the least 3806 * significant 32 bits. 3807 */ 3808 PT_SET_VA_MA(pte, newpte, TRUE); 3809 if (*pte != newpte) 3810 goto retry; 3811 3812 if ((oldpte & PG_M) != 0) 3813 vm_page_dirty(m); 3814 pmap_invalidate_page(pmap, pv->pv_va); 3815 } 3816 PMAP_UNLOCK(pmap); 3817 } 3818 vm_page_flag_clear(m, PG_WRITEABLE); 3819 PT_UPDATES_FLUSH(); 3820 if (*PMAP1) 3821 PT_SET_MA(PADDR1, 0); 3822 sched_unpin(); 3823 vm_page_unlock_queues(); 3824} 3825 3826/* 3827 * pmap_ts_referenced: 3828 * 3829 * Return a count of reference bits for a page, clearing those bits. 3830 * It is not necessary for every reference bit to be cleared, but it 3831 * is necessary that 0 only be returned when there are truly no 3832 * reference bits set. 3833 * 3834 * XXX: The exact number of bits to check and clear is a matter that 3835 * should be tested and standardized at some point in the future for 3836 * optimal aging of shared pages. 3837 */ 3838int 3839pmap_ts_referenced(vm_page_t m) 3840{ 3841 pv_entry_t pv, pvf, pvn; 3842 pmap_t pmap; 3843 pt_entry_t *pte; 3844 int rtval = 0; 3845 3846 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3847 ("pmap_ts_referenced: page %p is not managed", m)); 3848 vm_page_lock_queues(); 3849 sched_pin(); 3850 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3851 pvf = pv; 3852 do { 3853 pvn = TAILQ_NEXT(pv, pv_list); 3854 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3855 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3856 pmap = PV_PMAP(pv); 3857 PMAP_LOCK(pmap); 3858 pte = pmap_pte_quick(pmap, pv->pv_va); 3859 if ((*pte & PG_A) != 0) { 3860 PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); 3861 pmap_invalidate_page(pmap, pv->pv_va); 3862 rtval++; 3863 if (rtval > 4) 3864 pvn = NULL; 3865 } 3866 PMAP_UNLOCK(pmap); 3867 } while ((pv = pvn) != NULL && pv != pvf); 3868 } 3869 PT_UPDATES_FLUSH(); 3870 if (*PMAP1) 3871 PT_SET_MA(PADDR1, 0); 3872 3873 sched_unpin(); 3874 vm_page_unlock_queues(); 3875 return (rtval); 3876} 3877 3878/* 3879 * Clear the modify bits on the specified physical page. 3880 */ 3881void 3882pmap_clear_modify(vm_page_t m) 3883{ 3884 pv_entry_t pv; 3885 pmap_t pmap; 3886 pt_entry_t *pte; 3887 3888 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3889 ("pmap_clear_modify: page %p is not managed", m)); 3890 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 3891 KASSERT((m->oflags & VPO_BUSY) == 0, 3892 ("pmap_clear_modify: page %p is busy", m)); 3893 3894 /* 3895 * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set. 3896 * If the object containing the page is locked and the page is not 3897 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. 
3898 */ 3899 if ((m->flags & PG_WRITEABLE) == 0) 3900 return; 3901 vm_page_lock_queues(); 3902 sched_pin(); 3903 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3904 pmap = PV_PMAP(pv); 3905 PMAP_LOCK(pmap); 3906 pte = pmap_pte_quick(pmap, pv->pv_va); 3907 if ((*pte & PG_M) != 0) { 3908 /* 3909 * Regardless of whether a pte is 32 or 64 bits 3910 * in size, PG_M is among the least significant 3911 * 32 bits. 3912 */ 3913 PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE); 3914 pmap_invalidate_page(pmap, pv->pv_va); 3915 } 3916 PMAP_UNLOCK(pmap); 3917 } 3918 sched_unpin(); 3919 vm_page_unlock_queues(); 3920} 3921 3922/* 3923 * pmap_clear_reference: 3924 * 3925 * Clear the reference bit on the specified physical page. 3926 */ 3927void 3928pmap_clear_reference(vm_page_t m) 3929{ 3930 pv_entry_t pv; 3931 pmap_t pmap; 3932 pt_entry_t *pte; 3933 3934 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3935 ("pmap_clear_reference: page %p is not managed", m)); 3936 vm_page_lock_queues(); 3937 sched_pin(); 3938 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3939 pmap = PV_PMAP(pv); 3940 PMAP_LOCK(pmap); 3941 pte = pmap_pte_quick(pmap, pv->pv_va); 3942 if ((*pte & PG_A) != 0) { 3943 /* 3944 * Regardless of whether a pte is 32 or 64 bits 3945 * in size, PG_A is among the least significant 3946 * 32 bits. 3947 */ 3948 PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); 3949 pmap_invalidate_page(pmap, pv->pv_va); 3950 } 3951 PMAP_UNLOCK(pmap); 3952 } 3953 sched_unpin(); 3954 vm_page_unlock_queues(); 3955} 3956 3957/* 3958 * Miscellaneous support routines follow 3959 */ 3960 3961/* 3962 * Map a set of physical memory pages into the kernel virtual 3963 * address space. Return a pointer to where it is mapped. This 3964 * routine is intended to be used for mapping device memory, 3965 * NOT real memory. 3966 */ 3967void * 3968pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) 3969{ 3970 vm_offset_t va, offset; 3971 vm_size_t tmpsize; 3972 3973 offset = pa & PAGE_MASK; 3974 size = roundup(offset + size, PAGE_SIZE); 3975 pa = pa & PG_FRAME; 3976 3977 if (pa < KERNLOAD && pa + size <= KERNLOAD) 3978 va = KERNBASE + pa; 3979 else 3980 va = kmem_alloc_nofault(kernel_map, size); 3981 if (!va) 3982 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 3983 3984 for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) 3985 pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); 3986 pmap_invalidate_range(kernel_pmap, va, va + tmpsize); 3987 pmap_invalidate_cache_range(va, va + size); 3988 return ((void *)(va + offset)); 3989} 3990 3991void * 3992pmap_mapdev(vm_paddr_t pa, vm_size_t size) 3993{ 3994 3995 return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); 3996} 3997 3998void * 3999pmap_mapbios(vm_paddr_t pa, vm_size_t size) 4000{ 4001 4002 return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); 4003} 4004 4005void 4006pmap_unmapdev(vm_offset_t va, vm_size_t size) 4007{ 4008 vm_offset_t base, offset, tmpva; 4009 4010 if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) 4011 return; 4012 base = trunc_page(va); 4013 offset = va & PAGE_MASK; 4014 size = roundup(offset + size, PAGE_SIZE); 4015 critical_enter(); 4016 for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) 4017 pmap_kremove(tmpva); 4018 pmap_invalidate_range(kernel_pmap, va, tmpva); 4019 critical_exit(); 4020 kmem_free(kernel_map, base, size); 4021} 4022 4023/* 4024 * Sets the memory attribute for the specified page. 
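 * The strategy is: record the new pat_mode, then flush the page
 * from the caches through an existing sf_buf mapping when one
 * exists; otherwise, on CPUs without self-snoop (CPUID_SS), create
 * a transient CMAP2 mapping solely so that the page's cache lines
 * can be invalidated.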
/*
 * Sets the memory attribute for the specified page.
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
	struct sysmaps *sysmaps;
	vm_offset_t sva, eva;

	m->md.pat_mode = ma;
	if ((m->flags & PG_FICTITIOUS) != 0)
		return;

	/*
	 * If "m" is a normal page, flush it from the cache.
	 * See pmap_invalidate_cache_range().
	 *
	 * First, try to find an existing mapping of the page by sf
	 * buffer.  sf_buf_invalidate_cache() modifies the mapping and
	 * flushes the cache.
	 */
	if (sf_buf_invalidate_cache(m))
		return;

	/*
	 * If the page is not mapped by an sf buffer and the CPU does not
	 * support self-snoop, map the page transiently and do the
	 * invalidation.  In the worst case, the whole cache is flushed by
	 * pmap_invalidate_cache_range().
	 */
	if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
		mtx_lock(&sysmaps->lock);
		if (*sysmaps->CMAP2)
			panic("pmap_page_set_memattr: CMAP2 busy");
		sched_pin();
		PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
		    xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M |
		    pmap_cache_bits(m->md.pat_mode, 0));
		invlcaddr(sysmaps->CADDR2);
		sva = (vm_offset_t)sysmaps->CADDR2;
		eva = sva + PAGE_SIZE;
	} else
		sva = eva = 0; /* gcc */
	pmap_invalidate_cache_range(sva, eva);
	if (sva != 0) {
		PT_SET_MA(sysmaps->CADDR2, 0);
		sched_unpin();
		mtx_unlock(&sysmaps->lock);
	}
}

/*
 * Change the caching mode of an existing range of kernel virtual
 * addresses.  Returns zero on success or an errno value.
 */
int
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
	vm_offset_t base, offset, tmpva;
	pt_entry_t *pte;
	u_int opte, npte;
	pd_entry_t *pde;
	boolean_t changed;

	base = trunc_page(va);
	offset = va & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);

	/* Only supported on kernel virtual addresses. */
	if (base <= VM_MAXUSER_ADDRESS)
		return (EINVAL);

	/* 4MB pages and pages that aren't mapped aren't supported. */
	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
		pde = pmap_pde(kernel_pmap, tmpva);
		if (*pde & PG_PS)
			return (EINVAL);
		if ((*pde & PG_V) == 0)
			return (EINVAL);
		pte = vtopte(tmpva);
		if ((*pte & PG_V) == 0)
			return (EINVAL);
	}

	changed = FALSE;

	/*
	 * Ok, all the pages exist and are 4K, so run through them updating
	 * their cache mode.
	 */
	for (tmpva = base; size > 0; ) {
		pte = vtopte(tmpva);

		/*
		 * The cache mode bits are all in the low 32-bits of the
		 * PTE, so we can just spin on updating the low 32-bits.
		 */
		do {
			opte = *(u_int *)pte;
			npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
			npte |= pmap_cache_bits(mode, 0);
			PT_SET_VA_MA(pte, npte, TRUE);
		} while (npte != opte && (*pte != npte));
		if (npte != opte)
			changed = TRUE;
		tmpva += PAGE_SIZE;
		size -= PAGE_SIZE;
	}

	/*
	 * Flush CPU caches to make sure any data isn't cached that shouldn't
	 * be, etc.
	 */
	if (changed) {
		pmap_invalidate_range(kernel_pmap, base, tmpva);
		pmap_invalidate_cache_range(base, tmpva);
	}
	return (0);
}
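/*
 * Illustrative sketch (not compiled): switching an already mapped kernel
 * range, e.g. a frame buffer, to write-combining.  "fb_va" and "fb_len"
 * are hypothetical; PAT_WRITE_COMBINING comes from <machine/specialreg.h>.
 */
#if 0
	int error;

	error = pmap_change_attr(fb_va, fb_len, PAT_WRITE_COMBINING);
	if (error != 0)
		printf("fb: could not enable write-combining: %d\n", error);
#endif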
/*
 * Perform the pmap work for mincore(2).
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pt_entry_t *ptep, pte;
	vm_paddr_t pa;
	int val;

	PMAP_LOCK(pmap);
retry:
	ptep = pmap_pte(pmap, addr);
	pte = (ptep != NULL) ? PT_GET(ptep) : 0;
	pmap_pte_release(ptep);
	val = 0;
	if ((pte & PG_V) != 0) {
		val |= MINCORE_INCORE;
		if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if ((pte & PG_A) != 0)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	}
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
	    (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
		pa = pte & PG_FRAME;
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);
	return (val);
}

void
pmap_activate(struct thread *td)
{
	pmap_t pmap, oldpmap;
	u_int32_t cr3;

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	oldpmap = PCPU_GET(curpmap);
#if defined(SMP)
	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
#else
	oldpmap->pm_active &= ~1;
	pmap->pm_active |= 1;
#endif
#ifdef PAE
	cr3 = vtophys(pmap->pm_pdpt);
#else
	cr3 = vtophys(pmap->pm_pdir);
#endif
	/*
	 * pmap_activate is for the current thread on the current cpu.
	 */
	td->td_pcb->pcb_cr3 = cr3;
	PT_UPDATES_FLUSH();
	load_cr3(cr3);
	PCPU_SET(curpmap, pmap);
	critical_exit();
}

void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{

	/* Instruction caches are coherent on i386; nothing to do. */
}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
	vm_offset_t superpage_offset;

	if (size < NBPDR)
		return;
	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
		offset += ptoa(object->pg_color);
	superpage_offset = offset & PDRMASK;
	if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
	    (*addr & PDRMASK) == superpage_offset)
		return;
	if ((*addr & PDRMASK) < superpage_offset)
		*addr = (*addr & ~PDRMASK) + superpage_offset;
	else
		*addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
}
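/*
 * Worked example for the alignment logic above, assuming 4MB superpages
 * (NBPDR = 0x400000, PDRMASK = 0x3fffff): a 16MB mapping whose object
 * offset is 0x00300000 prefers addresses whose low 22 bits are also
 * 0x00300000.  Starting from a hint of 0x20000000, (*addr & PDRMASK) is
 * 0, which is less than superpage_offset, so the hint becomes
 * 0x20300000.  The leading 1MB and the trailing 3MB then map with 4KB
 * pages, while the middle 12MB is eligible for three 4MB superpages.
 */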
#ifdef XEN

void
pmap_suspend(void)
{
	pmap_t pmap;
	int i, pdir, offset;
	vm_paddr_t pdirma;
	mmu_update_t mu[4];

	/*
	 * We need to remove the recursive mapping structure from all
	 * our pmaps so that Xen doesn't get confused when it restores
	 * the page tables.  The recursive map lives at page directory
	 * index PTDPTDI.  We assume that the suspend code has stopped
	 * the other vcpus (if any).
	 */
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		for (i = 0; i < 4; i++) {
			/*
			 * Figure out which page directory (L2) page
			 * contains this bit of the recursive map and
			 * the offset within that page of the map
			 * entry.
			 */
			pdir = (PTDPTDI + i) / NPDEPG;
			offset = (PTDPTDI + i) % NPDEPG;
			pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
			mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
			mu[i].val = 0;
		}
		HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
	}
}

void
pmap_resume(void)
{
	pmap_t pmap;
	int i, pdir, offset;
	vm_paddr_t pdirma;
	mmu_update_t mu[4];

	/*
	 * Restore the recursive map that we removed on suspend.
	 */
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		for (i = 0; i < 4; i++) {
			/*
			 * Figure out which page directory (L2) page
			 * contains this bit of the recursive map and
			 * the offset within that page of the map
			 * entry.
			 */
			pdir = (PTDPTDI + i) / NPDEPG;
			offset = (PTDPTDI + i) % NPDEPG;
			pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
			mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
			mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V;
		}
		HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
	}
}

#endif
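/*
 * Worked example for the PTDPTDI arithmetic above, under PAE where
 * NPDEPG = 512 and the four L2 pages are addressed through pm_pdpt: if
 * PTDPTDI were, say, 1534, then i = 0 gives pdir = 1534 / 512 = 2 and
 * offset = 1534 % 512 = 510, i.e. the third L2 page, entry 510; i = 2
 * wraps into pdir = 3, offset = 0.  Each mmu_update_t thus targets the
 * machine address of one 8-byte pd_entry_t slot.
 */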
#if defined(PMAP_DEBUG)
int
pmap_pid_dump(int pid)
{
	pmap_t pmap;
	struct proc *p;
	int npte = 0;
	int index;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_pid != pid)
			continue;

		if (p->p_vmspace) {
			int i, j;

			index = 0;
			pmap = vmspace_pmap(p->p_vmspace);
			for (i = 0; i < NPDEPTD; i++) {
				pd_entry_t *pde;
				pt_entry_t *pte;
				vm_offset_t base = i << PDRSHIFT;

				pde = &pmap->pm_pdir[i];
				if (pde && pmap_pde_v(pde)) {
					for (j = 0; j < NPTEPG; j++) {
						vm_offset_t va = base + (j << PAGE_SHIFT);
						if (va >= (vm_offset_t)VM_MIN_KERNEL_ADDRESS) {
							if (index) {
								index = 0;
								printf("\n");
							}
							sx_sunlock(&allproc_lock);
							return (npte);
						}
						pte = pmap_pte(pmap, va);
						if (pte && pmap_pte_v(pte)) {
							pt_entry_t pa;
							vm_page_t m;

							pa = PT_GET(pte);
							m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
							printf("va: 0x%x, pt: 0x%jx, h: %d, w: %d, f: 0x%x",
							    va, (uintmax_t)pa, m->hold_count,
							    m->wire_count, m->flags);
							npte++;
							index++;
							if (index >= 2) {
								index = 0;
								printf("\n");
							} else {
								printf(" ");
							}
						}
					}
				}
			}
		}
	}
	sx_sunlock(&allproc_lock);
	return (npte);
}
#endif

#if defined(DEBUG)

static void pads(pmap_t pm);
void pmap_pvdump(vm_paddr_t pa);

/* Print the address space of a pmap. */
static void
pads(pmap_t pm)
{
	int i, j;
	vm_offset_t va;
	pt_entry_t *ptep;

	if (pm == kernel_pmap)
		return;
	for (i = 0; i < NPDEPTD; i++)
		if (pm->pm_pdir[i])
			for (j = 0; j < NPTEPG; j++) {
				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
				if (pm == kernel_pmap && va < KERNBASE)
					continue;
				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
					continue;
				ptep = pmap_pte(pm, va);
				if (pmap_pte_v(ptep))
					printf("%x:%jx ", va, (uintmax_t)*ptep);
			}
}

void
pmap_pvdump(vm_paddr_t pa)
{
	pv_entry_t pv;
	pmap_t pmap;
	vm_page_t m;

	printf("pa %jx", (uintmax_t)pa);
	m = PHYS_TO_VM_PAGE(pa);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
		pads(pmap);
	}
	printf("\n");
}
#endif