pmap.c revision 201751
1/*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu> 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * the Systems Programming Group of the University of Utah Computer 13 * Science Department and William Jolitz of UUNET Technologies Inc. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by the University of 26 * California, Berkeley and its contributors. 27 * 4. Neither the name of the University nor the names of its contributors 28 * may be used to endorse or promote products derived from this software 29 * without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41 * SUCH DAMAGE. 42 * 43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 44 */ 45/*- 46 * Copyright (c) 2003 Networks Associates Technology, Inc. 47 * All rights reserved. 48 * 49 * This software was developed for the FreeBSD Project by Jake Burkholder, 50 * Safeport Network Services, and Network Associates Laboratories, the 51 * Security Research Division of Network Associates, Inc. under 52 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 53 * CHATS research program. 54 * 55 * Redistribution and use in source and binary forms, with or without 56 * modification, are permitted provided that the following conditions 57 * are met: 58 * 1. Redistributions of source code must retain the above copyright 59 * notice, this list of conditions and the following disclaimer. 60 * 2. Redistributions in binary form must reproduce the above copyright 61 * notice, this list of conditions and the following disclaimer in the 62 * documentation and/or other materials provided with the distribution. 63 * 64 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 67 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 74 * SUCH DAMAGE. 75 */ 76 77#include <sys/cdefs.h> 78__FBSDID("$FreeBSD: head/sys/i386/i386/pmap.c 201751 2010-01-07 17:34:45Z alc $"); 79 80/* 81 * Manages physical address maps. 82 * 83 * In addition to hardware address maps, this 84 * module is called upon to provide software-use-only 85 * maps which may or may not be stored in the same 86 * form as hardware maps. These pseudo-maps are 87 * used to store intermediate results from copy 88 * operations to and from address spaces. 89 * 90 * Since the information managed by this module is 91 * also stored by the logical address mapping module, 92 * this module may throw away valid virtual-to-physical 93 * mappings at almost any time. However, invalidations 94 * of virtual-to-physical mappings must be done as 95 * requested. 96 * 97 * In order to cope with hardware architectures which 98 * make virtual-to-physical map invalidates expensive, 99 * this module may delay invalidate or reduced protection 100 * operations until such time as they are actually 101 * necessary. This module is given full information as 102 * to which processors are currently using which maps, 103 * and to when physical maps must be made correct. 104 */ 105 106#include "opt_cpu.h" 107#include "opt_pmap.h" 108#include "opt_msgbuf.h" 109#include "opt_smp.h" 110#include "opt_xbox.h" 111 112#include <sys/param.h> 113#include <sys/systm.h> 114#include <sys/kernel.h> 115#include <sys/ktr.h> 116#include <sys/lock.h> 117#include <sys/malloc.h> 118#include <sys/mman.h> 119#include <sys/msgbuf.h> 120#include <sys/mutex.h> 121#include <sys/proc.h> 122#include <sys/sf_buf.h> 123#include <sys/sx.h> 124#include <sys/vmmeter.h> 125#include <sys/sched.h> 126#include <sys/sysctl.h> 127#ifdef SMP 128#include <sys/smp.h> 129#endif 130 131#include <vm/vm.h> 132#include <vm/vm_param.h> 133#include <vm/vm_kern.h> 134#include <vm/vm_page.h> 135#include <vm/vm_map.h> 136#include <vm/vm_object.h> 137#include <vm/vm_extern.h> 138#include <vm/vm_pageout.h> 139#include <vm/vm_pager.h> 140#include <vm/vm_reserv.h> 141#include <vm/uma.h> 142 143#include <machine/cpu.h> 144#include <machine/cputypes.h> 145#include <machine/md_var.h> 146#include <machine/pcb.h> 147#include <machine/specialreg.h> 148#ifdef SMP 149#include <machine/smp.h> 150#endif 151 152#ifdef XBOX 153#include <machine/xbox.h> 154#endif 155 156#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) 157#define CPU_ENABLE_SSE 158#endif 159 160#ifndef PMAP_SHPGPERPROC 161#define PMAP_SHPGPERPROC 200 162#endif 163 164#if !defined(DIAGNOSTIC) 165#define PMAP_INLINE __gnu89_inline 166#else 167#define PMAP_INLINE 168#endif 169 170#define PV_STATS 171#ifdef PV_STATS 172#define PV_STAT(x) do { x ; } while (0) 173#else 174#define PV_STAT(x) do { } while (0) 175#endif 176 177#define pa_index(pa) ((pa) >> PDRSHIFT) 178#define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) 179 180/* 181 * Get PDEs and PTEs for user/kernel address space 182 */ 183#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) 184#define pdir_pde(m, v) 
(m[(vm_offset_t)(v) >> PDRSHIFT]) 185 186#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) 187#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) 188#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) 189#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) 190#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) 191 192#define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \ 193 atomic_clear_int((u_int *)(pte), PG_W)) 194#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 195 196struct pmap kernel_pmap_store; 197LIST_HEAD(pmaplist, pmap); 198static struct pmaplist allpmaps; 199static struct mtx allpmaps_lock; 200 201vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 202vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 203int pgeflag = 0; /* PG_G or-in */ 204int pseflag = 0; /* PG_PS or-in */ 205 206static int nkpt; 207vm_offset_t kernel_vm_end; 208extern u_int32_t KERNend; 209 210#ifdef PAE 211pt_entry_t pg_nx; 212static uma_zone_t pdptzone; 213#endif 214 215static int pat_works = 0; /* Is page attribute table sane? */ 216 217SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 218 219static int pg_ps_enabled; 220SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0, 221 "Are large page mappings enabled?"); 222 223/* 224 * Data for the pv entry allocation mechanism 225 */ 226static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 227static struct md_page *pv_table; 228static int shpgperproc = PMAP_SHPGPERPROC; 229 230struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ 231int pv_maxchunks; /* How many chunks we have KVA for */ 232vm_offset_t pv_vafree; /* freelist stored in the PTE */ 233 234/* 235 * All those kernel PT submaps that BSD is so fond of 236 */ 237struct sysmaps { 238 struct mtx lock; 239 pt_entry_t *CMAP1; 240 pt_entry_t *CMAP2; 241 caddr_t CADDR1; 242 caddr_t CADDR2; 243}; 244static struct sysmaps sysmaps_pcpu[MAXCPU]; 245pt_entry_t *CMAP1 = 0; 246static pt_entry_t *CMAP3; 247caddr_t CADDR1 = 0, ptvmmap = 0; 248static caddr_t CADDR3; 249struct msgbuf *msgbufp = 0; 250 251/* 252 * Crashdump maps. 
253 */ 254static caddr_t crashdumpmap; 255 256static pt_entry_t *PMAP1 = 0, *PMAP2; 257static pt_entry_t *PADDR1 = 0, *PADDR2; 258#ifdef SMP 259static int PMAP1cpu; 260static int PMAP1changedcpu; 261SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 262 &PMAP1changedcpu, 0, 263 "Number of times pmap_pte_quick changed CPU with same PMAP1"); 264#endif 265static int PMAP1changed; 266SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 267 &PMAP1changed, 0, 268 "Number of times pmap_pte_quick changed PMAP1"); 269static int PMAP1unchanged; 270SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 271 &PMAP1unchanged, 0, 272 "Number of times pmap_pte_quick didn't change PMAP1"); 273static struct mtx PMAP2mutex; 274 275static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 276static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try); 277static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); 278static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); 279static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); 280static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 281static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 282 vm_offset_t va); 283static int pmap_pvh_wired_mappings(struct md_page *pvh, int count); 284 285static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); 286static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, 287 vm_prot_t prot); 288static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 289 vm_page_t m, vm_prot_t prot, vm_page_t mpte); 290static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); 291static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); 292static boolean_t pmap_is_modified_pvh(struct md_page *pvh); 293static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); 294static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); 295static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); 296static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); 297static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, 298 vm_prot_t prot); 299static void pmap_pte_attr(pt_entry_t *pte, int cache_bits); 300static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, 301 vm_page_t *free); 302static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, 303 vm_page_t *free); 304static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte); 305static void pmap_remove_page(struct pmap *pmap, vm_offset_t va, 306 vm_page_t *free); 307static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, 308 vm_offset_t va); 309static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); 310static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 311 vm_page_t m); 312 313static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); 314 315static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); 316static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free); 317static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); 318static void pmap_pte_release(pt_entry_t *pte); 319static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *); 320static vm_offset_t pmap_kmem_choose(vm_offset_t addr); 321#ifdef PAE 322static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); 323#endif 324static void pmap_set_pg(void); 325 326CTASSERT(1 << 
    PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));

/*
 * If you get an error here, then you set KVA_PAGES wrong!  See the
 * description of KVA_PAGES in sys/i386/include/pmap.h.  It must be a
 * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
 */
CTASSERT(KERNBASE % (1 << 24) == 0);

/*
 * Move the kernel virtual free pointer to the next
 * 4MB.  This is used to help improve performance
 * by using a large (4MB) page for much of the kernel
 * (.text, .data, .bss).
 */
static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
        vm_offset_t newaddr = addr;

#ifndef DISABLE_PSE
        if (cpu_feature & CPUID_PSE)
                newaddr = (addr + PDRMASK) & ~PDRMASK;
#endif
        return newaddr;
}

/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * On the i386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t firstaddr)
{
        vm_offset_t va;
        pt_entry_t *pte, *unused;
        struct sysmaps *sysmaps;
        int i;

        /*
         * XXX The calculation of virtual_avail is wrong.  It's NKPT*PAGE_SIZE
         * too large.  It should instead be correctly calculated in locore.s
         * and not based on 'first' (which is a physical address, not a
         * virtual address, for the start of unused physical memory).  The
         * kernel page tables are NOT double mapped and thus should not be
         * included in this calculation.
         */
        virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
        virtual_avail = pmap_kmem_choose(virtual_avail);

        virtual_end = VM_MAX_KERNEL_ADDRESS;

        /*
         * Initialize the kernel pmap (which is statically allocated).
         */
        PMAP_LOCK_INIT(kernel_pmap);
        kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
#ifdef PAE
        kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
        kernel_pmap->pm_root = NULL;
        kernel_pmap->pm_active = -1;    /* don't allow deactivation */
        TAILQ_INIT(&kernel_pmap->pm_pvchunk);
        LIST_INIT(&allpmaps);
        mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
        mtx_lock_spin(&allpmaps_lock);
        LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
        mtx_unlock_spin(&allpmaps_lock);
        nkpt = NKPT;

        /*
         * Reserve some special page table entries/VA space for temporary
         * mapping of pages.
         */
#define SYSMAP(c, p, v, n)      \
        v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

        va = virtual_avail;
        pte = vtopte(va);

        /*
         * CMAP1/CMAP2 are used for zeroing and copying pages.
         * CMAP3 is used for the idle process page zeroing.
         */
        for (i = 0; i < MAXCPU; i++) {
                sysmaps = &sysmaps_pcpu[i];
                mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
                SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
                SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
        }
        SYSMAP(caddr_t, CMAP1, CADDR1, 1)
        SYSMAP(caddr_t, CMAP3, CADDR3, 1)
        *CMAP3 = 0;

        /*
         * Crashdump maps.
         */
        SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)

        /*
         * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
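 *
 * (A rough sketch of what each SYSMAP() invocation in this function does:
 * it carves "n" pages out of the bootstrap VA cursor and records the PTE
 * that maps them, so the SYSMAP(caddr_t, unused, ptvmmap, 1) just below
 * amounts to
 *
 *      ptvmmap = (caddr_t)va;  va += 1 * PAGE_SIZE;
 *      unused = pte;           pte += 1;
 *
 * leaving ptvmmap as reserved KVA whose backing PTE can later be pointed at
 * an arbitrary physical page.)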
434 */ 435 SYSMAP(caddr_t, unused, ptvmmap, 1) 436 437 /* 438 * msgbufp is used to map the system message buffer. 439 */ 440 SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE))) 441 442 /* 443 * ptemap is used for pmap_pte_quick 444 */ 445 SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); 446 SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1); 447 448 mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); 449 450 virtual_avail = va; 451 452 *CMAP1 = 0; 453 454 /* 455 * Leave in place an identity mapping (virt == phys) for the low 1 MB 456 * physical memory region that is used by the ACPI wakeup code. This 457 * mapping must not have PG_G set. 458 */ 459#ifdef XBOX 460 /* FIXME: This is gross, but needed for the XBOX. Since we are in such 461 * an early stadium, we cannot yet neatly map video memory ... :-( 462 * Better fixes are very welcome! */ 463 if (!arch_i386_is_xbox) 464#endif 465 for (i = 1; i < NKPT; i++) 466 PTD[i] = 0; 467 468 /* Initialize the PAT MSR if present. */ 469 pmap_init_pat(); 470 471 /* Turn on PG_G on kernel page(s) */ 472 pmap_set_pg(); 473} 474 475/* 476 * Setup the PAT MSR. 477 */ 478void 479pmap_init_pat(void) 480{ 481 uint64_t pat_msr; 482 char *sysenv; 483 static int pat_tested = 0; 484 485 /* Bail if this CPU doesn't implement PAT. */ 486 if (!(cpu_feature & CPUID_PAT)) 487 return; 488 489 /* 490 * Due to some Intel errata, we can only safely use the lower 4 491 * PAT entries. 492 * 493 * Intel Pentium III Processor Specification Update 494 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B 495 * or Mode C Paging) 496 * 497 * Intel Pentium IV Processor Specification Update 498 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) 499 * 500 * Some Apple Macs based on nVidia chipsets cannot enter ACPI mode 501 * via SMI# when we use upper 4 PAT entries for unknown reason. 502 */ 503 if (!pat_tested) { 504 if (cpu_vendor_id != CPU_VENDOR_INTEL || 505 (CPUID_TO_FAMILY(cpu_id) == 6 && 506 CPUID_TO_MODEL(cpu_id) >= 0xe)) { 507 pat_works = 1; 508 sysenv = getenv("smbios.system.product"); 509 if (sysenv != NULL) { 510 if (strncmp(sysenv, "MacBook5,1", 10) == 0 || 511 strncmp(sysenv, "MacBookPro5,5", 13) == 0 || 512 strncmp(sysenv, "Macmini3,1", 10) == 0) 513 pat_works = 0; 514 freeenv(sysenv); 515 } 516 } 517 pat_tested = 1; 518 } 519 520 /* Initialize default PAT entries. */ 521 pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | 522 PAT_VALUE(1, PAT_WRITE_THROUGH) | 523 PAT_VALUE(2, PAT_UNCACHED) | 524 PAT_VALUE(3, PAT_UNCACHEABLE) | 525 PAT_VALUE(4, PAT_WRITE_BACK) | 526 PAT_VALUE(5, PAT_WRITE_THROUGH) | 527 PAT_VALUE(6, PAT_UNCACHED) | 528 PAT_VALUE(7, PAT_UNCACHEABLE); 529 530 if (pat_works) { 531 /* 532 * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. 533 * Program 4 and 5 as WP and WC. 534 * Leave 6 and 7 as UC- and UC. 535 */ 536 pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5)); 537 pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) | 538 PAT_VALUE(5, PAT_WRITE_COMBINING); 539 } else { 540 /* 541 * Just replace PAT Index 2 with WC instead of UC-. 542 */ 543 pat_msr &= ~PAT_MASK(2); 544 pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); 545 } 546 wrmsr(MSR_PAT, pat_msr); 547} 548 549/* 550 * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on. 
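 *
 * (Background for the function below: PG_G marks a translation "global",
 * so it survives the implicit TLB flush of a CR3 reload on context switch,
 * which is why it is worth applying to the kernel's own mappings; when
 * pgeflag is 0 the processor lacks that feature and the function simply
 * returns.)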
551 */ 552static void 553pmap_set_pg(void) 554{ 555 pd_entry_t pdir; 556 pt_entry_t *pte; 557 vm_offset_t va, endva; 558 int i; 559 560 if (pgeflag == 0) 561 return; 562 563 i = KERNLOAD/NBPDR; 564 endva = KERNBASE + KERNend; 565 566 if (pseflag) { 567 va = KERNBASE + KERNLOAD; 568 while (va < endva) { 569 pdir = kernel_pmap->pm_pdir[KPTDI+i]; 570 pdir |= pgeflag; 571 kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir; 572 invltlb(); /* Play it safe, invltlb() every time */ 573 i++; 574 va += NBPDR; 575 } 576 } else { 577 va = (vm_offset_t)btext; 578 while (va < endva) { 579 pte = vtopte(va); 580 if (*pte) 581 *pte |= pgeflag; 582 invltlb(); /* Play it safe, invltlb() every time */ 583 va += PAGE_SIZE; 584 } 585 } 586} 587 588/* 589 * Initialize a vm_page's machine-dependent fields. 590 */ 591void 592pmap_page_init(vm_page_t m) 593{ 594 595 TAILQ_INIT(&m->md.pv_list); 596 m->md.pat_mode = PAT_WRITE_BACK; 597} 598 599#ifdef PAE 600static void * 601pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 602{ 603 604 /* Inform UMA that this allocator uses kernel_map/object. */ 605 *flags = UMA_SLAB_KERNEL; 606 return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL, 607 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); 608} 609#endif 610 611/* 612 * ABuse the pte nodes for unmapped kva to thread a kva freelist through. 613 * Requirements: 614 * - Must deal with pages in order to ensure that none of the PG_* bits 615 * are ever set, PG_V in particular. 616 * - Assumes we can write to ptes without pte_store() atomic ops, even 617 * on PAE systems. This should be ok. 618 * - Assumes nothing will ever test these addresses for 0 to indicate 619 * no mapping instead of correctly checking PG_V. 620 * - Assumes a vm_offset_t will fit in a pte (true for i386). 621 * Because PG_V is never set, there can be no mappings to invalidate. 622 */ 623static vm_offset_t 624pmap_ptelist_alloc(vm_offset_t *head) 625{ 626 pt_entry_t *pte; 627 vm_offset_t va; 628 629 va = *head; 630 if (va == 0) 631 return (va); /* Out of memory */ 632 pte = vtopte(va); 633 *head = *pte; 634 if (*head & PG_V) 635 panic("pmap_ptelist_alloc: va with PG_V set!"); 636 *pte = 0; 637 return (va); 638} 639 640static void 641pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) 642{ 643 pt_entry_t *pte; 644 645 if (va & PG_V) 646 panic("pmap_ptelist_free: freeing va with PG_V set!"); 647 pte = vtopte(va); 648 *pte = *head; /* virtual! PG_V is 0 though */ 649 *head = va; 650} 651 652static void 653pmap_ptelist_init(vm_offset_t *head, void *base, int npages) 654{ 655 int i; 656 vm_offset_t va; 657 658 *head = 0; 659 for (i = npages - 1; i >= 0; i--) { 660 va = (vm_offset_t)base + i * PAGE_SIZE; 661 pmap_ptelist_free(head, va); 662 } 663} 664 665 666/* 667 * Initialize the pmap module. 668 * Called by vm_init, to initialize any structures that the pmap 669 * system needs to map virtual memory. 670 */ 671void 672pmap_init(void) 673{ 674 vm_page_t mpte; 675 vm_size_t s; 676 int i, pv_npg; 677 678 /* 679 * Initialize the vm page array entries for the kernel pmap's 680 * page table pages. 681 */ 682 for (i = 0; i < nkpt; i++) { 683 mpte = PHYS_TO_VM_PAGE(PTD[i + KPTDI] & PG_FRAME); 684 KASSERT(mpte >= vm_page_array && 685 mpte < &vm_page_array[vm_page_array_size], 686 ("pmap_init: page table page is out of range")); 687 mpte->pindex = i + KPTDI; 688 mpte->phys_addr = PTD[i + KPTDI] & PG_FRAME; 689 } 690 691 /* 692 * Initialize the address space (zone) for the pv entries. 
Set a 693 * high water mark so that the system can recover from excessive 694 * numbers of pv entries. 695 */ 696 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 697 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; 698 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 699 pv_entry_max = roundup(pv_entry_max, _NPCPV); 700 pv_entry_high_water = 9 * (pv_entry_max / 10); 701 702 /* 703 * Are large page mappings enabled? 704 */ 705 TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); 706 if (pg_ps_enabled) { 707 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, 708 ("pmap_init: can't assign to pagesizes[1]")); 709 pagesizes[1] = NBPDR; 710 } 711 712 /* 713 * Calculate the size of the pv head table for superpages. 714 */ 715 for (i = 0; phys_avail[i + 1]; i += 2); 716 pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR; 717 718 /* 719 * Allocate memory for the pv head table for superpages. 720 */ 721 s = (vm_size_t)(pv_npg * sizeof(struct md_page)); 722 s = round_page(s); 723 pv_table = (struct md_page *)kmem_alloc(kernel_map, s); 724 for (i = 0; i < pv_npg; i++) 725 TAILQ_INIT(&pv_table[i].pv_list); 726 727 pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); 728 pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map, 729 PAGE_SIZE * pv_maxchunks); 730 if (pv_chunkbase == NULL) 731 panic("pmap_init: not enough kvm for pv chunks"); 732 pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); 733#ifdef PAE 734 pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, 735 NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 736 UMA_ZONE_VM | UMA_ZONE_NOFREE); 737 uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); 738#endif 739} 740 741 742SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, 743 "Max number of PV entries"); 744SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, 745 "Page share factor per proc"); 746 747SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, 748 "2/4MB page mapping counters"); 749 750static u_long pmap_pde_demotions; 751SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, 752 &pmap_pde_demotions, 0, "2/4MB page demotions"); 753 754static u_long pmap_pde_mappings; 755SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, 756 &pmap_pde_mappings, 0, "2/4MB page mappings"); 757 758static u_long pmap_pde_p_failures; 759SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, 760 &pmap_pde_p_failures, 0, "2/4MB page promotion failures"); 761 762static u_long pmap_pde_promotions; 763SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, 764 &pmap_pde_promotions, 0, "2/4MB page promotions"); 765 766/*************************************************** 767 * Low level helper routines..... 768 ***************************************************/ 769 770/* 771 * Determine the appropriate bits to set in a PTE or PDE for a specified 772 * caching mode. 773 */ 774int 775pmap_cache_bits(int mode, boolean_t is_pde) 776{ 777 int pat_flag, pat_index, cache_bits; 778 779 /* The PAT bit is different for PTE's and PDE's. */ 780 pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; 781 782 /* If we don't support PAT, map extended modes to older ones. */ 783 if (!(cpu_feature & CPUID_PAT)) { 784 switch (mode) { 785 case PAT_UNCACHEABLE: 786 case PAT_WRITE_THROUGH: 787 case PAT_WRITE_BACK: 788 break; 789 case PAT_UNCACHED: 790 case PAT_WRITE_COMBINING: 791 case PAT_WRITE_PROTECTED: 792 mode = PAT_UNCACHEABLE; 793 break; 794 } 795 } 796 797 /* Map the caching mode to a PAT index. 
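 * For example, with pat_works set, PAT_WRITE_COMBINING maps to index 5,
 * which the code below encodes as the PAT bit plus PG_NC_PWT (bit 2 of the
 * index selects PAT, bit 1 PCD, bit 0 PWT); when only the lower four PAT
 * entries are usable it falls back to index 2 (PG_NC_PCD alone), which
 * pmap_init_pat() has reprogrammed to write-combining.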
 */
        if (pat_works) {
                switch (mode) {
                case PAT_UNCACHEABLE:
                        pat_index = 3;
                        break;
                case PAT_WRITE_THROUGH:
                        pat_index = 1;
                        break;
                case PAT_WRITE_BACK:
                        pat_index = 0;
                        break;
                case PAT_UNCACHED:
                        pat_index = 2;
                        break;
                case PAT_WRITE_COMBINING:
                        pat_index = 5;
                        break;
                case PAT_WRITE_PROTECTED:
                        pat_index = 4;
                        break;
                default:
                        panic("Unknown caching mode %d\n", mode);
                }
        } else {
                switch (mode) {
                case PAT_UNCACHED:
                case PAT_UNCACHEABLE:
                case PAT_WRITE_PROTECTED:
                        pat_index = 3;
                        break;
                case PAT_WRITE_THROUGH:
                        pat_index = 1;
                        break;
                case PAT_WRITE_BACK:
                        pat_index = 0;
                        break;
                case PAT_WRITE_COMBINING:
                        pat_index = 2;
                        break;
                default:
                        panic("Unknown caching mode %d\n", mode);
                }
        }

        /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
        cache_bits = 0;
        if (pat_index & 0x4)
                cache_bits |= pat_flag;
        if (pat_index & 0x2)
                cache_bits |= PG_NC_PCD;
        if (pat_index & 0x1)
                cache_bits |= PG_NC_PWT;
        return (cache_bits);
}
#ifdef SMP
/*
 * For SMP, these functions have to use the IPI mechanism for coherence.
 *
 * N.B.: Before calling any of the following TLB invalidation functions,
 * the calling processor must ensure that all stores updating a non-
 * kernel page table are globally performed.  Otherwise, another
 * processor could cache an old, pre-update entry without being
 * invalidated.  This can happen one of two ways: (1) The pmap becomes
 * active on another processor after its pm_active field is checked by
 * one of the following functions but before a store updating the page
 * table is globally performed.  (2) The pmap becomes active on another
 * processor before its pm_active field is checked but due to
 * speculative loads one of the following functions still reads the
 * pmap as inactive on the other processor.
 *
 * The kernel page table is exempt because its pm_active field is
 * immutable.  The kernel page table is always active on every
 * processor.
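 *
 * (A minimal sketch of the calling pattern, with hypothetical pte/newpte/va:
 *
 *      pte_store(pte, newpte);         make the PTE update visible first ...
 *      pmap_invalidate_page(pmap, va); ... then shoot down stale TLB entries
 *
 * performing the invalidation before the store is globally visible would let
 * another processor re-fetch and cache the stale translation.)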
871 */ 872void 873pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 874{ 875 u_int cpumask; 876 u_int other_cpus; 877 878 sched_pin(); 879 if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 880 invlpg(va); 881 smp_invlpg(va); 882 } else { 883 cpumask = PCPU_GET(cpumask); 884 other_cpus = PCPU_GET(other_cpus); 885 if (pmap->pm_active & cpumask) 886 invlpg(va); 887 if (pmap->pm_active & other_cpus) 888 smp_masked_invlpg(pmap->pm_active & other_cpus, va); 889 } 890 sched_unpin(); 891} 892 893void 894pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 895{ 896 u_int cpumask; 897 u_int other_cpus; 898 vm_offset_t addr; 899 900 sched_pin(); 901 if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 902 for (addr = sva; addr < eva; addr += PAGE_SIZE) 903 invlpg(addr); 904 smp_invlpg_range(sva, eva); 905 } else { 906 cpumask = PCPU_GET(cpumask); 907 other_cpus = PCPU_GET(other_cpus); 908 if (pmap->pm_active & cpumask) 909 for (addr = sva; addr < eva; addr += PAGE_SIZE) 910 invlpg(addr); 911 if (pmap->pm_active & other_cpus) 912 smp_masked_invlpg_range(pmap->pm_active & other_cpus, 913 sva, eva); 914 } 915 sched_unpin(); 916} 917 918void 919pmap_invalidate_all(pmap_t pmap) 920{ 921 u_int cpumask; 922 u_int other_cpus; 923 924 sched_pin(); 925 if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 926 invltlb(); 927 smp_invltlb(); 928 } else { 929 cpumask = PCPU_GET(cpumask); 930 other_cpus = PCPU_GET(other_cpus); 931 if (pmap->pm_active & cpumask) 932 invltlb(); 933 if (pmap->pm_active & other_cpus) 934 smp_masked_invltlb(pmap->pm_active & other_cpus); 935 } 936 sched_unpin(); 937} 938 939void 940pmap_invalidate_cache(void) 941{ 942 943 sched_pin(); 944 wbinvd(); 945 smp_cache_flush(); 946 sched_unpin(); 947} 948#else /* !SMP */ 949/* 950 * Normal, non-SMP, 486+ invalidation functions. 951 * We inline these within pmap.c for speed. 952 */ 953PMAP_INLINE void 954pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 955{ 956 957 if (pmap == kernel_pmap || pmap->pm_active) 958 invlpg(va); 959} 960 961PMAP_INLINE void 962pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 963{ 964 vm_offset_t addr; 965 966 if (pmap == kernel_pmap || pmap->pm_active) 967 for (addr = sva; addr < eva; addr += PAGE_SIZE) 968 invlpg(addr); 969} 970 971PMAP_INLINE void 972pmap_invalidate_all(pmap_t pmap) 973{ 974 975 if (pmap == kernel_pmap || pmap->pm_active) 976 invltlb(); 977} 978 979PMAP_INLINE void 980pmap_invalidate_cache(void) 981{ 982 983 wbinvd(); 984} 985#endif /* !SMP */ 986 987void 988pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) 989{ 990 991 KASSERT((sva & PAGE_MASK) == 0, 992 ("pmap_invalidate_cache_range: sva not page-aligned")); 993 KASSERT((eva & PAGE_MASK) == 0, 994 ("pmap_invalidate_cache_range: eva not page-aligned")); 995 996 if (cpu_feature & CPUID_SS) 997 ; /* If "Self Snoop" is supported, do nothing. */ 998 else if (cpu_feature & CPUID_CLFSH) { 999 1000 /* 1001 * Otherwise, do per-cache line flush. Use the mfence 1002 * instruction to insure that previous stores are 1003 * included in the write-back. The processor 1004 * propagates flush to other processors in the cache 1005 * coherence domain. 1006 */ 1007 mfence(); 1008 for (; sva < eva; sva += cpu_clflush_line_size) 1009 clflush(sva); 1010 mfence(); 1011 } else { 1012 1013 /* 1014 * No targeted cache flush methods are supported by CPU, 1015 * globally invalidate cache as a last resort. 
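                 *
                 * (For scale: the CLFLUSH branch above works one cache line
                 * at a time, so with a typical 64-byte cpu_clflush_line_size
                 * a single 4 KB page costs 4096 / 64 = 64 clflush
                 * instructions between the two mfence barriers, whereas this
                 * fallback discards the entire cache.)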
1016 */ 1017 pmap_invalidate_cache(); 1018 } 1019} 1020 1021/* 1022 * Are we current address space or kernel? N.B. We return FALSE when 1023 * a pmap's page table is in use because a kernel thread is borrowing 1024 * it. The borrowed page table can change spontaneously, making any 1025 * dependence on its continued use subject to a race condition. 1026 */ 1027static __inline int 1028pmap_is_current(pmap_t pmap) 1029{ 1030 1031 return (pmap == kernel_pmap || 1032 (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && 1033 (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); 1034} 1035 1036/* 1037 * If the given pmap is not the current or kernel pmap, the returned pte must 1038 * be released by passing it to pmap_pte_release(). 1039 */ 1040pt_entry_t * 1041pmap_pte(pmap_t pmap, vm_offset_t va) 1042{ 1043 pd_entry_t newpf; 1044 pd_entry_t *pde; 1045 1046 pde = pmap_pde(pmap, va); 1047 if (*pde & PG_PS) 1048 return (pde); 1049 if (*pde != 0) { 1050 /* are we current address space or kernel? */ 1051 if (pmap_is_current(pmap)) 1052 return (vtopte(va)); 1053 mtx_lock(&PMAP2mutex); 1054 newpf = *pde & PG_FRAME; 1055 if ((*PMAP2 & PG_FRAME) != newpf) { 1056 *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M; 1057 pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); 1058 } 1059 return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); 1060 } 1061 return (0); 1062} 1063 1064/* 1065 * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte 1066 * being NULL. 1067 */ 1068static __inline void 1069pmap_pte_release(pt_entry_t *pte) 1070{ 1071 1072 if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) 1073 mtx_unlock(&PMAP2mutex); 1074} 1075 1076static __inline void 1077invlcaddr(void *caddr) 1078{ 1079 1080 invlpg((u_int)caddr); 1081} 1082 1083/* 1084 * Super fast pmap_pte routine best used when scanning 1085 * the pv lists. This eliminates many coarse-grained 1086 * invltlb calls. Note that many of the pv list 1087 * scans are across different pmaps. It is very wasteful 1088 * to do an entire invltlb for checking a single mapping. 1089 * 1090 * If the given pmap is not the current pmap, vm_page_queue_mtx 1091 * must be held and curthread pinned to a CPU. 1092 */ 1093static pt_entry_t * 1094pmap_pte_quick(pmap_t pmap, vm_offset_t va) 1095{ 1096 pd_entry_t newpf; 1097 pd_entry_t *pde; 1098 1099 pde = pmap_pde(pmap, va); 1100 if (*pde & PG_PS) 1101 return (pde); 1102 if (*pde != 0) { 1103 /* are we current address space or kernel? */ 1104 if (pmap_is_current(pmap)) 1105 return (vtopte(va)); 1106 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1107 KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 1108 newpf = *pde & PG_FRAME; 1109 if ((*PMAP1 & PG_FRAME) != newpf) { 1110 *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M; 1111#ifdef SMP 1112 PMAP1cpu = PCPU_GET(cpuid); 1113#endif 1114 invlcaddr(PADDR1); 1115 PMAP1changed++; 1116 } else 1117#ifdef SMP 1118 if (PMAP1cpu != PCPU_GET(cpuid)) { 1119 PMAP1cpu = PCPU_GET(cpuid); 1120 invlcaddr(PADDR1); 1121 PMAP1changedcpu++; 1122 } else 1123#endif 1124 PMAP1unchanged++; 1125 return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); 1126 } 1127 return (0); 1128} 1129 1130/* 1131 * Routine: pmap_extract 1132 * Function: 1133 * Extract the physical page address associated 1134 * with the given map/virtual_address pair. 
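 *      Illustration (non-PAE layout, hypothetical address): va 0xdeadb123
 *      splits into page directory index 0x37a (va >> 22), page table index
 *      0x2db ((va >> 12) & 0x3ff) and page offset 0x123; for a 4MB mapping
 *      the code below instead keeps the low 22 bits (va & PDRMASK) as the
 *      offset into the superpage.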
1135 */ 1136vm_paddr_t 1137pmap_extract(pmap_t pmap, vm_offset_t va) 1138{ 1139 vm_paddr_t rtval; 1140 pt_entry_t *pte; 1141 pd_entry_t pde; 1142 1143 rtval = 0; 1144 PMAP_LOCK(pmap); 1145 pde = pmap->pm_pdir[va >> PDRSHIFT]; 1146 if (pde != 0) { 1147 if ((pde & PG_PS) != 0) 1148 rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); 1149 else { 1150 pte = pmap_pte(pmap, va); 1151 rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); 1152 pmap_pte_release(pte); 1153 } 1154 } 1155 PMAP_UNLOCK(pmap); 1156 return (rtval); 1157} 1158 1159/* 1160 * Routine: pmap_extract_and_hold 1161 * Function: 1162 * Atomically extract and hold the physical page 1163 * with the given pmap and virtual address pair 1164 * if that mapping permits the given protection. 1165 */ 1166vm_page_t 1167pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1168{ 1169 pd_entry_t pde; 1170 pt_entry_t pte; 1171 vm_page_t m; 1172 1173 m = NULL; 1174 vm_page_lock_queues(); 1175 PMAP_LOCK(pmap); 1176 pde = *pmap_pde(pmap, va); 1177 if (pde != 0) { 1178 if (pde & PG_PS) { 1179 if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { 1180 m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | 1181 (va & PDRMASK)); 1182 vm_page_hold(m); 1183 } 1184 } else { 1185 sched_pin(); 1186 pte = *pmap_pte_quick(pmap, va); 1187 if (pte != 0 && 1188 ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { 1189 m = PHYS_TO_VM_PAGE(pte & PG_FRAME); 1190 vm_page_hold(m); 1191 } 1192 sched_unpin(); 1193 } 1194 } 1195 vm_page_unlock_queues(); 1196 PMAP_UNLOCK(pmap); 1197 return (m); 1198} 1199 1200/*************************************************** 1201 * Low level mapping routines..... 1202 ***************************************************/ 1203 1204/* 1205 * Add a wired page to the kva. 1206 * Note: not SMP coherent. 1207 */ 1208PMAP_INLINE void 1209pmap_kenter(vm_offset_t va, vm_paddr_t pa) 1210{ 1211 pt_entry_t *pte; 1212 1213 pte = vtopte(va); 1214 pte_store(pte, pa | PG_RW | PG_V | pgeflag); 1215} 1216 1217static __inline void 1218pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) 1219{ 1220 pt_entry_t *pte; 1221 1222 pte = vtopte(va); 1223 pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); 1224} 1225 1226/* 1227 * Remove a page from the kernel pagetables. 1228 * Note: not SMP coherent. 1229 */ 1230PMAP_INLINE void 1231pmap_kremove(vm_offset_t va) 1232{ 1233 pt_entry_t *pte; 1234 1235 pte = vtopte(va); 1236 pte_clear(pte); 1237} 1238 1239/* 1240 * Used to map a range of physical addresses into kernel 1241 * virtual address space. 1242 * 1243 * The value passed in '*virt' is a suggested virtual address for 1244 * the mapping. Architectures which can support a direct-mapped 1245 * physical to virtual region can return the appropriate address 1246 * within that region, leaving '*virt' unchanged. Other 1247 * architectures should map the pages starting at '*virt' and 1248 * update '*virt' with the first usable address after the mapped 1249 * region. 1250 */ 1251vm_offset_t 1252pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 1253{ 1254 vm_offset_t va, sva; 1255 1256 va = sva = *virt; 1257 while (start < end) { 1258 pmap_kenter(va, start); 1259 va += PAGE_SIZE; 1260 start += PAGE_SIZE; 1261 } 1262 pmap_invalidate_range(kernel_pmap, sva, va); 1263 *virt = va; 1264 return (sva); 1265} 1266 1267 1268/* 1269 * Add a list of wired pages to the kva 1270 * this routine is only used for temporary 1271 * kernel mappings that do not need to have 1272 * page modification or references recorded. 
1273 * Note that old mappings are simply written 1274 * over. The page *must* be wired. 1275 * Note: SMP coherent. Uses a ranged shootdown IPI. 1276 */ 1277void 1278pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1279{ 1280 pt_entry_t *endpte, oldpte, *pte; 1281 1282 oldpte = 0; 1283 pte = vtopte(sva); 1284 endpte = pte + count; 1285 while (pte < endpte) { 1286 oldpte |= *pte; 1287 pte_store(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag | 1288 pmap_cache_bits((*ma)->md.pat_mode, 0) | PG_RW | PG_V); 1289 pte++; 1290 ma++; 1291 } 1292 if ((oldpte & PG_V) != 0) 1293 pmap_invalidate_range(kernel_pmap, sva, sva + count * 1294 PAGE_SIZE); 1295} 1296 1297/* 1298 * This routine tears out page mappings from the 1299 * kernel -- it is meant only for temporary mappings. 1300 * Note: SMP coherent. Uses a ranged shootdown IPI. 1301 */ 1302void 1303pmap_qremove(vm_offset_t sva, int count) 1304{ 1305 vm_offset_t va; 1306 1307 va = sva; 1308 while (count-- > 0) { 1309 pmap_kremove(va); 1310 va += PAGE_SIZE; 1311 } 1312 pmap_invalidate_range(kernel_pmap, sva, va); 1313} 1314 1315/*************************************************** 1316 * Page table page management routines..... 1317 ***************************************************/ 1318static __inline void 1319pmap_free_zero_pages(vm_page_t free) 1320{ 1321 vm_page_t m; 1322 1323 while (free != NULL) { 1324 m = free; 1325 free = m->right; 1326 /* Preserve the page's PG_ZERO setting. */ 1327 vm_page_free_toq(m); 1328 } 1329} 1330 1331/* 1332 * Schedule the specified unused page table page to be freed. Specifically, 1333 * add the page to the specified list of pages that will be released to the 1334 * physical memory manager after the TLB has been updated. 1335 */ 1336static __inline void 1337pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO) 1338{ 1339 1340 if (set_PG_ZERO) 1341 m->flags |= PG_ZERO; 1342 else 1343 m->flags &= ~PG_ZERO; 1344 m->right = *free; 1345 *free = m; 1346} 1347 1348/* 1349 * Inserts the specified page table page into the specified pmap's collection 1350 * of idle page table pages. Each of a pmap's page table pages is responsible 1351 * for mapping a distinct range of virtual addresses. The pmap's collection is 1352 * ordered by this virtual address range. 1353 */ 1354static void 1355pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) 1356{ 1357 vm_page_t root; 1358 1359 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1360 root = pmap->pm_root; 1361 if (root == NULL) { 1362 mpte->left = NULL; 1363 mpte->right = NULL; 1364 } else { 1365 root = vm_page_splay(mpte->pindex, root); 1366 if (mpte->pindex < root->pindex) { 1367 mpte->left = root->left; 1368 mpte->right = root; 1369 root->left = NULL; 1370 } else if (mpte->pindex == root->pindex) 1371 panic("pmap_insert_pt_page: pindex already inserted"); 1372 else { 1373 mpte->right = root->right; 1374 mpte->left = root; 1375 root->right = NULL; 1376 } 1377 } 1378 pmap->pm_root = mpte; 1379} 1380 1381/* 1382 * Looks for a page table page mapping the specified virtual address in the 1383 * specified pmap's collection of idle page table pages. Returns NULL if there 1384 * is no page table page corresponding to the specified virtual address. 
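 * (The collection is the pm_root splay tree maintained by
 * pmap_insert_pt_page() above, keyed by pindex == va >> PDRSHIFT, so every
 * address within a single 2/4MB region maps to the same page table page and
 * a successful lookup rotates that page to the root for the next access.)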
1385 */ 1386static vm_page_t 1387pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) 1388{ 1389 vm_page_t mpte; 1390 vm_pindex_t pindex = va >> PDRSHIFT; 1391 1392 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1393 if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { 1394 mpte = vm_page_splay(pindex, mpte); 1395 if ((pmap->pm_root = mpte)->pindex != pindex) 1396 mpte = NULL; 1397 } 1398 return (mpte); 1399} 1400 1401/* 1402 * Removes the specified page table page from the specified pmap's collection 1403 * of idle page table pages. The specified page table page must be a member of 1404 * the pmap's collection. 1405 */ 1406static void 1407pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) 1408{ 1409 vm_page_t root; 1410 1411 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1412 if (mpte != pmap->pm_root) 1413 vm_page_splay(mpte->pindex, pmap->pm_root); 1414 if (mpte->left == NULL) 1415 root = mpte->right; 1416 else { 1417 root = vm_page_splay(mpte->pindex, mpte->left); 1418 root->right = mpte->right; 1419 } 1420 pmap->pm_root = root; 1421} 1422 1423/* 1424 * This routine unholds page table pages, and if the hold count 1425 * drops to zero, then it decrements the wire count. 1426 */ 1427static __inline int 1428pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) 1429{ 1430 1431 --m->wire_count; 1432 if (m->wire_count == 0) 1433 return _pmap_unwire_pte_hold(pmap, m, free); 1434 else 1435 return 0; 1436} 1437 1438static int 1439_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) 1440{ 1441 vm_offset_t pteva; 1442 1443 /* 1444 * unmap the page table page 1445 */ 1446 pmap->pm_pdir[m->pindex] = 0; 1447 --pmap->pm_stats.resident_count; 1448 1449 /* 1450 * This is a release store so that the ordinary store unmapping 1451 * the page table page is globally performed before TLB shoot- 1452 * down is begun. 1453 */ 1454 atomic_subtract_rel_int(&cnt.v_wire_count, 1); 1455 1456 /* 1457 * Do an invltlb to make the invalidated mapping 1458 * take effect immediately. 1459 */ 1460 pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); 1461 pmap_invalidate_page(pmap, pteva); 1462 1463 /* 1464 * Put page on a list so that it is released after 1465 * *ALL* TLB shootdown is done 1466 */ 1467 pmap_add_delayed_free_list(m, free, TRUE); 1468 1469 return 1; 1470} 1471 1472/* 1473 * After removing a page table entry, this routine is used to 1474 * conditionally free the page, and manage the hold/wire counts. 1475 */ 1476static int 1477pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free) 1478{ 1479 pd_entry_t ptepde; 1480 vm_page_t mpte; 1481 1482 if (va >= VM_MAXUSER_ADDRESS) 1483 return 0; 1484 ptepde = *pmap_pde(pmap, va); 1485 mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); 1486 return pmap_unwire_pte_hold(pmap, mpte, free); 1487} 1488 1489void 1490pmap_pinit0(pmap_t pmap) 1491{ 1492 1493 PMAP_LOCK_INIT(pmap); 1494 pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); 1495#ifdef PAE 1496 pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); 1497#endif 1498 pmap->pm_root = NULL; 1499 pmap->pm_active = 0; 1500 PCPU_SET(curpmap, pmap); 1501 TAILQ_INIT(&pmap->pm_pvchunk); 1502 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1503 mtx_lock_spin(&allpmaps_lock); 1504 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1505 mtx_unlock_spin(&allpmaps_lock); 1506} 1507 1508/* 1509 * Initialize a preallocated and zeroed pmap structure, 1510 * such as one in a vmspace structure. 
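 * (A note on the code below, offered as background: the PTDPTDI entries it
 * installs make the page directory point at its own page(s), the classic
 * i386 recursive mapping, roughly
 *
 *      pm_pdir[PTDPTDI + i] = VM_PAGE_TO_PHYS(ptdpg[i]) | PG_V | PG_RW | ...
 *
 * which is what lets vtopte() view the active pmap's page tables as one
 * linear array of PTEs.)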
1511 */ 1512int 1513pmap_pinit(pmap_t pmap) 1514{ 1515 vm_page_t m, ptdpg[NPGPTD]; 1516 vm_paddr_t pa; 1517 static int color; 1518 int i; 1519 1520 PMAP_LOCK_INIT(pmap); 1521 1522 /* 1523 * No need to allocate page table space yet but we do need a valid 1524 * page directory table. 1525 */ 1526 if (pmap->pm_pdir == NULL) { 1527 pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1528 NBPTD); 1529 1530 if (pmap->pm_pdir == NULL) { 1531 PMAP_LOCK_DESTROY(pmap); 1532 return (0); 1533 } 1534#ifdef PAE 1535 pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); 1536 KASSERT(((vm_offset_t)pmap->pm_pdpt & 1537 ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, 1538 ("pmap_pinit: pdpt misaligned")); 1539 KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), 1540 ("pmap_pinit: pdpt above 4g")); 1541#endif 1542 pmap->pm_root = NULL; 1543 } 1544 KASSERT(pmap->pm_root == NULL, 1545 ("pmap_pinit: pmap has reserved page table page(s)")); 1546 1547 /* 1548 * allocate the page directory page(s) 1549 */ 1550 for (i = 0; i < NPGPTD;) { 1551 m = vm_page_alloc(NULL, color++, 1552 VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1553 VM_ALLOC_ZERO); 1554 if (m == NULL) 1555 VM_WAIT; 1556 else { 1557 ptdpg[i++] = m; 1558 } 1559 } 1560 1561 pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); 1562 1563 for (i = 0; i < NPGPTD; i++) { 1564 if ((ptdpg[i]->flags & PG_ZERO) == 0) 1565 bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); 1566 } 1567 1568 mtx_lock_spin(&allpmaps_lock); 1569 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1570 mtx_unlock_spin(&allpmaps_lock); 1571 /* Wire in kernel global address entries. */ 1572 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); 1573 1574 /* install self-referential address mapping entry(s) */ 1575 for (i = 0; i < NPGPTD; i++) { 1576 pa = VM_PAGE_TO_PHYS(ptdpg[i]); 1577 pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; 1578#ifdef PAE 1579 pmap->pm_pdpt[i] = pa | PG_V; 1580#endif 1581 } 1582 1583 pmap->pm_active = 0; 1584 TAILQ_INIT(&pmap->pm_pvchunk); 1585 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1586 1587 return (1); 1588} 1589 1590/* 1591 * this routine is called if the page table page is not 1592 * mapped correctly. 1593 */ 1594static vm_page_t 1595_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) 1596{ 1597 vm_paddr_t ptepa; 1598 vm_page_t m; 1599 1600 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1601 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1602 ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1603 1604 /* 1605 * Allocate a page table page. 1606 */ 1607 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1608 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1609 if (flags & M_WAITOK) { 1610 PMAP_UNLOCK(pmap); 1611 vm_page_unlock_queues(); 1612 VM_WAIT; 1613 vm_page_lock_queues(); 1614 PMAP_LOCK(pmap); 1615 } 1616 1617 /* 1618 * Indicate the need to retry. While waiting, the page table 1619 * page may have been allocated. 1620 */ 1621 return (NULL); 1622 } 1623 if ((m->flags & PG_ZERO) == 0) 1624 pmap_zero_page(m); 1625 1626 /* 1627 * Map the pagetable page into the process address space, if 1628 * it isn't already there. 
1629 */ 1630 1631 pmap->pm_stats.resident_count++; 1632 1633 ptepa = VM_PAGE_TO_PHYS(m); 1634 pmap->pm_pdir[ptepindex] = 1635 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); 1636 1637 return m; 1638} 1639 1640static vm_page_t 1641pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1642{ 1643 unsigned ptepindex; 1644 pd_entry_t ptepa; 1645 vm_page_t m; 1646 1647 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1648 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1649 ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1650 1651 /* 1652 * Calculate pagetable page index 1653 */ 1654 ptepindex = va >> PDRSHIFT; 1655retry: 1656 /* 1657 * Get the page directory entry 1658 */ 1659 ptepa = pmap->pm_pdir[ptepindex]; 1660 1661 /* 1662 * This supports switching from a 4MB page to a 1663 * normal 4K page. 1664 */ 1665 if (ptepa & PG_PS) { 1666 (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va); 1667 ptepa = pmap->pm_pdir[ptepindex]; 1668 } 1669 1670 /* 1671 * If the page table page is mapped, we just increment the 1672 * hold count, and activate it. 1673 */ 1674 if (ptepa) { 1675 m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); 1676 m->wire_count++; 1677 } else { 1678 /* 1679 * Here if the pte page isn't mapped, or if it has 1680 * been deallocated. 1681 */ 1682 m = _pmap_allocpte(pmap, ptepindex, flags); 1683 if (m == NULL && (flags & M_WAITOK)) 1684 goto retry; 1685 } 1686 return (m); 1687} 1688 1689 1690/*************************************************** 1691* Pmap allocation/deallocation routines. 1692 ***************************************************/ 1693 1694#ifdef SMP 1695/* 1696 * Deal with a SMP shootdown of other users of the pmap that we are 1697 * trying to dispose of. This can be a bit hairy. 1698 */ 1699static cpumask_t *lazymask; 1700static u_int lazyptd; 1701static volatile u_int lazywait; 1702 1703void pmap_lazyfix_action(void); 1704 1705void 1706pmap_lazyfix_action(void) 1707{ 1708 cpumask_t mymask = PCPU_GET(cpumask); 1709 1710#ifdef COUNT_IPIS 1711 (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; 1712#endif 1713 if (rcr3() == lazyptd) 1714 load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1715 atomic_clear_int(lazymask, mymask); 1716 atomic_store_rel_int(&lazywait, 1); 1717} 1718 1719static void 1720pmap_lazyfix_self(cpumask_t mymask) 1721{ 1722 1723 if (rcr3() == lazyptd) 1724 load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1725 atomic_clear_int(lazymask, mymask); 1726} 1727 1728 1729static void 1730pmap_lazyfix(pmap_t pmap) 1731{ 1732 cpumask_t mymask, mask; 1733 u_int spins; 1734 1735 while ((mask = pmap->pm_active) != 0) { 1736 spins = 50000000; 1737 mask = mask & -mask; /* Find least significant set bit */ 1738 mtx_lock_spin(&smp_ipi_mtx); 1739#ifdef PAE 1740 lazyptd = vtophys(pmap->pm_pdpt); 1741#else 1742 lazyptd = vtophys(pmap->pm_pdir); 1743#endif 1744 mymask = PCPU_GET(cpumask); 1745 if (mask == mymask) { 1746 lazymask = &pmap->pm_active; 1747 pmap_lazyfix_self(mymask); 1748 } else { 1749 atomic_store_rel_int((u_int *)&lazymask, 1750 (u_int)&pmap->pm_active); 1751 atomic_store_rel_int(&lazywait, 0); 1752 ipi_selected(mask, IPI_LAZYPMAP); 1753 while (lazywait == 0) { 1754 ia32_pause(); 1755 if (--spins == 0) 1756 break; 1757 } 1758 } 1759 mtx_unlock_spin(&smp_ipi_mtx); 1760 if (spins == 0) 1761 printf("pmap_lazyfix: spun for 50000000\n"); 1762 } 1763} 1764 1765#else /* SMP */ 1766 1767/* 1768 * Cleaning up on uniprocessor is easy. 
For various reasons, we're 1769 * unlikely to have to even execute this code, including the fact 1770 * that the cleanup is deferred until the parent does a wait(2), which 1771 * means that another userland process has run. 1772 */ 1773static void 1774pmap_lazyfix(pmap_t pmap) 1775{ 1776 u_int cr3; 1777 1778 cr3 = vtophys(pmap->pm_pdir); 1779 if (cr3 == rcr3()) { 1780 load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1781 pmap->pm_active &= ~(PCPU_GET(cpumask)); 1782 } 1783} 1784#endif /* SMP */ 1785 1786/* 1787 * Release any resources held by the given physical map. 1788 * Called when a pmap initialized by pmap_pinit is being released. 1789 * Should only be called if the map contains no valid mappings. 1790 */ 1791void 1792pmap_release(pmap_t pmap) 1793{ 1794 vm_page_t m, ptdpg[NPGPTD]; 1795 int i; 1796 1797 KASSERT(pmap->pm_stats.resident_count == 0, 1798 ("pmap_release: pmap resident count %ld != 0", 1799 pmap->pm_stats.resident_count)); 1800 KASSERT(pmap->pm_root == NULL, 1801 ("pmap_release: pmap has reserved page table page(s)")); 1802 1803 pmap_lazyfix(pmap); 1804 mtx_lock_spin(&allpmaps_lock); 1805 LIST_REMOVE(pmap, pm_list); 1806 mtx_unlock_spin(&allpmaps_lock); 1807 1808 for (i = 0; i < NPGPTD; i++) 1809 ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] & 1810 PG_FRAME); 1811 1812 bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * 1813 sizeof(*pmap->pm_pdir)); 1814 1815 pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); 1816 1817 for (i = 0; i < NPGPTD; i++) { 1818 m = ptdpg[i]; 1819#ifdef PAE 1820 KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), 1821 ("pmap_release: got wrong ptd page")); 1822#endif 1823 m->wire_count--; 1824 atomic_subtract_int(&cnt.v_wire_count, 1); 1825 vm_page_free_zero(m); 1826 } 1827 PMAP_LOCK_DESTROY(pmap); 1828} 1829 1830static int 1831kvm_size(SYSCTL_HANDLER_ARGS) 1832{ 1833 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 1834 1835 return sysctl_handle_long(oidp, &ksize, 0, req); 1836} 1837SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1838 0, 0, kvm_size, "IU", "Size of KVM"); 1839 1840static int 1841kvm_free(SYSCTL_HANDLER_ARGS) 1842{ 1843 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1844 1845 return sysctl_handle_long(oidp, &kfree, 0, req); 1846} 1847SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1848 0, 0, kvm_free, "IU", "Amount of KVM free"); 1849 1850/* 1851 * grow the number of kernel page table entries, if needed 1852 */ 1853void 1854pmap_growkernel(vm_offset_t addr) 1855{ 1856 struct pmap *pmap; 1857 vm_paddr_t ptppaddr; 1858 vm_page_t nkpg; 1859 pd_entry_t newpdir; 1860 pt_entry_t *pde; 1861 1862 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1863 if (kernel_vm_end == 0) { 1864 kernel_vm_end = KERNBASE; 1865 nkpt = 0; 1866 while (pdir_pde(PTD, kernel_vm_end)) { 1867 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1868 nkpt++; 1869 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1870 kernel_vm_end = kernel_map->max_offset; 1871 break; 1872 } 1873 } 1874 } 1875 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1876 if (addr - 1 >= kernel_map->max_offset) 1877 addr = kernel_map->max_offset; 1878 while (kernel_vm_end < addr) { 1879 if (pdir_pde(PTD, kernel_vm_end)) { 1880 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1881 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1882 kernel_vm_end = kernel_map->max_offset; 1883 break; 1884 } 1885 continue; 1886 } 1887 1888 nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, 1889 
VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1890 VM_ALLOC_ZERO); 1891 if (nkpg == NULL) 1892 panic("pmap_growkernel: no memory to grow kernel"); 1893 1894 nkpt++; 1895 1896 if ((nkpg->flags & PG_ZERO) == 0) 1897 pmap_zero_page(nkpg); 1898 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1899 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); 1900 pdir_pde(PTD, kernel_vm_end) = newpdir; 1901 1902 mtx_lock_spin(&allpmaps_lock); 1903 LIST_FOREACH(pmap, &allpmaps, pm_list) { 1904 pde = pmap_pde(pmap, kernel_vm_end); 1905 pde_store(pde, newpdir); 1906 } 1907 mtx_unlock_spin(&allpmaps_lock); 1908 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1909 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1910 kernel_vm_end = kernel_map->max_offset; 1911 break; 1912 } 1913 } 1914} 1915 1916 1917/*************************************************** 1918 * page management routines. 1919 ***************************************************/ 1920 1921CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1922CTASSERT(_NPCM == 11); 1923 1924static __inline struct pv_chunk * 1925pv_to_chunk(pv_entry_t pv) 1926{ 1927 1928 return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK); 1929} 1930 1931#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1932 1933#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ 1934#define PC_FREE10 0x0000fffful /* Free values for index 10 */ 1935 1936static uint32_t pc_freemask[11] = { 1937 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1938 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1939 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1940 PC_FREE0_9, PC_FREE10 1941}; 1942 1943SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1944 "Current number of pv entries"); 1945 1946#ifdef PV_STATS 1947static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1948 1949SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1950 "Current number of pv entry chunks"); 1951SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1952 "Current number of pv entry chunks allocated"); 1953SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1954 "Current number of pv entry chunks frees"); 1955SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1956 "Number of times tried to get a chunk page but failed."); 1957 1958static long pv_entry_frees, pv_entry_allocs; 1959static int pv_entry_spare; 1960 1961SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1962 "Current number of pv entry frees"); 1963SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1964 "Current number of pv entry allocs"); 1965SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1966 "Current number of spare pv entries"); 1967 1968static int pmap_collect_inactive, pmap_collect_active; 1969 1970SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0, 1971 "Current number times pmap_collect called on inactive queue"); 1972SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0, 1973 "Current number times pmap_collect called on active queue"); 1974#endif 1975 1976/* 1977 * We are in a serious low memory condition. Resort to 1978 * drastic measures to free some pages so we can allocate 1979 * another pv entry chunk. This is normally called to 1980 * unmap inactive pages, and if necessary, active pages. 
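 *
 * (Sizing note: each pv chunk dealt with here is a single page, per the
 * CTASSERT above, and the pc_freemask[] layout of ten full 32-bit words plus
 * one 16-bit word means a chunk carries 10 * 32 + 16 = 336 pv entries.)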
1981 */ 1982static void 1983pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq) 1984{ 1985 struct md_page *pvh; 1986 pd_entry_t *pde; 1987 pmap_t pmap; 1988 pt_entry_t *pte, tpte; 1989 pv_entry_t next_pv, pv; 1990 vm_offset_t va; 1991 vm_page_t m, free; 1992 1993 sched_pin(); 1994 TAILQ_FOREACH(m, &vpq->pl, pageq) { 1995 if (m->hold_count || m->busy) 1996 continue; 1997 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { 1998 va = pv->pv_va; 1999 pmap = PV_PMAP(pv); 2000 /* Avoid deadlock and lock recursion. */ 2001 if (pmap > locked_pmap) 2002 PMAP_LOCK(pmap); 2003 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) 2004 continue; 2005 pmap->pm_stats.resident_count--; 2006 pde = pmap_pde(pmap, va); 2007 KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found" 2008 " a 4mpage in page %p's pv list", m)); 2009 pte = pmap_pte_quick(pmap, va); 2010 tpte = pte_load_clear(pte); 2011 KASSERT((tpte & PG_W) == 0, 2012 ("pmap_collect: wired pte %#jx", (uintmax_t)tpte)); 2013 if (tpte & PG_A) 2014 vm_page_flag_set(m, PG_REFERENCED); 2015 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2016 vm_page_dirty(m); 2017 free = NULL; 2018 pmap_unuse_pt(pmap, va, &free); 2019 pmap_invalidate_page(pmap, va); 2020 pmap_free_zero_pages(free); 2021 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2022 if (TAILQ_EMPTY(&m->md.pv_list)) { 2023 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2024 if (TAILQ_EMPTY(&pvh->pv_list)) 2025 vm_page_flag_clear(m, PG_WRITEABLE); 2026 } 2027 free_pv_entry(pmap, pv); 2028 if (pmap != locked_pmap) 2029 PMAP_UNLOCK(pmap); 2030 } 2031 } 2032 sched_unpin(); 2033} 2034 2035 2036/* 2037 * free the pv_entry back to the free list 2038 */ 2039static void 2040free_pv_entry(pmap_t pmap, pv_entry_t pv) 2041{ 2042 vm_page_t m; 2043 struct pv_chunk *pc; 2044 int idx, field, bit; 2045 2046 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2047 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2048 PV_STAT(pv_entry_frees++); 2049 PV_STAT(pv_entry_spare++); 2050 pv_entry_count--; 2051 pc = pv_to_chunk(pv); 2052 idx = pv - &pc->pc_pventry[0]; 2053 field = idx / 32; 2054 bit = idx % 32; 2055 pc->pc_map[field] |= 1ul << bit; 2056 /* move to head of list */ 2057 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2058 for (idx = 0; idx < _NPCM; idx++) 2059 if (pc->pc_map[idx] != pc_freemask[idx]) { 2060 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2061 return; 2062 } 2063 PV_STAT(pv_entry_spare -= _NPCPV); 2064 PV_STAT(pc_chunk_count--); 2065 PV_STAT(pc_chunk_frees++); 2066 /* entire chunk is free, return it */ 2067 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2068 pmap_qremove((vm_offset_t)pc, 1); 2069 vm_page_unwire(m, 0); 2070 vm_page_free(m); 2071 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 2072} 2073 2074/* 2075 * get a new pv_entry, allocating a block from the system 2076 * when needed. 
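 * Each pv entry lives in a page-sized pv_chunk whose pc_map bitmap records
 * the free slots: the entry at index idx corresponds to bit (idx % 32) in
 * word (idx / 32) of pc_map.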
2077 */ 2078static pv_entry_t 2079get_pv_entry(pmap_t pmap, int try) 2080{ 2081 static const struct timeval printinterval = { 60, 0 }; 2082 static struct timeval lastprint; 2083 static vm_pindex_t colour; 2084 struct vpgqueues *pq; 2085 int bit, field; 2086 pv_entry_t pv; 2087 struct pv_chunk *pc; 2088 vm_page_t m; 2089 2090 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2091 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2092 PV_STAT(pv_entry_allocs++); 2093 pv_entry_count++; 2094 if (pv_entry_count > pv_entry_high_water) 2095 if (ratecheck(&lastprint, &printinterval)) 2096 printf("Approaching the limit on PV entries, consider " 2097 "increasing either the vm.pmap.shpgperproc or the " 2098 "vm.pmap.pv_entry_max tunable.\n"); 2099 pq = NULL; 2100retry: 2101 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2102 if (pc != NULL) { 2103 for (field = 0; field < _NPCM; field++) { 2104 if (pc->pc_map[field]) { 2105 bit = bsfl(pc->pc_map[field]); 2106 break; 2107 } 2108 } 2109 if (field < _NPCM) { 2110 pv = &pc->pc_pventry[field * 32 + bit]; 2111 pc->pc_map[field] &= ~(1ul << bit); 2112 /* If this was the last item, move it to tail */ 2113 for (field = 0; field < _NPCM; field++) 2114 if (pc->pc_map[field] != 0) { 2115 PV_STAT(pv_entry_spare--); 2116 return (pv); /* not full, return */ 2117 } 2118 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2119 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2120 PV_STAT(pv_entry_spare--); 2121 return (pv); 2122 } 2123 } 2124 /* 2125 * Access to the ptelist "pv_vafree" is synchronized by the page 2126 * queues lock. If "pv_vafree" is currently non-empty, it will 2127 * remain non-empty until pmap_ptelist_alloc() completes. 2128 */ 2129 if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq == 2130 &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | 2131 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 2132 if (try) { 2133 pv_entry_count--; 2134 PV_STAT(pc_chunk_tryfail++); 2135 return (NULL); 2136 } 2137 /* 2138 * Reclaim pv entries: At first, destroy mappings to 2139 * inactive pages. After that, if a pv chunk entry 2140 * is still needed, destroy mappings to active pages. 
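	 * The queue pointer "pq" tracks how far reclamation has escalated:
	 * NULL on the first attempt, then the inactive queue, then the
	 * active queue before giving up.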
2141 */ 2142 if (pq == NULL) { 2143 PV_STAT(pmap_collect_inactive++); 2144 pq = &vm_page_queues[PQ_INACTIVE]; 2145 } else if (pq == &vm_page_queues[PQ_INACTIVE]) { 2146 PV_STAT(pmap_collect_active++); 2147 pq = &vm_page_queues[PQ_ACTIVE]; 2148 } else 2149 panic("get_pv_entry: increase vm.pmap.shpgperproc"); 2150 pmap_collect(pmap, pq); 2151 goto retry; 2152 } 2153 PV_STAT(pc_chunk_count++); 2154 PV_STAT(pc_chunk_allocs++); 2155 colour++; 2156 pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); 2157 pmap_qenter((vm_offset_t)pc, &m, 1); 2158 pc->pc_pmap = pmap; 2159 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 2160 for (field = 1; field < _NPCM; field++) 2161 pc->pc_map[field] = pc_freemask[field]; 2162 pv = &pc->pc_pventry[0]; 2163 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2164 PV_STAT(pv_entry_spare += _NPCPV - 1); 2165 return (pv); 2166} 2167 2168static __inline pv_entry_t 2169pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2170{ 2171 pv_entry_t pv; 2172 2173 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2174 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 2175 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 2176 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 2177 break; 2178 } 2179 } 2180 return (pv); 2181} 2182 2183static void 2184pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 2185{ 2186 struct md_page *pvh; 2187 pv_entry_t pv; 2188 vm_offset_t va_last; 2189 vm_page_t m; 2190 2191 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2192 KASSERT((pa & PDRMASK) == 0, 2193 ("pmap_pv_demote_pde: pa is not 4mpage aligned")); 2194 2195 /* 2196 * Transfer the 4mpage's pv entry for this mapping to the first 2197 * page's pv list. 2198 */ 2199 pvh = pa_to_pvh(pa); 2200 va = trunc_4mpage(va); 2201 pv = pmap_pvh_remove(pvh, pmap, va); 2202 KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); 2203 m = PHYS_TO_VM_PAGE(pa); 2204 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2205 /* Instantiate the remaining NPTEPG - 1 pv entries. */ 2206 va_last = va + NBPDR - PAGE_SIZE; 2207 do { 2208 m++; 2209 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2210 ("pmap_pv_demote_pde: page %p is not managed", m)); 2211 va += PAGE_SIZE; 2212 pmap_insert_entry(pmap, va, m); 2213 } while (va < va_last); 2214} 2215 2216static void 2217pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 2218{ 2219 struct md_page *pvh; 2220 pv_entry_t pv; 2221 vm_offset_t va_last; 2222 vm_page_t m; 2223 2224 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2225 KASSERT((pa & PDRMASK) == 0, 2226 ("pmap_pv_promote_pde: pa is not 4mpage aligned")); 2227 2228 /* 2229 * Transfer the first page's pv entry for this mapping to the 2230 * 4mpage's pv list. Aside from avoiding the cost of a call 2231 * to get_pv_entry(), a transfer avoids the possibility that 2232 * get_pv_entry() calls pmap_collect() and that pmap_collect() 2233 * removes one of the mappings that is being promoted. 2234 */ 2235 m = PHYS_TO_VM_PAGE(pa); 2236 va = trunc_4mpage(va); 2237 pv = pmap_pvh_remove(&m->md, pmap, va); 2238 KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); 2239 pvh = pa_to_pvh(pa); 2240 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 2241 /* Free the remaining NPTEPG - 1 pv entries. 
*/ 2242 va_last = va + NBPDR - PAGE_SIZE; 2243 do { 2244 m++; 2245 va += PAGE_SIZE; 2246 pmap_pvh_free(&m->md, pmap, va); 2247 } while (va < va_last); 2248} 2249 2250static void 2251pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2252{ 2253 pv_entry_t pv; 2254 2255 pv = pmap_pvh_remove(pvh, pmap, va); 2256 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 2257 free_pv_entry(pmap, pv); 2258} 2259 2260static void 2261pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 2262{ 2263 struct md_page *pvh; 2264 2265 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2266 pmap_pvh_free(&m->md, pmap, va); 2267 if (TAILQ_EMPTY(&m->md.pv_list)) { 2268 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2269 if (TAILQ_EMPTY(&pvh->pv_list)) 2270 vm_page_flag_clear(m, PG_WRITEABLE); 2271 } 2272} 2273 2274/* 2275 * Create a pv entry for page at pa for 2276 * (pmap, va). 2277 */ 2278static void 2279pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 2280{ 2281 pv_entry_t pv; 2282 2283 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2284 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2285 pv = get_pv_entry(pmap, FALSE); 2286 pv->pv_va = va; 2287 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2288} 2289 2290/* 2291 * Conditionally create a pv entry. 2292 */ 2293static boolean_t 2294pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 2295{ 2296 pv_entry_t pv; 2297 2298 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2299 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2300 if (pv_entry_count < pv_entry_high_water && 2301 (pv = get_pv_entry(pmap, TRUE)) != NULL) { 2302 pv->pv_va = va; 2303 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2304 return (TRUE); 2305 } else 2306 return (FALSE); 2307} 2308 2309/* 2310 * Create the pv entries for each of the pages within a superpage. 2311 */ 2312static boolean_t 2313pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 2314{ 2315 struct md_page *pvh; 2316 pv_entry_t pv; 2317 2318 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2319 if (pv_entry_count < pv_entry_high_water && 2320 (pv = get_pv_entry(pmap, TRUE)) != NULL) { 2321 pv->pv_va = va; 2322 pvh = pa_to_pvh(pa); 2323 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 2324 return (TRUE); 2325 } else 2326 return (FALSE); 2327} 2328 2329/* 2330 * Fills a page table page with mappings to consecutive physical pages. 2331 */ 2332static void 2333pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) 2334{ 2335 pt_entry_t *pte; 2336 2337 for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { 2338 *pte = newpte; 2339 newpte += PAGE_SIZE; 2340 } 2341} 2342 2343/* 2344 * Tries to demote a 2- or 4MB page mapping. If demotion fails, the 2345 * 2- or 4MB page mapping is invalidated. 2346 */ 2347static boolean_t 2348pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) 2349{ 2350 pd_entry_t newpde, oldpde; 2351 pmap_t allpmaps_entry; 2352 pt_entry_t *firstpte, newpte; 2353 vm_paddr_t mptepa; 2354 vm_page_t free, mpte; 2355 2356 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2357 oldpde = *pde; 2358 KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), 2359 ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); 2360 mpte = pmap_lookup_pt_page(pmap, va); 2361 if (mpte != NULL) 2362 pmap_remove_pt_page(pmap, mpte); 2363 else { 2364 KASSERT((oldpde & PG_W) == 0, 2365 ("pmap_demote_pde: page table page for a wired mapping" 2366 " is missing")); 2367 2368 /* 2369 * Invalidate the 2- or 4MB page mapping and return 2370 * "failure" if the mapping was never accessed or the 2371 * allocation of the new page table page fails. 
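		 * Either way the caller is left with the entire 2/4MB range
		 * unmapped and invalidated, so a FALSE return also means the
		 * old mapping is gone.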
2372 */ 2373 if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, 2374 va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | 2375 VM_ALLOC_WIRED)) == NULL) { 2376 free = NULL; 2377 pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); 2378 pmap_invalidate_page(pmap, trunc_4mpage(va)); 2379 pmap_free_zero_pages(free); 2380 CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" 2381 " in pmap %p", va, pmap); 2382 return (FALSE); 2383 } 2384 if (va < VM_MAXUSER_ADDRESS) 2385 pmap->pm_stats.resident_count++; 2386 } 2387 mptepa = VM_PAGE_TO_PHYS(mpte); 2388 2389 /* 2390 * Temporarily map the page table page (mpte) into the kernel's 2391 * address space at either PADDR1 or PADDR2. 2392 */ 2393 if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) { 2394 if ((*PMAP1 & PG_FRAME) != mptepa) { 2395 *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M; 2396#ifdef SMP 2397 PMAP1cpu = PCPU_GET(cpuid); 2398#endif 2399 invlcaddr(PADDR1); 2400 PMAP1changed++; 2401 } else 2402#ifdef SMP 2403 if (PMAP1cpu != PCPU_GET(cpuid)) { 2404 PMAP1cpu = PCPU_GET(cpuid); 2405 invlcaddr(PADDR1); 2406 PMAP1changedcpu++; 2407 } else 2408#endif 2409 PMAP1unchanged++; 2410 firstpte = PADDR1; 2411 } else { 2412 mtx_lock(&PMAP2mutex); 2413 if ((*PMAP2 & PG_FRAME) != mptepa) { 2414 *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M; 2415 pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); 2416 } 2417 firstpte = PADDR2; 2418 } 2419 newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; 2420 KASSERT((oldpde & PG_A) != 0, 2421 ("pmap_demote_pde: oldpde is missing PG_A")); 2422 KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, 2423 ("pmap_demote_pde: oldpde is missing PG_M")); 2424 newpte = oldpde & ~PG_PS; 2425 if ((newpte & PG_PDE_PAT) != 0) 2426 newpte ^= PG_PDE_PAT | PG_PTE_PAT; 2427 2428 /* 2429 * If the page table page is new, initialize it. 2430 */ 2431 if (mpte->wire_count == 1) { 2432 mpte->wire_count = NPTEPG; 2433 pmap_fill_ptp(firstpte, newpte); 2434 } 2435 KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), 2436 ("pmap_demote_pde: firstpte and newpte map different physical" 2437 " addresses")); 2438 2439 /* 2440 * If the mapping has changed attributes, update the page table 2441 * entries. 2442 */ 2443 if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) 2444 pmap_fill_ptp(firstpte, newpte); 2445 2446 /* 2447 * Demote the mapping. This pmap is locked. The old PDE has 2448 * PG_A set. If the old PDE has PG_RW set, it also has PG_M 2449 * set. Thus, there is no danger of a race with another 2450 * processor changing the setting of PG_A and/or PG_M between 2451 * the read above and the store below. 2452 */ 2453 if (pmap == kernel_pmap) { 2454 /* 2455 * A harmless race exists between this loop and the bcopy() 2456 * in pmap_pinit() that initializes the kernel segment of 2457 * the new page table. Specifically, that bcopy() may copy 2458 * the new PDE from the PTD, which is first in allpmaps, to 2459 * the new page table before this loop updates that new 2460 * page table. 
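		 * A pmap that has already copied the new PDE simply satisfies
		 * the first half of the KASSERT below, so the race needs no
		 * further handling.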
2461 */ 2462 mtx_lock_spin(&allpmaps_lock); 2463 LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) { 2464 pde = pmap_pde(allpmaps_entry, va); 2465 KASSERT(*pde == newpde || (*pde & PG_PTE_PROMOTE) == 2466 (oldpde & PG_PTE_PROMOTE), 2467 ("pmap_demote_pde: pde was %#jx, expected %#jx", 2468 (uintmax_t)*pde, (uintmax_t)oldpde)); 2469 pde_store(pde, newpde); 2470 } 2471 mtx_unlock_spin(&allpmaps_lock); 2472 } else 2473 pde_store(pde, newpde); 2474 if (firstpte == PADDR2) 2475 mtx_unlock(&PMAP2mutex); 2476 2477 /* 2478 * Invalidate the recursive mapping of the page table page. 2479 */ 2480 pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); 2481 2482 /* 2483 * Demote the pv entry. This depends on the earlier demotion 2484 * of the mapping. Specifically, the (re)creation of a per- 2485 * page pv entry might trigger the execution of pmap_collect(), 2486 * which might reclaim a newly (re)created per-page pv entry 2487 * and destroy the associated mapping. In order to destroy 2488 * the mapping, the PDE must have already changed from mapping 2489 * the 2mpage to referencing the page table page. 2490 */ 2491 if ((oldpde & PG_MANAGED) != 0) 2492 pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME); 2493 2494 pmap_pde_demotions++; 2495 CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x" 2496 " in pmap %p", va, pmap); 2497 return (TRUE); 2498} 2499 2500/* 2501 * pmap_remove_pde: do the things to unmap a superpage in a process 2502 */ 2503static void 2504pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, 2505 vm_page_t *free) 2506{ 2507 struct md_page *pvh; 2508 pd_entry_t oldpde; 2509 vm_offset_t eva, va; 2510 vm_page_t m, mpte; 2511 2512 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2513 KASSERT((sva & PDRMASK) == 0, 2514 ("pmap_remove_pde: sva is not 4mpage aligned")); 2515 oldpde = pte_load_clear(pdq); 2516 if (oldpde & PG_W) 2517 pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; 2518 2519 /* 2520 * Machines that don't support invlpg, also don't support 2521 * PG_G. 
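	 * A global mapping is not flushed by a CR3 reload, so it must be
	 * invalidated explicitly and immediately here.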
2522 */ 2523 if (oldpde & PG_G) 2524 pmap_invalidate_page(kernel_pmap, sva); 2525 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2526 if (oldpde & PG_MANAGED) { 2527 pvh = pa_to_pvh(oldpde & PG_PS_FRAME); 2528 pmap_pvh_free(pvh, pmap, sva); 2529 eva = sva + NBPDR; 2530 for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); 2531 va < eva; va += PAGE_SIZE, m++) { 2532 if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2533 vm_page_dirty(m); 2534 if (oldpde & PG_A) 2535 vm_page_flag_set(m, PG_REFERENCED); 2536 if (TAILQ_EMPTY(&m->md.pv_list) && 2537 TAILQ_EMPTY(&pvh->pv_list)) 2538 vm_page_flag_clear(m, PG_WRITEABLE); 2539 } 2540 } 2541 if (pmap == kernel_pmap) { 2542 if (!pmap_demote_pde(pmap, pdq, sva)) 2543 panic("pmap_remove_pde: failed demotion"); 2544 } else { 2545 mpte = pmap_lookup_pt_page(pmap, sva); 2546 if (mpte != NULL) { 2547 pmap_remove_pt_page(pmap, mpte); 2548 pmap->pm_stats.resident_count--; 2549 KASSERT(mpte->wire_count == NPTEPG, 2550 ("pmap_remove_pde: pte page wire count error")); 2551 mpte->wire_count = 0; 2552 pmap_add_delayed_free_list(mpte, free, FALSE); 2553 atomic_subtract_int(&cnt.v_wire_count, 1); 2554 } 2555 } 2556} 2557 2558/* 2559 * pmap_remove_pte: do the things to unmap a page in a process 2560 */ 2561static int 2562pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free) 2563{ 2564 pt_entry_t oldpte; 2565 vm_page_t m; 2566 2567 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2568 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2569 oldpte = pte_load_clear(ptq); 2570 if (oldpte & PG_W) 2571 pmap->pm_stats.wired_count -= 1; 2572 /* 2573 * Machines that don't support invlpg, also don't support 2574 * PG_G. 2575 */ 2576 if (oldpte & PG_G) 2577 pmap_invalidate_page(kernel_pmap, va); 2578 pmap->pm_stats.resident_count -= 1; 2579 if (oldpte & PG_MANAGED) { 2580 m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); 2581 if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2582 vm_page_dirty(m); 2583 if (oldpte & PG_A) 2584 vm_page_flag_set(m, PG_REFERENCED); 2585 pmap_remove_entry(pmap, m, va); 2586 } 2587 return (pmap_unuse_pt(pmap, va, free)); 2588} 2589 2590/* 2591 * Remove a single page from a process address space 2592 */ 2593static void 2594pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free) 2595{ 2596 pt_entry_t *pte; 2597 2598 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2599 KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 2600 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2601 if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) 2602 return; 2603 pmap_remove_pte(pmap, pte, va, free); 2604 pmap_invalidate_page(pmap, va); 2605} 2606 2607/* 2608 * Remove the given range of addresses from the specified map. 2609 * 2610 * It is assumed that the start and end are properly 2611 * rounded to the page size. 2612 */ 2613void 2614pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2615{ 2616 vm_offset_t pdnxt; 2617 pd_entry_t ptpaddr; 2618 pt_entry_t *pte; 2619 vm_page_t free = NULL; 2620 int anyvalid; 2621 2622 /* 2623 * Perform an unsynchronized read. This is, however, safe. 2624 */ 2625 if (pmap->pm_stats.resident_count == 0) 2626 return; 2627 2628 anyvalid = 0; 2629 2630 vm_page_lock_queues(); 2631 sched_pin(); 2632 PMAP_LOCK(pmap); 2633 2634 /* 2635 * special handling of removing one page. a very 2636 * common operation and easy to short circuit some 2637 * code. 
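	 * This fast path is only taken when the PDE is not a 2/4MB page, so
	 * pmap_remove_page() never needs to demote a superpage mapping.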
2638 */ 2639 if ((sva + PAGE_SIZE == eva) && 2640 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 2641 pmap_remove_page(pmap, sva, &free); 2642 goto out; 2643 } 2644 2645 for (; sva < eva; sva = pdnxt) { 2646 unsigned pdirindex; 2647 2648 /* 2649 * Calculate index for next page table. 2650 */ 2651 pdnxt = (sva + NBPDR) & ~PDRMASK; 2652 if (pdnxt < sva) 2653 pdnxt = eva; 2654 if (pmap->pm_stats.resident_count == 0) 2655 break; 2656 2657 pdirindex = sva >> PDRSHIFT; 2658 ptpaddr = pmap->pm_pdir[pdirindex]; 2659 2660 /* 2661 * Weed out invalid mappings. Note: we assume that the page 2662 * directory table is always allocated, and in kernel virtual. 2663 */ 2664 if (ptpaddr == 0) 2665 continue; 2666 2667 /* 2668 * Check for large page. 2669 */ 2670 if ((ptpaddr & PG_PS) != 0) { 2671 /* 2672 * Are we removing the entire large page? If not, 2673 * demote the mapping and fall through. 2674 */ 2675 if (sva + NBPDR == pdnxt && eva >= pdnxt) { 2676 /* 2677 * The TLB entry for a PG_G mapping is 2678 * invalidated by pmap_remove_pde(). 2679 */ 2680 if ((ptpaddr & PG_G) == 0) 2681 anyvalid = 1; 2682 pmap_remove_pde(pmap, 2683 &pmap->pm_pdir[pdirindex], sva, &free); 2684 continue; 2685 } else if (!pmap_demote_pde(pmap, 2686 &pmap->pm_pdir[pdirindex], sva)) { 2687 /* The large page mapping was destroyed. */ 2688 continue; 2689 } 2690 } 2691 2692 /* 2693 * Limit our scan to either the end of the va represented 2694 * by the current page table page, or to the end of the 2695 * range being removed. 2696 */ 2697 if (pdnxt > eva) 2698 pdnxt = eva; 2699 2700 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2701 sva += PAGE_SIZE) { 2702 if (*pte == 0) 2703 continue; 2704 2705 /* 2706 * The TLB entry for a PG_G mapping is invalidated 2707 * by pmap_remove_pte(). 2708 */ 2709 if ((*pte & PG_G) == 0) 2710 anyvalid = 1; 2711 if (pmap_remove_pte(pmap, pte, sva, &free)) 2712 break; 2713 } 2714 } 2715out: 2716 sched_unpin(); 2717 if (anyvalid) 2718 pmap_invalidate_all(pmap); 2719 vm_page_unlock_queues(); 2720 PMAP_UNLOCK(pmap); 2721 pmap_free_zero_pages(free); 2722} 2723 2724/* 2725 * Routine: pmap_remove_all 2726 * Function: 2727 * Removes this physical page from 2728 * all physical maps in which it resides. 2729 * Reflects back modify bits to the pager. 2730 * 2731 * Notes: 2732 * Original versions of this routine were very 2733 * inefficient because they iteratively called 2734 * pmap_remove (slow...) 
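 * This version walks the page's pv lists directly: any 2/4MB mapping is
 * first demoted, and each remaining 4KB mapping is then removed under its
 * own pmap's lock.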
2735 */ 2736 2737void 2738pmap_remove_all(vm_page_t m) 2739{ 2740 struct md_page *pvh; 2741 pv_entry_t pv; 2742 pmap_t pmap; 2743 pt_entry_t *pte, tpte; 2744 pd_entry_t *pde; 2745 vm_offset_t va; 2746 vm_page_t free; 2747 2748 KASSERT((m->flags & PG_FICTITIOUS) == 0, 2749 ("pmap_remove_all: page %p is fictitious", m)); 2750 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2751 sched_pin(); 2752 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2753 while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { 2754 va = pv->pv_va; 2755 pmap = PV_PMAP(pv); 2756 PMAP_LOCK(pmap); 2757 pde = pmap_pde(pmap, va); 2758 (void)pmap_demote_pde(pmap, pde, va); 2759 PMAP_UNLOCK(pmap); 2760 } 2761 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2762 pmap = PV_PMAP(pv); 2763 PMAP_LOCK(pmap); 2764 pmap->pm_stats.resident_count--; 2765 pde = pmap_pde(pmap, pv->pv_va); 2766 KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" 2767 " a 4mpage in page %p's pv list", m)); 2768 pte = pmap_pte_quick(pmap, pv->pv_va); 2769 tpte = pte_load_clear(pte); 2770 if (tpte & PG_W) 2771 pmap->pm_stats.wired_count--; 2772 if (tpte & PG_A) 2773 vm_page_flag_set(m, PG_REFERENCED); 2774 2775 /* 2776 * Update the vm_page_t clean and reference bits. 2777 */ 2778 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2779 vm_page_dirty(m); 2780 free = NULL; 2781 pmap_unuse_pt(pmap, pv->pv_va, &free); 2782 pmap_invalidate_page(pmap, pv->pv_va); 2783 pmap_free_zero_pages(free); 2784 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2785 free_pv_entry(pmap, pv); 2786 PMAP_UNLOCK(pmap); 2787 } 2788 vm_page_flag_clear(m, PG_WRITEABLE); 2789 sched_unpin(); 2790} 2791 2792/* 2793 * pmap_protect_pde: do the things to protect a 4mpage in a process 2794 */ 2795static boolean_t 2796pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) 2797{ 2798 pd_entry_t newpde, oldpde; 2799 vm_offset_t eva, va; 2800 vm_page_t m; 2801 boolean_t anychanged; 2802 2803 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2804 KASSERT((sva & PDRMASK) == 0, 2805 ("pmap_protect_pde: sva is not 4mpage aligned")); 2806 anychanged = FALSE; 2807retry: 2808 oldpde = newpde = *pde; 2809 if (oldpde & PG_MANAGED) { 2810 eva = sva + NBPDR; 2811 for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); 2812 va < eva; va += PAGE_SIZE, m++) { 2813 /* 2814 * In contrast to the analogous operation on a 4KB page 2815 * mapping, the mapping's PG_A flag is not cleared and 2816 * the page's PG_REFERENCED flag is not set. The 2817 * reason is that pmap_demote_pde() expects that a 2/4MB 2818 * page mapping with a stored page table page has PG_A 2819 * set. 2820 */ 2821 if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2822 vm_page_dirty(m); 2823 } 2824 } 2825 if ((prot & VM_PROT_WRITE) == 0) 2826 newpde &= ~(PG_RW | PG_M); 2827#ifdef PAE 2828 if ((prot & VM_PROT_EXECUTE) == 0) 2829 newpde |= pg_nx; 2830#endif 2831 if (newpde != oldpde) { 2832 if (!pde_cmpset(pde, oldpde, newpde)) 2833 goto retry; 2834 if (oldpde & PG_G) 2835 pmap_invalidate_page(pmap, sva); 2836 else 2837 anychanged = TRUE; 2838 } 2839 return (anychanged); 2840} 2841 2842/* 2843 * Set the physical protection on the 2844 * specified range of this map as requested. 
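 * A request that revokes all access is handed to pmap_remove(); one that
 * takes away nothing this pmap can enforce returns immediately.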
2845 */ 2846void 2847pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2848{ 2849 vm_offset_t pdnxt; 2850 pd_entry_t ptpaddr; 2851 pt_entry_t *pte; 2852 int anychanged; 2853 2854 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2855 pmap_remove(pmap, sva, eva); 2856 return; 2857 } 2858 2859#ifdef PAE 2860 if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == 2861 (VM_PROT_WRITE|VM_PROT_EXECUTE)) 2862 return; 2863#else 2864 if (prot & VM_PROT_WRITE) 2865 return; 2866#endif 2867 2868 anychanged = 0; 2869 2870 vm_page_lock_queues(); 2871 sched_pin(); 2872 PMAP_LOCK(pmap); 2873 for (; sva < eva; sva = pdnxt) { 2874 pt_entry_t obits, pbits; 2875 unsigned pdirindex; 2876 2877 pdnxt = (sva + NBPDR) & ~PDRMASK; 2878 if (pdnxt < sva) 2879 pdnxt = eva; 2880 2881 pdirindex = sva >> PDRSHIFT; 2882 ptpaddr = pmap->pm_pdir[pdirindex]; 2883 2884 /* 2885 * Weed out invalid mappings. Note: we assume that the page 2886 * directory table is always allocated, and in kernel virtual. 2887 */ 2888 if (ptpaddr == 0) 2889 continue; 2890 2891 /* 2892 * Check for large page. 2893 */ 2894 if ((ptpaddr & PG_PS) != 0) { 2895 /* 2896 * Are we protecting the entire large page? If not, 2897 * demote the mapping and fall through. 2898 */ 2899 if (sva + NBPDR == pdnxt && eva >= pdnxt) { 2900 /* 2901 * The TLB entry for a PG_G mapping is 2902 * invalidated by pmap_protect_pde(). 2903 */ 2904 if (pmap_protect_pde(pmap, 2905 &pmap->pm_pdir[pdirindex], sva, prot)) 2906 anychanged = 1; 2907 continue; 2908 } else if (!pmap_demote_pde(pmap, 2909 &pmap->pm_pdir[pdirindex], sva)) { 2910 /* The large page mapping was destroyed. */ 2911 continue; 2912 } 2913 } 2914 2915 if (pdnxt > eva) 2916 pdnxt = eva; 2917 2918 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2919 sva += PAGE_SIZE) { 2920 vm_page_t m; 2921 2922retry: 2923 /* 2924 * Regardless of whether a pte is 32 or 64 bits in 2925 * size, PG_RW, PG_A, and PG_M are among the least 2926 * significant 32 bits. 2927 */ 2928 obits = pbits = *pte; 2929 if ((pbits & PG_V) == 0) 2930 continue; 2931 if (pbits & PG_MANAGED) { 2932 m = NULL; 2933 if (pbits & PG_A) { 2934 m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); 2935 vm_page_flag_set(m, PG_REFERENCED); 2936 pbits &= ~PG_A; 2937 } 2938 if ((pbits & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 2939 if (m == NULL) 2940 m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); 2941 vm_page_dirty(m); 2942 } 2943 } 2944 2945 if ((prot & VM_PROT_WRITE) == 0) 2946 pbits &= ~(PG_RW | PG_M); 2947#ifdef PAE 2948 if ((prot & VM_PROT_EXECUTE) == 0) 2949 pbits |= pg_nx; 2950#endif 2951 2952 if (pbits != obits) { 2953#ifdef PAE 2954 if (!atomic_cmpset_64(pte, obits, pbits)) 2955 goto retry; 2956#else 2957 if (!atomic_cmpset_int((u_int *)pte, obits, 2958 pbits)) 2959 goto retry; 2960#endif 2961 if (obits & PG_G) 2962 pmap_invalidate_page(pmap, sva); 2963 else 2964 anychanged = 1; 2965 } 2966 } 2967 } 2968 sched_unpin(); 2969 if (anychanged) 2970 pmap_invalidate_all(pmap); 2971 vm_page_unlock_queues(); 2972 PMAP_UNLOCK(pmap); 2973} 2974 2975/* 2976 * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are 2977 * within a single page table page (PTP) to a single 2- or 4MB page mapping. 2978 * For promotion to occur, two conditions must be met: (1) the 4KB page 2979 * mappings must map aligned, contiguous physical memory and (2) the 4KB page 2980 * mappings must have identical characteristics. 2981 * 2982 * Managed (PG_MANAGED) mappings within the kernel address space are not 2983 * promoted. 
The reason is that kernel PDEs are replicated in each pmap but 2984 * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel 2985 * pmap. 2986 */ 2987static void 2988pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) 2989{ 2990 pd_entry_t newpde; 2991 pmap_t allpmaps_entry; 2992 pt_entry_t *firstpte, oldpte, pa, *pte; 2993 vm_offset_t oldpteva; 2994 vm_page_t mpte; 2995 2996 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2997 2998 /* 2999 * Examine the first PTE in the specified PTP. Abort if this PTE is 3000 * either invalid, unused, or does not map the first 4KB physical page 3001 * within a 2- or 4MB page. 3002 */ 3003 firstpte = vtopte(trunc_4mpage(va)); 3004setpde: 3005 newpde = *firstpte; 3006 if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { 3007 pmap_pde_p_failures++; 3008 CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3009 " in pmap %p", va, pmap); 3010 return; 3011 } 3012 if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) { 3013 pmap_pde_p_failures++; 3014 CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3015 " in pmap %p", va, pmap); 3016 return; 3017 } 3018 if ((newpde & (PG_M | PG_RW)) == PG_RW) { 3019 /* 3020 * When PG_M is already clear, PG_RW can be cleared without 3021 * a TLB invalidation. 3022 */ 3023 if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde & 3024 ~PG_RW)) 3025 goto setpde; 3026 newpde &= ~PG_RW; 3027 } 3028 3029 /* 3030 * Examine each of the other PTEs in the specified PTP. Abort if this 3031 * PTE maps an unexpected 4KB physical page or does not have identical 3032 * characteristics to the first PTE. 3033 */ 3034 pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; 3035 for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { 3036setpte: 3037 oldpte = *pte; 3038 if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { 3039 pmap_pde_p_failures++; 3040 CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3041 " in pmap %p", va, pmap); 3042 return; 3043 } 3044 if ((oldpte & (PG_M | PG_RW)) == PG_RW) { 3045 /* 3046 * When PG_M is already clear, PG_RW can be cleared 3047 * without a TLB invalidation. 3048 */ 3049 if (!atomic_cmpset_int((u_int *)pte, oldpte, 3050 oldpte & ~PG_RW)) 3051 goto setpte; 3052 oldpte &= ~PG_RW; 3053 oldpteva = (oldpte & PG_FRAME & PDRMASK) | 3054 (va & ~PDRMASK); 3055 CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x" 3056 " in pmap %p", oldpteva, pmap); 3057 } 3058 if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { 3059 pmap_pde_p_failures++; 3060 CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3061 " in pmap %p", va, pmap); 3062 return; 3063 } 3064 pa -= PAGE_SIZE; 3065 } 3066 3067 /* 3068 * Save the page table page in its current state until the PDE 3069 * mapping the superpage is demoted by pmap_demote_pde() or 3070 * destroyed by pmap_remove_pde(). 3071 */ 3072 mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); 3073 KASSERT(mpte >= vm_page_array && 3074 mpte < &vm_page_array[vm_page_array_size], 3075 ("pmap_promote_pde: page table page is out of range")); 3076 KASSERT(mpte->pindex == va >> PDRSHIFT, 3077 ("pmap_promote_pde: page table page's pindex is wrong")); 3078 pmap_insert_pt_page(pmap, mpte); 3079 3080 /* 3081 * Promote the pv entries. 3082 */ 3083 if ((newpde & PG_MANAGED) != 0) 3084 pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME); 3085 3086 /* 3087 * Propagate the PAT index to its proper position. 3088 */ 3089 if ((newpde & PG_PTE_PAT) != 0) 3090 newpde ^= PG_PDE_PAT | PG_PTE_PAT; 3091 3092 /* 3093 * Map the superpage. 
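	 * A kernel promotion must be replicated into every pmap's copy of
	 * the kernel PDEs, so walk allpmaps under its lock.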
3094 */ 3095 if (pmap == kernel_pmap) { 3096 mtx_lock_spin(&allpmaps_lock); 3097 LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) { 3098 pde = pmap_pde(allpmaps_entry, va); 3099 pde_store(pde, PG_PS | newpde); 3100 } 3101 mtx_unlock_spin(&allpmaps_lock); 3102 } else 3103 pde_store(pde, PG_PS | newpde); 3104 3105 pmap_pde_promotions++; 3106 CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x" 3107 " in pmap %p", va, pmap); 3108} 3109 3110/* 3111 * Insert the given physical page (p) at 3112 * the specified virtual address (v) in the 3113 * target physical map with the protection requested. 3114 * 3115 * If specified, the page will be wired down, meaning 3116 * that the related pte can not be reclaimed. 3117 * 3118 * NB: This is the only routine which MAY NOT lazy-evaluate 3119 * or lose information. That is, this routine must actually 3120 * insert this page into the given map NOW. 3121 */ 3122void 3123pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 3124 vm_prot_t prot, boolean_t wired) 3125{ 3126 vm_paddr_t pa; 3127 pd_entry_t *pde; 3128 pt_entry_t *pte; 3129 vm_paddr_t opa; 3130 pt_entry_t origpte, newpte; 3131 vm_page_t mpte, om; 3132 boolean_t invlva; 3133 3134 va = trunc_page(va); 3135 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 3136 KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, 3137 ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); 3138 3139 mpte = NULL; 3140 3141 vm_page_lock_queues(); 3142 PMAP_LOCK(pmap); 3143 sched_pin(); 3144 3145 /* 3146 * In the case that a page table page is not 3147 * resident, we are creating it here. 3148 */ 3149 if (va < VM_MAXUSER_ADDRESS) { 3150 mpte = pmap_allocpte(pmap, va, M_WAITOK); 3151 } 3152 3153 pde = pmap_pde(pmap, va); 3154 if ((*pde & PG_PS) != 0) 3155 panic("pmap_enter: attempted pmap_enter on 4MB page"); 3156 pte = pmap_pte_quick(pmap, va); 3157 3158 /* 3159 * Page Directory table entry not valid, we need a new PT page 3160 */ 3161 if (pte == NULL) { 3162 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", 3163 (uintmax_t)pmap->pm_pdir[PTDPTDI], va); 3164 } 3165 3166 pa = VM_PAGE_TO_PHYS(m); 3167 om = NULL; 3168 origpte = *pte; 3169 opa = origpte & PG_FRAME; 3170 3171 /* 3172 * Mapping has not changed, must be protection or wiring change. 3173 */ 3174 if (origpte && (opa == pa)) { 3175 /* 3176 * Wiring change, just update stats. We don't worry about 3177 * wiring PT pages as they remain resident as long as there 3178 * are valid mappings in them. Hence, if a user page is wired, 3179 * the PT page will be also. 3180 */ 3181 if (wired && ((origpte & PG_W) == 0)) 3182 pmap->pm_stats.wired_count++; 3183 else if (!wired && (origpte & PG_W)) 3184 pmap->pm_stats.wired_count--; 3185 3186 /* 3187 * Remove extra pte reference 3188 */ 3189 if (mpte) 3190 mpte->wire_count--; 3191 3192 /* 3193 * We might be turning off write access to the page, 3194 * so we go ahead and sense modify status. 3195 */ 3196 if (origpte & PG_MANAGED) { 3197 om = m; 3198 pa |= PG_MANAGED; 3199 } 3200 goto validate; 3201 } 3202 /* 3203 * Mapping has changed, invalidate old range and fall through to 3204 * handle validating new mapping. 
3205 */ 3206 if (opa) { 3207 if (origpte & PG_W) 3208 pmap->pm_stats.wired_count--; 3209 if (origpte & PG_MANAGED) { 3210 om = PHYS_TO_VM_PAGE(opa); 3211 pmap_remove_entry(pmap, om, va); 3212 } 3213 if (mpte != NULL) { 3214 mpte->wire_count--; 3215 KASSERT(mpte->wire_count > 0, 3216 ("pmap_enter: missing reference to page table page," 3217 " va: 0x%x", va)); 3218 } 3219 } else 3220 pmap->pm_stats.resident_count++; 3221 3222 /* 3223 * Enter on the PV list if part of our managed memory. 3224 */ 3225 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 3226 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 3227 ("pmap_enter: managed mapping within the clean submap")); 3228 pmap_insert_entry(pmap, va, m); 3229 pa |= PG_MANAGED; 3230 } 3231 3232 /* 3233 * Increment counters 3234 */ 3235 if (wired) 3236 pmap->pm_stats.wired_count++; 3237 3238validate: 3239 /* 3240 * Now validate mapping with desired protection/wiring. 3241 */ 3242 newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); 3243 if ((prot & VM_PROT_WRITE) != 0) { 3244 newpte |= PG_RW; 3245 vm_page_flag_set(m, PG_WRITEABLE); 3246 } 3247#ifdef PAE 3248 if ((prot & VM_PROT_EXECUTE) == 0) 3249 newpte |= pg_nx; 3250#endif 3251 if (wired) 3252 newpte |= PG_W; 3253 if (va < VM_MAXUSER_ADDRESS) 3254 newpte |= PG_U; 3255 if (pmap == kernel_pmap) 3256 newpte |= pgeflag; 3257 3258 /* 3259 * if the mapping or permission bits are different, we need 3260 * to update the pte. 3261 */ 3262 if ((origpte & ~(PG_M|PG_A)) != newpte) { 3263 newpte |= PG_A; 3264 if ((access & VM_PROT_WRITE) != 0) 3265 newpte |= PG_M; 3266 if (origpte & PG_V) { 3267 invlva = FALSE; 3268 origpte = pte_load_store(pte, newpte); 3269 if (origpte & PG_A) { 3270 if (origpte & PG_MANAGED) 3271 vm_page_flag_set(om, PG_REFERENCED); 3272 if (opa != VM_PAGE_TO_PHYS(m)) 3273 invlva = TRUE; 3274#ifdef PAE 3275 if ((origpte & PG_NX) == 0 && 3276 (newpte & PG_NX) != 0) 3277 invlva = TRUE; 3278#endif 3279 } 3280 if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 3281 if ((origpte & PG_MANAGED) != 0) 3282 vm_page_dirty(om); 3283 if ((prot & VM_PROT_WRITE) == 0) 3284 invlva = TRUE; 3285 } 3286 if (invlva) 3287 pmap_invalidate_page(pmap, va); 3288 } else 3289 pte_store(pte, newpte); 3290 } 3291 3292 /* 3293 * If both the page table page and the reservation are fully 3294 * populated, then attempt promotion. 3295 */ 3296 if ((mpte == NULL || mpte->wire_count == NPTEPG) && 3297 pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0) 3298 pmap_promote_pde(pmap, pde, va); 3299 3300 sched_unpin(); 3301 vm_page_unlock_queues(); 3302 PMAP_UNLOCK(pmap); 3303} 3304 3305/* 3306 * Tries to create a 2- or 4MB page mapping. Returns TRUE if successful and 3307 * FALSE otherwise. Fails if (1) a page table page cannot be allocated without 3308 * blocking, (2) a mapping already exists at the specified virtual address, or 3309 * (3) a pv entry cannot be allocated without reclaiming another pv entry. 
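 * Unlike pmap_enter(), no page table page is allocated here; the mapping is
 * written directly into the page directory entry.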
3310 */ 3311static boolean_t 3312pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3313{ 3314 pd_entry_t *pde, newpde; 3315 3316 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3317 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3318 pde = pmap_pde(pmap, va); 3319 if (*pde != 0) { 3320 CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" 3321 " in pmap %p", va, pmap); 3322 return (FALSE); 3323 } 3324 newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) | 3325 PG_PS | PG_V; 3326 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 3327 newpde |= PG_MANAGED; 3328 3329 /* 3330 * Abort this mapping if its PV entry could not be created. 3331 */ 3332 if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) { 3333 CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" 3334 " in pmap %p", va, pmap); 3335 return (FALSE); 3336 } 3337 } 3338#ifdef PAE 3339 if ((prot & VM_PROT_EXECUTE) == 0) 3340 newpde |= pg_nx; 3341#endif 3342 if (va < VM_MAXUSER_ADDRESS) 3343 newpde |= PG_U; 3344 3345 /* 3346 * Increment counters. 3347 */ 3348 pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; 3349 3350 /* 3351 * Map the superpage. 3352 */ 3353 pde_store(pde, newpde); 3354 3355 pmap_pde_mappings++; 3356 CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" 3357 " in pmap %p", va, pmap); 3358 return (TRUE); 3359} 3360 3361/* 3362 * Maps a sequence of resident pages belonging to the same object. 3363 * The sequence begins with the given page m_start. This page is 3364 * mapped at the given virtual address start. Each subsequent page is 3365 * mapped at a virtual address that is offset from start by the same 3366 * amount as the page is offset from m_start within the object. The 3367 * last page in the sequence is the page with the largest offset from 3368 * m_start that can be mapped at a virtual address less than the given 3369 * virtual address end. Not every virtual page between start and end 3370 * is mapped; only those for which a resident page exists with the 3371 * corresponding offset from m_start are mapped. 3372 */ 3373void 3374pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 3375 vm_page_t m_start, vm_prot_t prot) 3376{ 3377 vm_offset_t va; 3378 vm_page_t m, mpte; 3379 vm_pindex_t diff, psize; 3380 3381 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 3382 psize = atop(end - start); 3383 mpte = NULL; 3384 m = m_start; 3385 PMAP_LOCK(pmap); 3386 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 3387 va = start + ptoa(diff); 3388 if ((va & PDRMASK) == 0 && va + NBPDR <= end && 3389 (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 && 3390 pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 && 3391 pmap_enter_pde(pmap, va, m, prot)) 3392 m = &m[NBPDR / PAGE_SIZE - 1]; 3393 else 3394 mpte = pmap_enter_quick_locked(pmap, va, m, prot, 3395 mpte); 3396 m = TAILQ_NEXT(m, listq); 3397 } 3398 PMAP_UNLOCK(pmap); 3399} 3400 3401/* 3402 * this code makes some *MAJOR* assumptions: 3403 * 1. Current pmap & pmap exists. 3404 * 2. Not wired. 3405 * 3. Read access. 3406 * 4. No page table pages. 3407 * but is *MUCH* faster than pmap_enter... 
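 * Neither pmap_enter_quick() nor pmap_enter_quick_locked() ever sleeps; on
 * any allocation failure the mapping is simply skipped.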
3408 */ 3409 3410void 3411pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3412{ 3413 3414 PMAP_LOCK(pmap); 3415 (void) pmap_enter_quick_locked(pmap, va, m, prot, NULL); 3416 PMAP_UNLOCK(pmap); 3417} 3418 3419static vm_page_t 3420pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 3421 vm_prot_t prot, vm_page_t mpte) 3422{ 3423 pt_entry_t *pte; 3424 vm_paddr_t pa; 3425 vm_page_t free; 3426 3427 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 3428 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 3429 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 3430 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3431 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3432 3433 /* 3434 * In the case that a page table page is not 3435 * resident, we are creating it here. 3436 */ 3437 if (va < VM_MAXUSER_ADDRESS) { 3438 unsigned ptepindex; 3439 pd_entry_t ptepa; 3440 3441 /* 3442 * Calculate pagetable page index 3443 */ 3444 ptepindex = va >> PDRSHIFT; 3445 if (mpte && (mpte->pindex == ptepindex)) { 3446 mpte->wire_count++; 3447 } else { 3448 /* 3449 * Get the page directory entry 3450 */ 3451 ptepa = pmap->pm_pdir[ptepindex]; 3452 3453 /* 3454 * If the page table page is mapped, we just increment 3455 * the hold count, and activate it. 3456 */ 3457 if (ptepa) { 3458 if (ptepa & PG_PS) 3459 return (NULL); 3460 mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); 3461 mpte->wire_count++; 3462 } else { 3463 mpte = _pmap_allocpte(pmap, ptepindex, 3464 M_NOWAIT); 3465 if (mpte == NULL) 3466 return (mpte); 3467 } 3468 } 3469 } else { 3470 mpte = NULL; 3471 } 3472 3473 /* 3474 * This call to vtopte makes the assumption that we are 3475 * entering the page into the current pmap. In order to support 3476 * quick entry into any pmap, one would likely use pmap_pte_quick. 3477 * But that isn't as quick as vtopte. 3478 */ 3479 pte = vtopte(va); 3480 if (*pte) { 3481 if (mpte != NULL) { 3482 mpte->wire_count--; 3483 mpte = NULL; 3484 } 3485 return (mpte); 3486 } 3487 3488 /* 3489 * Enter on the PV list if part of our managed memory. 3490 */ 3491 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && 3492 !pmap_try_insert_pv_entry(pmap, va, m)) { 3493 if (mpte != NULL) { 3494 free = NULL; 3495 if (pmap_unwire_pte_hold(pmap, mpte, &free)) { 3496 pmap_invalidate_page(pmap, va); 3497 pmap_free_zero_pages(free); 3498 } 3499 3500 mpte = NULL; 3501 } 3502 return (mpte); 3503 } 3504 3505 /* 3506 * Increment counters 3507 */ 3508 pmap->pm_stats.resident_count++; 3509 3510 pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); 3511#ifdef PAE 3512 if ((prot & VM_PROT_EXECUTE) == 0) 3513 pa |= pg_nx; 3514#endif 3515 3516 /* 3517 * Now validate mapping with RO protection 3518 */ 3519 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 3520 pte_store(pte, pa | PG_V | PG_U); 3521 else 3522 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 3523 return mpte; 3524} 3525 3526/* 3527 * Make a temporary mapping for a physical address. This is only intended 3528 * to be used for panic dumps. 3529 */ 3530void * 3531pmap_kenter_temporary(vm_paddr_t pa, int i) 3532{ 3533 vm_offset_t va; 3534 3535 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 3536 pmap_kenter(va, pa); 3537 invlpg(va); 3538 return ((void *)crashdumpmap); 3539} 3540 3541/* 3542 * This code maps large physical mmap regions into the 3543 * processor address space. Note that some shortcuts 3544 * are taken, but the code works. 
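 * Only device and SG objects are expected here, and only 2/4MB page mappings
 * are created; the request is quietly dropped unless the range, its physical
 * backing, and its memory attributes all line up.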
3545 */ 3546void 3547pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 3548 vm_pindex_t pindex, vm_size_t size) 3549{ 3550 pd_entry_t *pde; 3551 vm_paddr_t pa, ptepa; 3552 vm_page_t p; 3553 int pat_mode; 3554 3555 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 3556 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3557 ("pmap_object_init_pt: non-device object")); 3558 if (pseflag && 3559 (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { 3560 if (!vm_object_populate(object, pindex, pindex + atop(size))) 3561 return; 3562 p = vm_page_lookup(object, pindex); 3563 KASSERT(p->valid == VM_PAGE_BITS_ALL, 3564 ("pmap_object_init_pt: invalid page %p", p)); 3565 pat_mode = p->md.pat_mode; 3566 3567 /* 3568 * Abort the mapping if the first page is not physically 3569 * aligned to a 2/4MB page boundary. 3570 */ 3571 ptepa = VM_PAGE_TO_PHYS(p); 3572 if (ptepa & (NBPDR - 1)) 3573 return; 3574 3575 /* 3576 * Skip the first page. Abort the mapping if the rest of 3577 * the pages are not physically contiguous or have differing 3578 * memory attributes. 3579 */ 3580 p = TAILQ_NEXT(p, listq); 3581 for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; 3582 pa += PAGE_SIZE) { 3583 KASSERT(p->valid == VM_PAGE_BITS_ALL, 3584 ("pmap_object_init_pt: invalid page %p", p)); 3585 if (pa != VM_PAGE_TO_PHYS(p) || 3586 pat_mode != p->md.pat_mode) 3587 return; 3588 p = TAILQ_NEXT(p, listq); 3589 } 3590 3591 /* 3592 * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and 3593 * "size" is a multiple of 2/4M, adding the PAT setting to 3594 * "pa" will not affect the termination of this loop. 3595 */ 3596 PMAP_LOCK(pmap); 3597 for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + 3598 size; pa += NBPDR) { 3599 pde = pmap_pde(pmap, addr); 3600 if (*pde == 0) { 3601 pde_store(pde, pa | PG_PS | PG_M | PG_A | 3602 PG_U | PG_RW | PG_V); 3603 pmap->pm_stats.resident_count += NBPDR / 3604 PAGE_SIZE; 3605 pmap_pde_mappings++; 3606 } 3607 /* Else continue on if the PDE is already valid. */ 3608 addr += NBPDR; 3609 } 3610 PMAP_UNLOCK(pmap); 3611 } 3612} 3613 3614/* 3615 * Routine: pmap_change_wiring 3616 * Function: Change the wiring attribute for a map/virtual-address 3617 * pair. 3618 * In/out conditions: 3619 * The mapping must already exist in the pmap. 3620 */ 3621void 3622pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 3623{ 3624 pd_entry_t *pde; 3625 pt_entry_t *pte; 3626 boolean_t are_queues_locked; 3627 3628 are_queues_locked = FALSE; 3629retry: 3630 PMAP_LOCK(pmap); 3631 pde = pmap_pde(pmap, va); 3632 if ((*pde & PG_PS) != 0) { 3633 if (!wired != ((*pde & PG_W) == 0)) { 3634 if (!are_queues_locked) { 3635 are_queues_locked = TRUE; 3636 if (!mtx_trylock(&vm_page_queue_mtx)) { 3637 PMAP_UNLOCK(pmap); 3638 vm_page_lock_queues(); 3639 goto retry; 3640 } 3641 } 3642 if (!pmap_demote_pde(pmap, pde, va)) 3643 panic("pmap_change_wiring: demotion failed"); 3644 } else 3645 goto out; 3646 } 3647 pte = pmap_pte(pmap, va); 3648 3649 if (wired && !pmap_pte_w(pte)) 3650 pmap->pm_stats.wired_count++; 3651 else if (!wired && pmap_pte_w(pte)) 3652 pmap->pm_stats.wired_count--; 3653 3654 /* 3655 * Wiring is not a hardware characteristic so there is no need to 3656 * invalidate TLB. 
3657 */ 3658 pmap_pte_set_w(pte, wired); 3659 pmap_pte_release(pte); 3660out: 3661 if (are_queues_locked) 3662 vm_page_unlock_queues(); 3663 PMAP_UNLOCK(pmap); 3664} 3665 3666 3667 3668/* 3669 * Copy the range specified by src_addr/len 3670 * from the source map to the range dst_addr/len 3671 * in the destination map. 3672 * 3673 * This routine is only advisory and need not do anything. 3674 */ 3675 3676void 3677pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3678 vm_offset_t src_addr) 3679{ 3680 vm_page_t free; 3681 vm_offset_t addr; 3682 vm_offset_t end_addr = src_addr + len; 3683 vm_offset_t pdnxt; 3684 3685 if (dst_addr != src_addr) 3686 return; 3687 3688 if (!pmap_is_current(src_pmap)) 3689 return; 3690 3691 vm_page_lock_queues(); 3692 if (dst_pmap < src_pmap) { 3693 PMAP_LOCK(dst_pmap); 3694 PMAP_LOCK(src_pmap); 3695 } else { 3696 PMAP_LOCK(src_pmap); 3697 PMAP_LOCK(dst_pmap); 3698 } 3699 sched_pin(); 3700 for (addr = src_addr; addr < end_addr; addr = pdnxt) { 3701 pt_entry_t *src_pte, *dst_pte; 3702 vm_page_t dstmpte, srcmpte; 3703 pd_entry_t srcptepaddr; 3704 unsigned ptepindex; 3705 3706 KASSERT(addr < UPT_MIN_ADDRESS, 3707 ("pmap_copy: invalid to pmap_copy page tables")); 3708 3709 pdnxt = (addr + NBPDR) & ~PDRMASK; 3710 if (pdnxt < addr) 3711 pdnxt = end_addr; 3712 ptepindex = addr >> PDRSHIFT; 3713 3714 srcptepaddr = src_pmap->pm_pdir[ptepindex]; 3715 if (srcptepaddr == 0) 3716 continue; 3717 3718 if (srcptepaddr & PG_PS) { 3719 if (dst_pmap->pm_pdir[ptepindex] == 0 && 3720 ((srcptepaddr & PG_MANAGED) == 0 || 3721 pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & 3722 PG_PS_FRAME))) { 3723 dst_pmap->pm_pdir[ptepindex] = srcptepaddr & 3724 ~PG_W; 3725 dst_pmap->pm_stats.resident_count += 3726 NBPDR / PAGE_SIZE; 3727 } 3728 continue; 3729 } 3730 3731 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); 3732 KASSERT(srcmpte->wire_count > 0, 3733 ("pmap_copy: source page table page is unused")); 3734 3735 if (pdnxt > end_addr) 3736 pdnxt = end_addr; 3737 3738 src_pte = vtopte(addr); 3739 while (addr < pdnxt) { 3740 pt_entry_t ptetemp; 3741 ptetemp = *src_pte; 3742 /* 3743 * we only virtual copy managed pages 3744 */ 3745 if ((ptetemp & PG_MANAGED) != 0) { 3746 dstmpte = pmap_allocpte(dst_pmap, addr, 3747 M_NOWAIT); 3748 if (dstmpte == NULL) 3749 goto out; 3750 dst_pte = pmap_pte_quick(dst_pmap, addr); 3751 if (*dst_pte == 0 && 3752 pmap_try_insert_pv_entry(dst_pmap, addr, 3753 PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { 3754 /* 3755 * Clear the wired, modified, and 3756 * accessed (referenced) bits 3757 * during the copy. 
3758 */ 3759 *dst_pte = ptetemp & ~(PG_W | PG_M | 3760 PG_A); 3761 dst_pmap->pm_stats.resident_count++; 3762 } else { 3763 free = NULL; 3764 if (pmap_unwire_pte_hold(dst_pmap, 3765 dstmpte, &free)) { 3766 pmap_invalidate_page(dst_pmap, 3767 addr); 3768 pmap_free_zero_pages(free); 3769 } 3770 goto out; 3771 } 3772 if (dstmpte->wire_count >= srcmpte->wire_count) 3773 break; 3774 } 3775 addr += PAGE_SIZE; 3776 src_pte++; 3777 } 3778 } 3779out: 3780 sched_unpin(); 3781 vm_page_unlock_queues(); 3782 PMAP_UNLOCK(src_pmap); 3783 PMAP_UNLOCK(dst_pmap); 3784} 3785 3786static __inline void 3787pagezero(void *page) 3788{ 3789#if defined(I686_CPU) 3790 if (cpu_class == CPUCLASS_686) { 3791#if defined(CPU_ENABLE_SSE) 3792 if (cpu_feature & CPUID_SSE2) 3793 sse2_pagezero(page); 3794 else 3795#endif 3796 i686_pagezero(page); 3797 } else 3798#endif 3799 bzero(page, PAGE_SIZE); 3800} 3801 3802/* 3803 * pmap_zero_page zeros the specified hardware page by mapping 3804 * the page into KVM and using bzero to clear its contents. 3805 */ 3806void 3807pmap_zero_page(vm_page_t m) 3808{ 3809 struct sysmaps *sysmaps; 3810 3811 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3812 mtx_lock(&sysmaps->lock); 3813 if (*sysmaps->CMAP2) 3814 panic("pmap_zero_page: CMAP2 busy"); 3815 sched_pin(); 3816 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | 3817 pmap_cache_bits(m->md.pat_mode, 0); 3818 invlcaddr(sysmaps->CADDR2); 3819 pagezero(sysmaps->CADDR2); 3820 *sysmaps->CMAP2 = 0; 3821 sched_unpin(); 3822 mtx_unlock(&sysmaps->lock); 3823} 3824 3825/* 3826 * pmap_zero_page_area zeros the specified hardware page by mapping 3827 * the page into KVM and using bzero to clear its contents. 3828 * 3829 * off and size may not cover an area beyond a single hardware page. 3830 */ 3831void 3832pmap_zero_page_area(vm_page_t m, int off, int size) 3833{ 3834 struct sysmaps *sysmaps; 3835 3836 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3837 mtx_lock(&sysmaps->lock); 3838 if (*sysmaps->CMAP2) 3839 panic("pmap_zero_page_area: CMAP2 busy"); 3840 sched_pin(); 3841 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | 3842 pmap_cache_bits(m->md.pat_mode, 0); 3843 invlcaddr(sysmaps->CADDR2); 3844 if (off == 0 && size == PAGE_SIZE) 3845 pagezero(sysmaps->CADDR2); 3846 else 3847 bzero((char *)sysmaps->CADDR2 + off, size); 3848 *sysmaps->CMAP2 = 0; 3849 sched_unpin(); 3850 mtx_unlock(&sysmaps->lock); 3851} 3852 3853/* 3854 * pmap_zero_page_idle zeros the specified hardware page by mapping 3855 * the page into KVM and using bzero to clear its contents. This 3856 * is intended to be called from the vm_pagezero process only and 3857 * outside of Giant. 3858 */ 3859void 3860pmap_zero_page_idle(vm_page_t m) 3861{ 3862 3863 if (*CMAP3) 3864 panic("pmap_zero_page_idle: CMAP3 busy"); 3865 sched_pin(); 3866 *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | 3867 pmap_cache_bits(m->md.pat_mode, 0); 3868 invlcaddr(CADDR3); 3869 pagezero(CADDR3); 3870 *CMAP3 = 0; 3871 sched_unpin(); 3872} 3873 3874/* 3875 * pmap_copy_page copies the specified (machine independent) 3876 * page by mapping the page into virtual memory and using 3877 * bcopy to copy the page, one machine dependent page at a 3878 * time. 
3879 */ 3880void 3881pmap_copy_page(vm_page_t src, vm_page_t dst) 3882{ 3883 struct sysmaps *sysmaps; 3884 3885 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3886 mtx_lock(&sysmaps->lock); 3887 if (*sysmaps->CMAP1) 3888 panic("pmap_copy_page: CMAP1 busy"); 3889 if (*sysmaps->CMAP2) 3890 panic("pmap_copy_page: CMAP2 busy"); 3891 sched_pin(); 3892 invlpg((u_int)sysmaps->CADDR1); 3893 invlpg((u_int)sysmaps->CADDR2); 3894 *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A | 3895 pmap_cache_bits(src->md.pat_mode, 0); 3896 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M | 3897 pmap_cache_bits(dst->md.pat_mode, 0); 3898 bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); 3899 *sysmaps->CMAP1 = 0; 3900 *sysmaps->CMAP2 = 0; 3901 sched_unpin(); 3902 mtx_unlock(&sysmaps->lock); 3903} 3904 3905/* 3906 * Returns true if the pmap's pv is one of the first 3907 * 16 pvs linked to from this page. This count may 3908 * be changed upwards or downwards in the future; it 3909 * is only necessary that true be returned for a small 3910 * subset of pmaps for proper page aging. 3911 */ 3912boolean_t 3913pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 3914{ 3915 struct md_page *pvh; 3916 pv_entry_t pv; 3917 int loops = 0; 3918 3919 if (m->flags & PG_FICTITIOUS) 3920 return FALSE; 3921 3922 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3923 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3924 if (PV_PMAP(pv) == pmap) { 3925 return TRUE; 3926 } 3927 loops++; 3928 if (loops >= 16) 3929 break; 3930 } 3931 if (loops < 16) { 3932 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3933 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 3934 if (PV_PMAP(pv) == pmap) 3935 return (TRUE); 3936 loops++; 3937 if (loops >= 16) 3938 break; 3939 } 3940 } 3941 return (FALSE); 3942} 3943 3944/* 3945 * pmap_page_wired_mappings: 3946 * 3947 * Return the number of managed mappings to the given physical page 3948 * that are wired. 3949 */ 3950int 3951pmap_page_wired_mappings(vm_page_t m) 3952{ 3953 int count; 3954 3955 count = 0; 3956 if ((m->flags & PG_FICTITIOUS) != 0) 3957 return (count); 3958 count = pmap_pvh_wired_mappings(&m->md, count); 3959 return (pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count)); 3960} 3961 3962/* 3963 * pmap_pvh_wired_mappings: 3964 * 3965 * Return the updated number "count" of managed mappings that are wired. 3966 */ 3967static int 3968pmap_pvh_wired_mappings(struct md_page *pvh, int count) 3969{ 3970 pmap_t pmap; 3971 pt_entry_t *pte; 3972 pv_entry_t pv; 3973 3974 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3975 sched_pin(); 3976 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 3977 pmap = PV_PMAP(pv); 3978 PMAP_LOCK(pmap); 3979 pte = pmap_pte_quick(pmap, pv->pv_va); 3980 if ((*pte & PG_W) != 0) 3981 count++; 3982 PMAP_UNLOCK(pmap); 3983 } 3984 sched_unpin(); 3985 return (count); 3986} 3987 3988/* 3989 * Returns TRUE if the given page is mapped individually or as part of 3990 * a 4mpage. Otherwise, returns FALSE. 3991 */ 3992boolean_t 3993pmap_page_is_mapped(vm_page_t m) 3994{ 3995 struct md_page *pvh; 3996 3997 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) 3998 return (FALSE); 3999 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4000 if (TAILQ_EMPTY(&m->md.pv_list)) { 4001 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4002 return (!TAILQ_EMPTY(&pvh->pv_list)); 4003 } else 4004 return (TRUE); 4005} 4006 4007/* 4008 * Remove all pages from specified address space 4009 * this aids process exit speeds. 
Also, this code 4010 * is special cased for current process only, but 4011 * can have the more generic (and slightly slower) 4012 * mode enabled. This is much faster than pmap_remove 4013 * in the case of running down an entire address space. 4014 */ 4015void 4016pmap_remove_pages(pmap_t pmap) 4017{ 4018 pt_entry_t *pte, tpte; 4019 vm_page_t free = NULL; 4020 vm_page_t m, mpte, mt; 4021 pv_entry_t pv; 4022 struct md_page *pvh; 4023 struct pv_chunk *pc, *npc; 4024 int field, idx; 4025 int32_t bit; 4026 uint32_t inuse, bitmask; 4027 int allfree; 4028 4029 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 4030 printf("warning: pmap_remove_pages called with non-current pmap\n"); 4031 return; 4032 } 4033 vm_page_lock_queues(); 4034 PMAP_LOCK(pmap); 4035 sched_pin(); 4036 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 4037 allfree = 1; 4038 for (field = 0; field < _NPCM; field++) { 4039 inuse = (~(pc->pc_map[field])) & pc_freemask[field]; 4040 while (inuse != 0) { 4041 bit = bsfl(inuse); 4042 bitmask = 1UL << bit; 4043 idx = field * 32 + bit; 4044 pv = &pc->pc_pventry[idx]; 4045 inuse &= ~bitmask; 4046 4047 pte = pmap_pde(pmap, pv->pv_va); 4048 tpte = *pte; 4049 if ((tpte & PG_PS) == 0) { 4050 pte = vtopte(pv->pv_va); 4051 tpte = *pte & ~PG_PTE_PAT; 4052 } 4053 4054 if (tpte == 0) { 4055 printf( 4056 "TPTE at %p IS ZERO @ VA %08x\n", 4057 pte, pv->pv_va); 4058 panic("bad pte"); 4059 } 4060 4061/* 4062 * We cannot remove wired pages from a process' mapping at this time 4063 */ 4064 if (tpte & PG_W) { 4065 allfree = 0; 4066 continue; 4067 } 4068 4069 m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); 4070 KASSERT(m->phys_addr == (tpte & PG_FRAME), 4071 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 4072 m, (uintmax_t)m->phys_addr, 4073 (uintmax_t)tpte)); 4074 4075 KASSERT(m < &vm_page_array[vm_page_array_size], 4076 ("pmap_remove_pages: bad tpte %#jx", 4077 (uintmax_t)tpte)); 4078 4079 pte_clear(pte); 4080 4081 /* 4082 * Update the vm_page_t clean/reference bits. 
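 * Note that PG_M is only honored below when PG_RW is also set: whenever
 * write permission is revoked (see pmap_remove_write()), PG_M is
 * transferred to the page's dirty field and cleared along with PG_RW, so
 * a PG_M bit without PG_RW is stale.  For a 2/4MB mapping, each of the
 * NBPDR / PAGE_SIZE constituent 4KB pages is dirtied.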
4083 */ 4084 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 4085 if ((tpte & PG_PS) != 0) { 4086 for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) 4087 vm_page_dirty(mt); 4088 } else 4089 vm_page_dirty(m); 4090 } 4091 4092 /* Mark free */ 4093 PV_STAT(pv_entry_frees++); 4094 PV_STAT(pv_entry_spare++); 4095 pv_entry_count--; 4096 pc->pc_map[field] |= bitmask; 4097 if ((tpte & PG_PS) != 0) { 4098 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 4099 pvh = pa_to_pvh(tpte & PG_PS_FRAME); 4100 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 4101 if (TAILQ_EMPTY(&pvh->pv_list)) { 4102 for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) 4103 if (TAILQ_EMPTY(&mt->md.pv_list)) 4104 vm_page_flag_clear(mt, PG_WRITEABLE); 4105 } 4106 mpte = pmap_lookup_pt_page(pmap, pv->pv_va); 4107 if (mpte != NULL) { 4108 pmap_remove_pt_page(pmap, mpte); 4109 pmap->pm_stats.resident_count--; 4110 KASSERT(mpte->wire_count == NPTEPG, 4111 ("pmap_remove_pages: pte page wire count error")); 4112 mpte->wire_count = 0; 4113 pmap_add_delayed_free_list(mpte, &free, FALSE); 4114 atomic_subtract_int(&cnt.v_wire_count, 1); 4115 } 4116 } else { 4117 pmap->pm_stats.resident_count--; 4118 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 4119 if (TAILQ_EMPTY(&m->md.pv_list)) { 4120 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4121 if (TAILQ_EMPTY(&pvh->pv_list)) 4122 vm_page_flag_clear(m, PG_WRITEABLE); 4123 } 4124 pmap_unuse_pt(pmap, pv->pv_va, &free); 4125 } 4126 } 4127 } 4128 if (allfree) { 4129 PV_STAT(pv_entry_spare -= _NPCPV); 4130 PV_STAT(pc_chunk_count--); 4131 PV_STAT(pc_chunk_frees++); 4132 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4133 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 4134 pmap_qremove((vm_offset_t)pc, 1); 4135 vm_page_unwire(m, 0); 4136 vm_page_free(m); 4137 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 4138 } 4139 } 4140 sched_unpin(); 4141 pmap_invalidate_all(pmap); 4142 vm_page_unlock_queues(); 4143 PMAP_UNLOCK(pmap); 4144 pmap_free_zero_pages(free); 4145} 4146 4147/* 4148 * pmap_is_modified: 4149 * 4150 * Return whether or not the specified physical page was modified 4151 * in any physical maps. 4152 */ 4153boolean_t 4154pmap_is_modified(vm_page_t m) 4155{ 4156 4157 if (m->flags & PG_FICTITIOUS) 4158 return (FALSE); 4159 if (pmap_is_modified_pvh(&m->md)) 4160 return (TRUE); 4161 return (pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 4162} 4163 4164/* 4165 * Returns TRUE if any of the given mappings were used to modify 4166 * physical memory. Otherwise, returns FALSE. Both page and 2mpage 4167 * mappings are supported. 4168 */ 4169static boolean_t 4170pmap_is_modified_pvh(struct md_page *pvh) 4171{ 4172 pv_entry_t pv; 4173 pt_entry_t *pte; 4174 pmap_t pmap; 4175 boolean_t rv; 4176 4177 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4178 rv = FALSE; 4179 sched_pin(); 4180 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4181 pmap = PV_PMAP(pv); 4182 PMAP_LOCK(pmap); 4183 pte = pmap_pte_quick(pmap, pv->pv_va); 4184 rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW); 4185 PMAP_UNLOCK(pmap); 4186 if (rv) 4187 break; 4188 } 4189 sched_unpin(); 4190 return (rv); 4191} 4192 4193/* 4194 * pmap_is_prefaultable: 4195 * 4196 * Return whether or not the specified virtual address is eligible 4197 * for prefault.
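 *
 * An address is prefaultable only when its page directory entry is valid,
 * is not a 2/4MB superpage, and the corresponding page table entry is
 * still empty.  A hypothetical fault-ahead loop (purely illustrative;
 * pmap_enter_quick() and its arguments stand in for whatever speculative
 * mapping primitive the caller uses) might look like
 *
 *	if (pmap_is_prefaultable(pmap, addr))
 *		pmap_enter_quick(pmap, addr, m, prot);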
4198 */ 4199boolean_t 4200pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 4201{ 4202 pd_entry_t *pde; 4203 pt_entry_t *pte; 4204 boolean_t rv; 4205 4206 rv = FALSE; 4207 PMAP_LOCK(pmap); 4208 pde = pmap_pde(pmap, addr); 4209 if (*pde != 0 && (*pde & PG_PS) == 0) { 4210 pte = vtopte(addr); 4211 rv = *pte == 0; 4212 } 4213 PMAP_UNLOCK(pmap); 4214 return (rv); 4215} 4216 4217/* 4218 * Clear the write and modified bits in each of the given page's mappings. 4219 */ 4220void 4221pmap_remove_write(vm_page_t m) 4222{ 4223 struct md_page *pvh; 4224 pv_entry_t next_pv, pv; 4225 pmap_t pmap; 4226 pd_entry_t *pde; 4227 pt_entry_t oldpte, *pte; 4228 vm_offset_t va; 4229 4230 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4231 if ((m->flags & PG_FICTITIOUS) != 0 || 4232 (m->flags & PG_WRITEABLE) == 0) 4233 return; 4234 sched_pin(); 4235 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4236 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { 4237 va = pv->pv_va; 4238 pmap = PV_PMAP(pv); 4239 PMAP_LOCK(pmap); 4240 pde = pmap_pde(pmap, va); 4241 if ((*pde & PG_RW) != 0) 4242 (void)pmap_demote_pde(pmap, pde, va); 4243 PMAP_UNLOCK(pmap); 4244 } 4245 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4246 pmap = PV_PMAP(pv); 4247 PMAP_LOCK(pmap); 4248 pde = pmap_pde(pmap, pv->pv_va); 4249 KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found" 4250 " a 4mpage in page %p's pv list", m)); 4251 pte = pmap_pte_quick(pmap, pv->pv_va); 4252retry: 4253 oldpte = *pte; 4254 if ((oldpte & PG_RW) != 0) { 4255 /* 4256 * Regardless of whether a pte is 32 or 64 bits 4257 * in size, PG_RW and PG_M are among the least 4258 * significant 32 bits. 4259 */ 4260 if (!atomic_cmpset_int((u_int *)pte, oldpte, 4261 oldpte & ~(PG_RW | PG_M))) 4262 goto retry; 4263 if ((oldpte & PG_M) != 0) 4264 vm_page_dirty(m); 4265 pmap_invalidate_page(pmap, pv->pv_va); 4266 } 4267 PMAP_UNLOCK(pmap); 4268 } 4269 vm_page_flag_clear(m, PG_WRITEABLE); 4270 sched_unpin(); 4271} 4272 4273/* 4274 * pmap_ts_referenced: 4275 * 4276 * Return a count of reference bits for a page, clearing those bits. 4277 * It is not necessary for every reference bit to be cleared, but it 4278 * is necessary that 0 only be returned when there are truly no 4279 * reference bits set. 4280 * 4281 * XXX: The exact number of bits to check and clear is a matter that 4282 * should be tested and standardized at some point in the future for 4283 * optimal aging of shared pages. 4284 */ 4285int 4286pmap_ts_referenced(vm_page_t m) 4287{ 4288 struct md_page *pvh; 4289 pv_entry_t pv, pvf, pvn; 4290 pmap_t pmap; 4291 pd_entry_t oldpde, *pde; 4292 pt_entry_t *pte; 4293 vm_offset_t va; 4294 int rtval = 0; 4295 4296 if (m->flags & PG_FICTITIOUS) 4297 return (rtval); 4298 sched_pin(); 4299 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4300 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4301 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { 4302 va = pv->pv_va; 4303 pmap = PV_PMAP(pv); 4304 PMAP_LOCK(pmap); 4305 pde = pmap_pde(pmap, va); 4306 oldpde = *pde; 4307 if ((oldpde & PG_A) != 0) { 4308 if (pmap_demote_pde(pmap, pde, va)) { 4309 if ((oldpde & PG_W) == 0) { 4310 /* 4311 * Remove the mapping to a single page 4312 * so that a subsequent access may 4313 * repromote. Since the underlying 4314 * page table page is fully populated, 4315 * this removal never frees a page 4316 * table page. 
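 *
 * The adjustment below,
 *
 *	va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME),
 *
 * converts "va" from the base of the 2/4MB mapping into the one 4KB
 * virtual page within it that maps "m": the page's offset from the
 * superpage's physical frame equals its offset from the superpage's
 * virtual base.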
4317 */ 4318 va += VM_PAGE_TO_PHYS(m) - (oldpde & 4319 PG_PS_FRAME); 4320 pmap_remove_page(pmap, va, NULL); 4321 rtval++; 4322 if (rtval > 4) { 4323 PMAP_UNLOCK(pmap); 4324 return (rtval); 4325 } 4326 } 4327 } 4328 } 4329 PMAP_UNLOCK(pmap); 4330 } 4331 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 4332 pvf = pv; 4333 do { 4334 pvn = TAILQ_NEXT(pv, pv_list); 4335 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 4336 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 4337 pmap = PV_PMAP(pv); 4338 PMAP_LOCK(pmap); 4339 pde = pmap_pde(pmap, pv->pv_va); 4340 KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:" 4341 " found a 4mpage in page %p's pv list", m)); 4342 pte = pmap_pte_quick(pmap, pv->pv_va); 4343 if ((*pte & PG_A) != 0) { 4344 atomic_clear_int((u_int *)pte, PG_A); 4345 pmap_invalidate_page(pmap, pv->pv_va); 4346 rtval++; 4347 if (rtval > 4) 4348 pvn = NULL; 4349 } 4350 PMAP_UNLOCK(pmap); 4351 } while ((pv = pvn) != NULL && pv != pvf); 4352 } 4353 sched_unpin(); 4354 return (rtval); 4355} 4356 4357/* 4358 * Clear the modify bits on the specified physical page. 4359 */ 4360void 4361pmap_clear_modify(vm_page_t m) 4362{ 4363 struct md_page *pvh; 4364 pv_entry_t next_pv, pv; 4365 pmap_t pmap; 4366 pd_entry_t oldpde, *pde; 4367 pt_entry_t oldpte, *pte; 4368 vm_offset_t va; 4369 4370 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4371 if ((m->flags & PG_FICTITIOUS) != 0) 4372 return; 4373 sched_pin(); 4374 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4375 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { 4376 va = pv->pv_va; 4377 pmap = PV_PMAP(pv); 4378 PMAP_LOCK(pmap); 4379 pde = pmap_pde(pmap, va); 4380 oldpde = *pde; 4381 if ((oldpde & PG_RW) != 0) { 4382 if (pmap_demote_pde(pmap, pde, va)) { 4383 if ((oldpde & PG_W) == 0) { 4384 /* 4385 * Write protect the mapping to a 4386 * single page so that a subsequent 4387 * write access may repromote. 4388 */ 4389 va += VM_PAGE_TO_PHYS(m) - (oldpde & 4390 PG_PS_FRAME); 4391 pte = pmap_pte_quick(pmap, va); 4392 oldpte = *pte; 4393 if ((oldpte & PG_V) != 0) { 4394 /* 4395 * Regardless of whether a pte is 32 or 64 bits 4396 * in size, PG_RW and PG_M are among the least 4397 * significant 32 bits. 4398 */ 4399 while (!atomic_cmpset_int((u_int *)pte, 4400 oldpte, 4401 oldpte & ~(PG_M | PG_RW))) 4402 oldpte = *pte; 4403 vm_page_dirty(m); 4404 pmap_invalidate_page(pmap, va); 4405 } 4406 } 4407 } 4408 } 4409 PMAP_UNLOCK(pmap); 4410 } 4411 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4412 pmap = PV_PMAP(pv); 4413 PMAP_LOCK(pmap); 4414 pde = pmap_pde(pmap, pv->pv_va); 4415 KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" 4416 " a 4mpage in page %p's pv list", m)); 4417 pte = pmap_pte_quick(pmap, pv->pv_va); 4418 if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 4419 /* 4420 * Regardless of whether a pte is 32 or 64 bits 4421 * in size, PG_M is among the least significant 4422 * 32 bits. 4423 */ 4424 atomic_clear_int((u_int *)pte, PG_M); 4425 pmap_invalidate_page(pmap, pv->pv_va); 4426 } 4427 PMAP_UNLOCK(pmap); 4428 } 4429 sched_unpin(); 4430} 4431 4432/* 4433 * pmap_clear_reference: 4434 * 4435 * Clear the reference bit on the specified physical page. 
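 *
 * Purely illustrative (the policy shown is hypothetical): a page-aging
 * scan that wants to restart reference tracking on a page, holding the
 * page queues lock as the mtx_assert() below requires, could simply do
 *
 *	pmap_clear_reference(m);
 *
 * whereas pmap_ts_referenced() above both counts and clears the bits,
 * which is what a pageout-style scan normally wants.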
4436 */ 4437void 4438pmap_clear_reference(vm_page_t m) 4439{ 4440 struct md_page *pvh; 4441 pv_entry_t next_pv, pv; 4442 pmap_t pmap; 4443 pd_entry_t oldpde, *pde; 4444 pt_entry_t *pte; 4445 vm_offset_t va; 4446 4447 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4448 if ((m->flags & PG_FICTITIOUS) != 0) 4449 return; 4450 sched_pin(); 4451 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4452 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { 4453 va = pv->pv_va; 4454 pmap = PV_PMAP(pv); 4455 PMAP_LOCK(pmap); 4456 pde = pmap_pde(pmap, va); 4457 oldpde = *pde; 4458 if ((oldpde & PG_A) != 0) { 4459 if (pmap_demote_pde(pmap, pde, va)) { 4460 /* 4461 * Remove the mapping to a single page so 4462 * that a subsequent access may repromote. 4463 * Since the underlying page table page is 4464 * fully populated, this removal never frees 4465 * a page table page. 4466 */ 4467 va += VM_PAGE_TO_PHYS(m) - (oldpde & 4468 PG_PS_FRAME); 4469 pmap_remove_page(pmap, va, NULL); 4470 } 4471 } 4472 PMAP_UNLOCK(pmap); 4473 } 4474 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4475 pmap = PV_PMAP(pv); 4476 PMAP_LOCK(pmap); 4477 pde = pmap_pde(pmap, pv->pv_va); 4478 KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found" 4479 " a 4mpage in page %p's pv list", m)); 4480 pte = pmap_pte_quick(pmap, pv->pv_va); 4481 if ((*pte & PG_A) != 0) { 4482 /* 4483 * Regardless of whether a pte is 32 or 64 bits 4484 * in size, PG_A is among the least significant 4485 * 32 bits. 4486 */ 4487 atomic_clear_int((u_int *)pte, PG_A); 4488 pmap_invalidate_page(pmap, pv->pv_va); 4489 } 4490 PMAP_UNLOCK(pmap); 4491 } 4492 sched_unpin(); 4493} 4494 4495/* 4496 * Miscellaneous support routines follow 4497 */ 4498 4499/* Adjust the cache mode for a 4KB page mapped via a PTE. */ 4500static __inline void 4501pmap_pte_attr(pt_entry_t *pte, int cache_bits) 4502{ 4503 u_int opte, npte; 4504 4505 /* 4506 * The cache mode bits are all in the low 32-bits of the 4507 * PTE, so we can just spin on updating the low 32-bits. 4508 */ 4509 do { 4510 opte = *(u_int *)pte; 4511 npte = opte & ~PG_PTE_CACHE; 4512 npte |= cache_bits; 4513 } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); 4514} 4515 4516/* Adjust the cache mode for a 2/4MB page mapped via a PDE. */ 4517static __inline void 4518pmap_pde_attr(pd_entry_t *pde, int cache_bits) 4519{ 4520 u_int opde, npde; 4521 4522 /* 4523 * The cache mode bits are all in the low 32-bits of the 4524 * PDE, so we can just spin on updating the low 32-bits. 4525 */ 4526 do { 4527 opde = *(u_int *)pde; 4528 npde = opde & ~PG_PDE_CACHE; 4529 npde |= cache_bits; 4530 } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); 4531} 4532 4533/* 4534 * Map a set of physical memory pages into the kernel virtual 4535 * address space. Return a pointer to where it is mapped. This 4536 * routine is intended to be used for mapping device memory, 4537 * NOT real memory. 
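 *
 * Purely illustrative (the physical address, size, and pointer name are
 * hypothetical):
 *
 *	void *regs;
 *
 *	regs = pmap_mapdev(0xfee00000, PAGE_SIZE);
 *	... access the device registers through "regs" ...
 *	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
 *
 * pmap_mapdev() maps with PAT_UNCACHEABLE, pmap_mapbios() with
 * PAT_WRITE_BACK, and pmap_mapdev_attr() lets the caller name the
 * memory type explicitly.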
4538 */ 4539void * 4540pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) 4541{ 4542 vm_offset_t va, offset; 4543 vm_size_t tmpsize; 4544 4545 offset = pa & PAGE_MASK; 4546 size = roundup(offset + size, PAGE_SIZE); 4547 pa = pa & PG_FRAME; 4548 4549 if (pa < KERNLOAD && pa + size <= KERNLOAD) 4550 va = KERNBASE + pa; 4551 else 4552 va = kmem_alloc_nofault(kernel_map, size); 4553 if (!va) 4554 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 4555 4556 for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) 4557 pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); 4558 pmap_invalidate_range(kernel_pmap, va, va + tmpsize); 4559 pmap_invalidate_cache_range(va, va + size); 4560 return ((void *)(va + offset)); 4561} 4562 4563void * 4564pmap_mapdev(vm_paddr_t pa, vm_size_t size) 4565{ 4566 4567 return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); 4568} 4569 4570void * 4571pmap_mapbios(vm_paddr_t pa, vm_size_t size) 4572{ 4573 4574 return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); 4575} 4576 4577void 4578pmap_unmapdev(vm_offset_t va, vm_size_t size) 4579{ 4580 vm_offset_t base, offset, tmpva; 4581 4582 if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) 4583 return; 4584 base = trunc_page(va); 4585 offset = va & PAGE_MASK; 4586 size = roundup(offset + size, PAGE_SIZE); 4587 for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) 4588 pmap_kremove(tmpva); 4589 pmap_invalidate_range(kernel_pmap, va, tmpva); 4590 kmem_free(kernel_map, base, size); 4591} 4592 4593/* 4594 * Sets the memory attribute for the specified page. 4595 */ 4596void 4597pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 4598{ 4599 struct sysmaps *sysmaps; 4600 vm_offset_t sva, eva; 4601 4602 m->md.pat_mode = ma; 4603 if ((m->flags & PG_FICTITIOUS) != 0) 4604 return; 4605 4606 /* 4607 * If "m" is a normal page, flush it from the cache. 4608 * See pmap_invalidate_cache_range(). 4609 * 4610 * First, try to find an existing mapping of the page by sf 4611 * buffer. sf_buf_invalidate_cache() modifies mapping and 4612 * flushes the cache. 4613 */ 4614 if (sf_buf_invalidate_cache(m)) 4615 return; 4616 4617 /* 4618 * If the page is not mapped by an sf buffer, but the CPU does not 4619 * support self snoop, map the page transiently and do the 4620 * invalidation. In the worst case, the whole cache is flushed by 4621 * pmap_invalidate_cache_range(). 4622 */ 4623 if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) { 4624 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 4625 mtx_lock(&sysmaps->lock); 4626 if (*sysmaps->CMAP2) 4627 panic("pmap_page_set_memattr: CMAP2 busy"); 4628 sched_pin(); 4629 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | 4630 PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); 4631 invlcaddr(sysmaps->CADDR2); 4632 sva = (vm_offset_t)sysmaps->CADDR2; 4633 eva = sva + PAGE_SIZE; 4634 } else 4635 sva = eva = 0; /* gcc */ 4636 pmap_invalidate_cache_range(sva, eva); 4637 if (sva != 0) { 4638 *sysmaps->CMAP2 = 0; 4639 sched_unpin(); 4640 mtx_unlock(&sysmaps->lock); 4641 } 4642} 4643 4644/* 4645 * Changes the specified virtual address range's memory type to that given by 4646 * the parameter "mode". The specified virtual address range must be 4647 * completely contained within the kernel map. 4648 * 4649 * Returns zero if the change completed successfully, and either EINVAL or 4650 * ENOMEM if the change failed. Specifically, EINVAL is returned if some part 4651 * of the virtual address range was not mapped, and ENOMEM is returned if 4652 * there was insufficient memory available to complete the change.
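 *
 * Purely illustrative ("va" and "size" describe a hypothetical, already
 * mapped kernel range):
 *
 *	int error;
 *
 *	error = pmap_change_attr(va, size, PAT_UNCACHEABLE);
 *	if (error != 0)
 *		printf("pmap_change_attr: %d\n", error);
 *
 * On success, every 4KB and 2/4MB mapping covering the range has had its
 * cache bits rewritten and the affected TLB entries and CPU caches have
 * been flushed.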
4653 */ 4654int 4655pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) 4656{ 4657 vm_offset_t base, offset, tmpva; 4658 pd_entry_t *pde; 4659 pt_entry_t *pte; 4660 int cache_bits_pte, cache_bits_pde; 4661 boolean_t changed; 4662 4663 base = trunc_page(va); 4664 offset = va & PAGE_MASK; 4665 size = roundup(offset + size, PAGE_SIZE); 4666 4667 /* 4668 * Only supported on kernel virtual addresses above the recursive map. 4669 */ 4670 if (base < VM_MIN_KERNEL_ADDRESS) 4671 return (EINVAL); 4672 4673 cache_bits_pde = pmap_cache_bits(mode, 1); 4674 cache_bits_pte = pmap_cache_bits(mode, 0); 4675 changed = FALSE; 4676 4677 /* 4678 * Pages that aren't mapped aren't supported. Also break down 4679 * 2/4MB pages into 4KB pages if required. 4680 */ 4681 PMAP_LOCK(kernel_pmap); 4682 for (tmpva = base; tmpva < base + size; ) { 4683 pde = pmap_pde(kernel_pmap, tmpva); 4684 if (*pde == 0) { 4685 PMAP_UNLOCK(kernel_pmap); 4686 return (EINVAL); 4687 } 4688 if (*pde & PG_PS) { 4689 /* 4690 * If the current 2/4MB page already has 4691 * the required memory type, then we need not 4692 * demote this page. Just increment tmpva to 4693 * the next 2/4MB page frame. 4694 */ 4695 if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { 4696 tmpva = trunc_4mpage(tmpva) + NBPDR; 4697 continue; 4698 } 4699 4700 /* 4701 * If the current offset aligns with a 2/4MB 4702 * page frame and there is at least 2/4MB left 4703 * within the range, then we need not break 4704 * down this page into 4KB pages. 4705 */ 4706 if ((tmpva & PDRMASK) == 0 && 4707 tmpva + PDRMASK < base + size) { 4708 tmpva += NBPDR; 4709 continue; 4710 } 4711 if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) { 4712 PMAP_UNLOCK(kernel_pmap); 4713 return (ENOMEM); 4714 } 4715 } 4716 pte = vtopte(tmpva); 4717 if (*pte == 0) { 4718 PMAP_UNLOCK(kernel_pmap); 4719 return (EINVAL); 4720 } 4721 tmpva += PAGE_SIZE; 4722 } 4723 PMAP_UNLOCK(kernel_pmap); 4724 4725 /* 4726 * Ok, all the pages exist, so run through them updating their 4727 * cache mode if required. 4728 */ 4729 for (tmpva = base; tmpva < base + size; ) { 4730 pde = pmap_pde(kernel_pmap, tmpva); 4731 if (*pde & PG_PS) { 4732 if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { 4733 pmap_pde_attr(pde, cache_bits_pde); 4734 changed = TRUE; 4735 } 4736 tmpva = trunc_4mpage(tmpva) + NBPDR; 4737 } else { 4738 pte = vtopte(tmpva); 4739 if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { 4740 pmap_pte_attr(pte, cache_bits_pte); 4741 changed = TRUE; 4742 } 4743 tmpva += PAGE_SIZE; 4744 } 4745 } 4746 4747 /* 4748 * Flush CPU caches to make sure any data isn't cached that 4749 * shouldn't be, etc. 4750 */ 4751 if (changed) { 4752 pmap_invalidate_range(kernel_pmap, base, tmpva); 4753 pmap_invalidate_cache_range(base, tmpva); 4754 } 4755 return (0); 4756} 4757 4758/* 4759 * perform the pmap work for mincore 4760 */ 4761int 4762pmap_mincore(pmap_t pmap, vm_offset_t addr) 4763{ 4764 pd_entry_t *pdep; 4765 pt_entry_t *ptep, pte; 4766 vm_paddr_t pa; 4767 vm_page_t m; 4768 int val = 0; 4769 4770 PMAP_LOCK(pmap); 4771 pdep = pmap_pde(pmap, addr); 4772 if (*pdep != 0) { 4773 if (*pdep & PG_PS) { 4774 pte = *pdep; 4775 val = MINCORE_SUPER; 4776 /* Compute the physical address of the 4KB page. 
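 * For a 2/4MB mapping there is no PTE-level frame, so the 4KB frame is
 * synthesized from the superpage: the expression below takes the
 * superpage's physical frame (PG_PS_FRAME), adds the address's offset
 * within the superpage (addr & PDRMASK), and truncates the result to a
 * 4KB boundary with PG_FRAME.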
*/ 4777 pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & 4778 PG_FRAME; 4779 } else { 4780 ptep = pmap_pte(pmap, addr); 4781 pte = *ptep; 4782 pmap_pte_release(ptep); 4783 pa = pte & PG_FRAME; 4784 } 4785 } else { 4786 pte = 0; 4787 pa = 0; 4788 } 4789 PMAP_UNLOCK(pmap); 4790 4791 if (pte != 0) { 4792 val |= MINCORE_INCORE; 4793 if ((pte & PG_MANAGED) == 0) 4794 return val; 4795 4796 m = PHYS_TO_VM_PAGE(pa); 4797 4798 /* 4799 * Modified by us 4800 */ 4801 if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 4802 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 4803 else { 4804 /* 4805 * Modified by someone else 4806 */ 4807 vm_page_lock_queues(); 4808 if (m->dirty || pmap_is_modified(m)) 4809 val |= MINCORE_MODIFIED_OTHER; 4810 vm_page_unlock_queues(); 4811 } 4812 /* 4813 * Referenced by us 4814 */ 4815 if (pte & PG_A) 4816 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 4817 else { 4818 /* 4819 * Referenced by someone else 4820 */ 4821 vm_page_lock_queues(); 4822 if ((m->flags & PG_REFERENCED) || 4823 pmap_ts_referenced(m)) { 4824 val |= MINCORE_REFERENCED_OTHER; 4825 vm_page_flag_set(m, PG_REFERENCED); 4826 } 4827 vm_page_unlock_queues(); 4828 } 4829 } 4830 return val; 4831} 4832 4833void 4834pmap_activate(struct thread *td) 4835{ 4836 pmap_t pmap, oldpmap; 4837 u_int32_t cr3; 4838 4839 critical_enter(); 4840 pmap = vmspace_pmap(td->td_proc->p_vmspace); 4841 oldpmap = PCPU_GET(curpmap); 4842#if defined(SMP) 4843 atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); 4844 atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); 4845#else 4846 oldpmap->pm_active &= ~1; 4847 pmap->pm_active |= 1; 4848#endif 4849#ifdef PAE 4850 cr3 = vtophys(pmap->pm_pdpt); 4851#else 4852 cr3 = vtophys(pmap->pm_pdir); 4853#endif 4854 /* 4855 * pmap_activate is for the current thread on the current cpu 4856 */ 4857 td->td_pcb->pcb_cr3 = cr3; 4858 load_cr3(cr3); 4859 PCPU_SET(curpmap, pmap); 4860 critical_exit(); 4861} 4862 4863void 4864pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 4865{ 4866} 4867 4868/* 4869 * Increase the starting virtual address of the given mapping if a 4870 * different alignment might result in more superpage mappings. 
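 *
 * A worked illustration (the numbers are hypothetical): with 4MB
 * superpages, NBPDR is 4MB and PDRMASK is 0x3fffff.  If the object
 * offset being mapped has (offset & PDRMASK) == 0x123000 while the
 * proposed *addr has (*addr & PDRMASK) == 0, the routine moves *addr
 * forward to the next address whose low 22 bits are 0x123000, so that
 * superpage-aligned runs of the object coincide with superpage-aligned
 * runs of the address space and can later be promoted.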
4871 */ 4872void 4873pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 4874 vm_offset_t *addr, vm_size_t size) 4875{ 4876 vm_offset_t superpage_offset; 4877 4878 if (size < NBPDR) 4879 return; 4880 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 4881 offset += ptoa(object->pg_color); 4882 superpage_offset = offset & PDRMASK; 4883 if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || 4884 (*addr & PDRMASK) == superpage_offset) 4885 return; 4886 if ((*addr & PDRMASK) < superpage_offset) 4887 *addr = (*addr & ~PDRMASK) + superpage_offset; 4888 else 4889 *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; 4890} 4891 4892 4893#if defined(PMAP_DEBUG) 4894pmap_pid_dump(int pid) 4895{ 4896 pmap_t pmap; 4897 struct proc *p; 4898 int npte = 0; 4899 int index; 4900 4901 sx_slock(&allproc_lock); 4902 FOREACH_PROC_IN_SYSTEM(p) { 4903 if (p->p_pid != pid) 4904 continue; 4905 4906 if (p->p_vmspace) { 4907 int i,j; 4908 index = 0; 4909 pmap = vmspace_pmap(p->p_vmspace); 4910 for (i = 0; i < NPDEPTD; i++) { 4911 pd_entry_t *pde; 4912 pt_entry_t *pte; 4913 vm_offset_t base = i << PDRSHIFT; 4914 4915 pde = &pmap->pm_pdir[i]; 4916 if (pde && pmap_pde_v(pde)) { 4917 for (j = 0; j < NPTEPG; j++) { 4918 vm_offset_t va = base + (j << PAGE_SHIFT); 4919 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 4920 if (index) { 4921 index = 0; 4922 printf("\n"); 4923 } 4924 sx_sunlock(&allproc_lock); 4925 return npte; 4926 } 4927 pte = pmap_pte(pmap, va); 4928 if (pte && pmap_pte_v(pte)) { 4929 pt_entry_t pa; 4930 vm_page_t m; 4931 pa = *pte; 4932 m = PHYS_TO_VM_PAGE(pa & PG_FRAME); 4933 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 4934 va, pa, m->hold_count, m->wire_count, m->flags); 4935 npte++; 4936 index++; 4937 if (index >= 2) { 4938 index = 0; 4939 printf("\n"); 4940 } else { 4941 printf(" "); 4942 } 4943 } 4944 } 4945 } 4946 } 4947 } 4948 } 4949 sx_sunlock(&allproc_lock); 4950 return npte; 4951} 4952#endif 4953 4954#if defined(DEBUG) 4955 4956static void pads(pmap_t pm); 4957void pmap_pvdump(vm_offset_t pa); 4958 4959/* print address space of pmap*/ 4960static void 4961pads(pmap_t pm) 4962{ 4963 int i, j; 4964 vm_paddr_t va; 4965 pt_entry_t *ptep; 4966 4967 if (pm == kernel_pmap) 4968 return; 4969 for (i = 0; i < NPDEPTD; i++) 4970 if (pm->pm_pdir[i]) 4971 for (j = 0; j < NPTEPG; j++) { 4972 va = (i << PDRSHIFT) + (j << PAGE_SHIFT); 4973 if (pm == kernel_pmap && va < KERNBASE) 4974 continue; 4975 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 4976 continue; 4977 ptep = pmap_pte(pm, va); 4978 if (pmap_pte_v(ptep)) 4979 printf("%x:%x ", va, *ptep); 4980 }; 4981 4982} 4983 4984void 4985pmap_pvdump(vm_paddr_t pa) 4986{ 4987 pv_entry_t pv; 4988 pmap_t pmap; 4989 vm_page_t m; 4990 4991 printf("pa %x", pa); 4992 m = PHYS_TO_VM_PAGE(pa); 4993 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4994 pmap = PV_PMAP(pv); 4995 printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); 4996 pads(pmap); 4997 } 4998 printf(" "); 4999} 5000#endif 5001
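/*
 * The two groups of routines above are compiled only under PMAP_DEBUG and
 * DEBUG, respectively.  They are meant to be invoked by hand from a kernel
 * debugger; for example (the pid and physical address are hypothetical):
 *
 *	pmap_pid_dump(pid);	dump the page tables of process "pid"
 *	pmap_pvdump(pa);	print every pv entry for physical page "pa"
 */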