pmap.c revision 160419
1/*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu> 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * the Systems Programming Group of the University of Utah Computer 13 * Science Department and William Jolitz of UUNET Technologies Inc. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by the University of 26 * California, Berkeley and its contributors. 27 * 4. Neither the name of the University nor the names of its contributors 28 * may be used to endorse or promote products derived from this software 29 * without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41 * SUCH DAMAGE. 42 * 43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 44 */ 45/*- 46 * Copyright (c) 2003 Networks Associates Technology, Inc. 47 * All rights reserved. 48 * 49 * This software was developed for the FreeBSD Project by Jake Burkholder, 50 * Safeport Network Services, and Network Associates Laboratories, the 51 * Security Research Division of Network Associates, Inc. under 52 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 53 * CHATS research program. 54 * 55 * Redistribution and use in source and binary forms, with or without 56 * modification, are permitted provided that the following conditions 57 * are met: 58 * 1. Redistributions of source code must retain the above copyright 59 * notice, this list of conditions and the following disclaimer. 60 * 2. Redistributions in binary form must reproduce the above copyright 61 * notice, this list of conditions and the following disclaimer in the 62 * documentation and/or other materials provided with the distribution. 63 * 64 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 67 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 74 * SUCH DAMAGE. 75 */ 76 77#include <sys/cdefs.h> 78__FBSDID("$FreeBSD: head/sys/i386/i386/pmap.c 160419 2006-07-17 03:10:17Z alc $"); 79 80/* 81 * Manages physical address maps. 82 * 83 * In addition to hardware address maps, this 84 * module is called upon to provide software-use-only 85 * maps which may or may not be stored in the same 86 * form as hardware maps. These pseudo-maps are 87 * used to store intermediate results from copy 88 * operations to and from address spaces. 89 * 90 * Since the information managed by this module is 91 * also stored by the logical address mapping module, 92 * this module may throw away valid virtual-to-physical 93 * mappings at almost any time. However, invalidations 94 * of virtual-to-physical mappings must be done as 95 * requested. 96 * 97 * In order to cope with hardware architectures which 98 * make virtual-to-physical map invalidates expensive, 99 * this module may delay invalidate or reduced protection 100 * operations until such time as they are actually 101 * necessary. This module is given full information as 102 * to which processors are currently using which maps, 103 * and to when physical maps must be made correct. 104 */ 105 106#include "opt_cpu.h" 107#include "opt_pmap.h" 108#include "opt_msgbuf.h" 109#include "opt_smp.h" 110#include "opt_xbox.h" 111 112#include <sys/param.h> 113#include <sys/systm.h> 114#include <sys/kernel.h> 115#include <sys/lock.h> 116#include <sys/malloc.h> 117#include <sys/mman.h> 118#include <sys/msgbuf.h> 119#include <sys/mutex.h> 120#include <sys/proc.h> 121#include <sys/sx.h> 122#include <sys/vmmeter.h> 123#include <sys/sched.h> 124#include <sys/sysctl.h> 125#ifdef SMP 126#include <sys/smp.h> 127#endif 128 129#include <vm/vm.h> 130#include <vm/vm_param.h> 131#include <vm/vm_kern.h> 132#include <vm/vm_page.h> 133#include <vm/vm_map.h> 134#include <vm/vm_object.h> 135#include <vm/vm_extern.h> 136#include <vm/vm_pageout.h> 137#include <vm/vm_pager.h> 138#include <vm/uma.h> 139 140#include <machine/cpu.h> 141#include <machine/cputypes.h> 142#include <machine/md_var.h> 143#include <machine/pcb.h> 144#include <machine/specialreg.h> 145#ifdef SMP 146#include <machine/smp.h> 147#endif 148 149#ifdef XBOX 150#include <machine/xbox.h> 151#endif 152 153#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) 154#define CPU_ENABLE_SSE 155#endif 156 157#ifndef PMAP_SHPGPERPROC 158#define PMAP_SHPGPERPROC 200 159#endif 160 161#if defined(DIAGNOSTIC) 162#define PMAP_DIAGNOSTIC 163#endif 164 165#if !defined(PMAP_DIAGNOSTIC) 166#define PMAP_INLINE __inline 167#else 168#define PMAP_INLINE 169#endif 170 171#define PV_STATS 172#ifdef PV_STATS 173#define PV_STAT(x) do { x ; } while (0) 174#else 175#define PV_STAT(x) do { } while (0) 176#endif 177 178/* 179 * Get PDEs and PTEs for user/kernel address space 180 */ 181#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) 182#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 183 184#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) 185#define 
pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) 186#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) 187#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) 188#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) 189 190#define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \ 191 atomic_clear_int((u_int *)(pte), PG_W)) 192#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 193 194struct pmap kernel_pmap_store; 195LIST_HEAD(pmaplist, pmap); 196static struct pmaplist allpmaps; 197static struct mtx allpmaps_lock; 198 199vm_paddr_t avail_end; /* PA of last available physical page */ 200vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 201vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 202int pgeflag = 0; /* PG_G or-in */ 203int pseflag = 0; /* PG_PS or-in */ 204 205static int nkpt; 206vm_offset_t kernel_vm_end; 207extern u_int32_t KERNend; 208 209#ifdef PAE 210static uma_zone_t pdptzone; 211#endif 212 213/* 214 * Data for the pv entry allocation mechanism 215 */ 216static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 217static int shpgperproc = PMAP_SHPGPERPROC; 218 219struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ 220int pv_maxchunks; /* How many chunks we have KVA for */ 221vm_offset_t pv_vafree; /* freelist stored in the PTE */ 222 223/* 224 * All those kernel PT submaps that BSD is so fond of 225 */ 226struct sysmaps { 227 struct mtx lock; 228 pt_entry_t *CMAP1; 229 pt_entry_t *CMAP2; 230 caddr_t CADDR1; 231 caddr_t CADDR2; 232}; 233static struct sysmaps sysmaps_pcpu[MAXCPU]; 234pt_entry_t *CMAP1 = 0; 235static pt_entry_t *CMAP3; 236caddr_t CADDR1 = 0, ptvmmap = 0; 237static caddr_t CADDR3; 238struct msgbuf *msgbufp = 0; 239 240/* 241 * Crashdump maps. 
242 */ 243static caddr_t crashdumpmap; 244 245#ifdef SMP 246extern pt_entry_t *SMPpt; 247#endif 248static pt_entry_t *PMAP1 = 0, *PMAP2; 249static pt_entry_t *PADDR1 = 0, *PADDR2; 250#ifdef SMP 251static int PMAP1cpu; 252static int PMAP1changedcpu; 253SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 254 &PMAP1changedcpu, 0, 255 "Number of times pmap_pte_quick changed CPU with same PMAP1"); 256#endif 257static int PMAP1changed; 258SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 259 &PMAP1changed, 0, 260 "Number of times pmap_pte_quick changed PMAP1"); 261static int PMAP1unchanged; 262SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 263 &PMAP1unchanged, 0, 264 "Number of times pmap_pte_quick didn't change PMAP1"); 265static struct mtx PMAP2mutex; 266 267static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 268static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try); 269static void pmap_clear_ptes(vm_page_t m, int bit); 270 271static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 272 vm_page_t m, vm_prot_t prot, vm_page_t mpte); 273static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); 274static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); 275static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, 276 vm_offset_t va); 277static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); 278static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 279 vm_page_t m); 280 281static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); 282 283static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); 284static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m); 285static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); 286static void pmap_pte_release(pt_entry_t *pte); 287static int pmap_unuse_pt(pmap_t, vm_offset_t); 288static vm_offset_t pmap_kmem_choose(vm_offset_t addr); 289#ifdef PAE 290static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); 291#endif 292 293CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); 294CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); 295 296/* 297 * Move the kernel virtual free pointer to the next 298 * 4MB. This is used to help improve performance 299 * by using a large (4MB) page for much of the kernel 300 * (.text, .data, .bss) 301 */ 302static vm_offset_t 303pmap_kmem_choose(vm_offset_t addr) 304{ 305 vm_offset_t newaddr = addr; 306 307#ifndef DISABLE_PSE 308 if (cpu_feature & CPUID_PSE) 309 newaddr = (addr + PDRMASK) & ~PDRMASK; 310#endif 311 return newaddr; 312} 313 314/* 315 * Bootstrap the system enough to run with virtual memory. 316 * 317 * On the i386 this is called after mapping has already been enabled 318 * and just syncs the pmap module with what has already been done. 319 * [We can't call it easily with mapping off since the kernel is not 320 * mapped with PA == VA, hence we would have to relocate every address 321 * from the linked base (virtual) address "KERNBASE" to the actual 322 * (physical) address starting relative to 0] 323 */ 324void 325pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr) 326{ 327 vm_offset_t va; 328 pt_entry_t *pte, *unused; 329 struct sysmaps *sysmaps; 330 int i; 331 332 /* 333 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too 334 * large. It should instead be correctly calculated in locore.s and 335 * not based on 'first' (which is a physical address, not a virtual 336 * address, for the start of unused physical memory). 
The kernel 337 * page tables are NOT double mapped and thus should not be included 338 * in this calculation. 339 */ 340 virtual_avail = (vm_offset_t) KERNBASE + firstaddr; 341 virtual_avail = pmap_kmem_choose(virtual_avail); 342 343 virtual_end = VM_MAX_KERNEL_ADDRESS; 344 345 /* 346 * Initialize the kernel pmap (which is statically allocated). 347 */ 348 PMAP_LOCK_INIT(kernel_pmap); 349 kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); 350#ifdef PAE 351 kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); 352#endif 353 kernel_pmap->pm_active = -1; /* don't allow deactivation */ 354 TAILQ_INIT(&kernel_pmap->pm_pvchunk); 355 LIST_INIT(&allpmaps); 356 mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); 357 mtx_lock_spin(&allpmaps_lock); 358 LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); 359 mtx_unlock_spin(&allpmaps_lock); 360 nkpt = NKPT; 361 362 /* 363 * Reserve some special page table entries/VA space for temporary 364 * mapping of pages. 365 */ 366#define SYSMAP(c, p, v, n) \ 367 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 368 369 va = virtual_avail; 370 pte = vtopte(va); 371 372 /* 373 * CMAP1/CMAP2 are used for zeroing and copying pages. 374 * CMAP3 is used for the idle process page zeroing. 375 */ 376 for (i = 0; i < MAXCPU; i++) { 377 sysmaps = &sysmaps_pcpu[i]; 378 mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); 379 SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1) 380 SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1) 381 } 382 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 383 SYSMAP(caddr_t, CMAP3, CADDR3, 1) 384 *CMAP3 = 0; 385 386 /* 387 * Crashdump maps. 388 */ 389 SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) 390 391 /* 392 * ptvmmap is used for reading arbitrary physical pages via /dev/mem. 393 */ 394 SYSMAP(caddr_t, unused, ptvmmap, 1) 395 396 /* 397 * msgbufp is used to map the system message buffer. 398 */ 399 SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE))) 400 401 /* 402 * ptemap is used for pmap_pte_quick 403 */ 404 SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); 405 SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1); 406 407 mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); 408 409 virtual_avail = va; 410 411 *CMAP1 = 0; 412 413#ifdef XBOX 414 /* FIXME: This is gross, but needed for the XBOX. Since we are in such 415 * an early stadium, we cannot yet neatly map video memory ... :-( 416 * Better fixes are very welcome! */ 417 if (!arch_i386_is_xbox) 418#endif 419 for (i = 0; i < NKPT; i++) 420 PTD[i] = 0; 421 422 /* Initialize the PAT MSR if present. */ 423 pmap_init_pat(); 424 425 /* Turn on PG_G on kernel page(s) */ 426 pmap_set_pg(); 427} 428 429/* 430 * Setup the PAT MSR. 431 */ 432void 433pmap_init_pat(void) 434{ 435 uint64_t pat_msr; 436 437 /* Bail if this CPU doesn't implement PAT. */ 438 if (!(cpu_feature & CPUID_PAT)) 439 return; 440 441#ifdef PAT_WORKS 442 /* 443 * Leave the indices 0-3 at the default of WB, WT, UC, and UC-. 444 * Program 4 and 5 as WP and WC. 445 * Leave 6 and 7 as UC and UC-. 446 */ 447 pat_msr = rdmsr(MSR_PAT); 448 pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5)); 449 pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) | 450 PAT_VALUE(5, PAT_WRITE_COMBINING); 451#else 452 /* 453 * Due to some Intel errata, we can only safely use the lower 4 454 * PAT entries. Thus, just replace PAT Index 2 with WC instead 455 * of UC-. 
	 *
	 * Intel Pentium III Processor Specification Update
	 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
	 * or Mode C Paging)
	 *
	 * Intel Pentium IV Processor Specification Update
	 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
	 */
	pat_msr = rdmsr(MSR_PAT);
	pat_msr &= ~PAT_MASK(2);
	pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
#endif
	wrmsr(MSR_PAT, pat_msr);
}

/*
 * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
 */
void
pmap_set_pg(void)
{
	pd_entry_t pdir;
	pt_entry_t *pte;
	vm_offset_t va, endva;
	int i;

	if (pgeflag == 0)
		return;

	i = KERNLOAD/NBPDR;
	endva = KERNBASE + KERNend;

	if (pseflag) {
		va = KERNBASE + KERNLOAD;
		while (va < endva) {
			pdir = kernel_pmap->pm_pdir[KPTDI+i];
			pdir |= pgeflag;
			kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
			invltlb();	/* Play it safe, invltlb() every time */
			i++;
			va += NBPDR;
		}
	} else {
		va = (vm_offset_t)btext;
		while (va < endva) {
			pte = vtopte(va);
			if (*pte)
				*pte |= pgeflag;
			invltlb();	/* Play it safe, invltlb() every time */
			va += PAGE_SIZE;
		}
	}
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_list_count = 0;
}

#ifdef PAE

static MALLOC_DEFINE(M_PMAPPDPT, "pmap", "pmap pdpt");

static void *
pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	*flags = UMA_SLAB_PRIV;
	return (contigmalloc(PAGE_SIZE, M_PMAPPDPT, 0, 0x0ULL, 0xffffffffULL,
	    1, 0));
}
#endif

/*
 * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
 * Requirements:
 *  - Must deal with pages in order to ensure that none of the PG_* bits
 *    are ever set, PG_V in particular.
 *  - Assumes we can write to ptes without pte_store() atomic ops, even
 *    on PAE systems.  This should be ok.
 *  - Assumes nothing will ever test these addresses for 0 to indicate
 *    no mapping instead of correctly checking PG_V.
 *  - Assumes a vm_offset_t will fit in a pte (true for i386).
 * Because PG_V is never set, there can be no mappings to invalidate.
 */
static vm_offset_t
pmap_ptelist_alloc(vm_offset_t *head)
{
	pt_entry_t *pte;
	vm_offset_t va;

	va = *head;
	if (va == 0)
		return (va);	/* Out of memory */
	pte = vtopte(va);
	*head = *pte;
	if (*head & PG_V)
		panic("pmap_ptelist_alloc: va with PG_V set!");
	*pte = 0;
	return (va);
}

static void
pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
{
	pt_entry_t *pte;

	if (va & PG_V)
		panic("pmap_ptelist_free: freeing va with PG_V set!");
	pte = vtopte(va);
	*pte = *head;		/* virtual! PG_V is 0 though */
	*head = va;
}

static void
pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
{
	int i;
	vm_offset_t va;

	*head = 0;
	for (i = npages - 1; i >= 0; i--) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		pmap_ptelist_free(head, va);
	}
}


/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{

	/*
	 * Initialize the address space (zone) for the pv entries.
Set a 600 * high water mark so that the system can recover from excessive 601 * numbers of pv entries. 602 */ 603 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 604 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; 605 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 606 pv_entry_max = roundup(pv_entry_max, _NPCPV); 607 pv_entry_high_water = 9 * (pv_entry_max / 10); 608 609 pv_maxchunks = pv_entry_max / _NPCPV; 610 pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map, 611 PAGE_SIZE * pv_maxchunks); 612 if (pv_chunkbase == NULL) 613 panic("pmap_init: not enough kvm for pv chunks"); 614 pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); 615#ifdef PAE 616 pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, 617 NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 618 UMA_ZONE_VM | UMA_ZONE_NOFREE); 619 uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); 620#endif 621} 622 623 624SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 625SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, 626 "Max number of PV entries"); 627SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, 628 "Page share factor per proc"); 629 630/*************************************************** 631 * Low level helper routines..... 632 ***************************************************/ 633 634#ifdef SMP 635/* 636 * For SMP, these functions have to use the IPI mechanism for coherence. 637 */ 638void 639pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 640{ 641 u_int cpumask; 642 u_int other_cpus; 643 644 if (smp_started) { 645 if (!(read_eflags() & PSL_I)) 646 panic("%s: interrupts disabled", __func__); 647 mtx_lock_spin(&smp_ipi_mtx); 648 } else 649 critical_enter(); 650 /* 651 * We need to disable interrupt preemption but MUST NOT have 652 * interrupts disabled here. 653 * XXX we may need to hold schedlock to get a coherent pm_active 654 * XXX critical sections disable interrupts again 655 */ 656 if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 657 invlpg(va); 658 smp_invlpg(va); 659 } else { 660 cpumask = PCPU_GET(cpumask); 661 other_cpus = PCPU_GET(other_cpus); 662 if (pmap->pm_active & cpumask) 663 invlpg(va); 664 if (pmap->pm_active & other_cpus) 665 smp_masked_invlpg(pmap->pm_active & other_cpus, va); 666 } 667 if (smp_started) 668 mtx_unlock_spin(&smp_ipi_mtx); 669 else 670 critical_exit(); 671} 672 673void 674pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 675{ 676 u_int cpumask; 677 u_int other_cpus; 678 vm_offset_t addr; 679 680 if (smp_started) { 681 if (!(read_eflags() & PSL_I)) 682 panic("%s: interrupts disabled", __func__); 683 mtx_lock_spin(&smp_ipi_mtx); 684 } else 685 critical_enter(); 686 /* 687 * We need to disable interrupt preemption but MUST NOT have 688 * interrupts disabled here. 
689 * XXX we may need to hold schedlock to get a coherent pm_active 690 * XXX critical sections disable interrupts again 691 */ 692 if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 693 for (addr = sva; addr < eva; addr += PAGE_SIZE) 694 invlpg(addr); 695 smp_invlpg_range(sva, eva); 696 } else { 697 cpumask = PCPU_GET(cpumask); 698 other_cpus = PCPU_GET(other_cpus); 699 if (pmap->pm_active & cpumask) 700 for (addr = sva; addr < eva; addr += PAGE_SIZE) 701 invlpg(addr); 702 if (pmap->pm_active & other_cpus) 703 smp_masked_invlpg_range(pmap->pm_active & other_cpus, 704 sva, eva); 705 } 706 if (smp_started) 707 mtx_unlock_spin(&smp_ipi_mtx); 708 else 709 critical_exit(); 710} 711 712void 713pmap_invalidate_all(pmap_t pmap) 714{ 715 u_int cpumask; 716 u_int other_cpus; 717 718 if (smp_started) { 719 if (!(read_eflags() & PSL_I)) 720 panic("%s: interrupts disabled", __func__); 721 mtx_lock_spin(&smp_ipi_mtx); 722 } else 723 critical_enter(); 724 /* 725 * We need to disable interrupt preemption but MUST NOT have 726 * interrupts disabled here. 727 * XXX we may need to hold schedlock to get a coherent pm_active 728 * XXX critical sections disable interrupts again 729 */ 730 if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 731 invltlb(); 732 smp_invltlb(); 733 } else { 734 cpumask = PCPU_GET(cpumask); 735 other_cpus = PCPU_GET(other_cpus); 736 if (pmap->pm_active & cpumask) 737 invltlb(); 738 if (pmap->pm_active & other_cpus) 739 smp_masked_invltlb(pmap->pm_active & other_cpus); 740 } 741 if (smp_started) 742 mtx_unlock_spin(&smp_ipi_mtx); 743 else 744 critical_exit(); 745} 746 747void 748pmap_invalidate_cache(void) 749{ 750 751 if (smp_started) { 752 if (!(read_eflags() & PSL_I)) 753 panic("%s: interrupts disabled", __func__); 754 mtx_lock_spin(&smp_ipi_mtx); 755 } else 756 critical_enter(); 757 /* 758 * We need to disable interrupt preemption but MUST NOT have 759 * interrupts disabled here. 760 * XXX we may need to hold schedlock to get a coherent pm_active 761 * XXX critical sections disable interrupts again 762 */ 763 wbinvd(); 764 smp_cache_flush(); 765 if (smp_started) 766 mtx_unlock_spin(&smp_ipi_mtx); 767 else 768 critical_exit(); 769} 770#else /* !SMP */ 771/* 772 * Normal, non-SMP, 486+ invalidation functions. 773 * We inline these within pmap.c for speed. 774 */ 775PMAP_INLINE void 776pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 777{ 778 779 if (pmap == kernel_pmap || pmap->pm_active) 780 invlpg(va); 781} 782 783PMAP_INLINE void 784pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 785{ 786 vm_offset_t addr; 787 788 if (pmap == kernel_pmap || pmap->pm_active) 789 for (addr = sva; addr < eva; addr += PAGE_SIZE) 790 invlpg(addr); 791} 792 793PMAP_INLINE void 794pmap_invalidate_all(pmap_t pmap) 795{ 796 797 if (pmap == kernel_pmap || pmap->pm_active) 798 invltlb(); 799} 800 801PMAP_INLINE void 802pmap_invalidate_cache(void) 803{ 804 805 wbinvd(); 806} 807#endif /* !SMP */ 808 809/* 810 * Are we current address space or kernel? N.B. We return FALSE when 811 * a pmap's page table is in use because a kernel thread is borrowing 812 * it. The borrowed page table can change spontaneously, making any 813 * dependence on its continued use subject to a race condition. 
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

	return (pmap == kernel_pmap ||
	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
}

/*
 * If the given pmap is not the current or kernel pmap, the returned pte must
 * be released by passing it to pmap_pte_release().
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_lock(&PMAP2mutex);
		newpf = *pde & PG_FRAME;
		if ((*PMAP2 & PG_FRAME) != newpf) {
			*PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
			pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
		}
		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}

/*
 * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
 * being NULL.
 */
static __inline void
pmap_pte_release(pt_entry_t *pte)
{

	if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2)
		mtx_unlock(&PMAP2mutex);
}

static __inline void
invlcaddr(void *caddr)
{

	invlpg((u_int)caddr);
}

/*
 * Super fast pmap_pte routine best used when scanning
 * the pv lists.  This eliminates many coarse-grained
 * invltlb calls.  Note that many of the pv list
 * scans are across different pmaps.  It is very wasteful
 * to do an entire invltlb for checking a single mapping.
 *
 * If the given pmap is not the current pmap, vm_page_queue_mtx
 * must be held and curthread pinned to a CPU.
 */
static pt_entry_t *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
		newpf = *pde & PG_FRAME;
		if ((*PMAP1 & PG_FRAME) != newpf) {
			*PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			invlcaddr(PADDR1);
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			PMAP1cpu = PCPU_GET(cpuid);
			invlcaddr(PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
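 *
 *	A superpage (PG_PS) mapping is resolved directly from the page
 *	directory entry; otherwise the 4KB page table entry is consulted.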
923 */ 924vm_paddr_t 925pmap_extract(pmap_t pmap, vm_offset_t va) 926{ 927 vm_paddr_t rtval; 928 pt_entry_t *pte; 929 pd_entry_t pde; 930 931 rtval = 0; 932 PMAP_LOCK(pmap); 933 pde = pmap->pm_pdir[va >> PDRSHIFT]; 934 if (pde != 0) { 935 if ((pde & PG_PS) != 0) { 936 rtval = (pde & ~PDRMASK) | (va & PDRMASK); 937 PMAP_UNLOCK(pmap); 938 return rtval; 939 } 940 pte = pmap_pte(pmap, va); 941 rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); 942 pmap_pte_release(pte); 943 } 944 PMAP_UNLOCK(pmap); 945 return (rtval); 946} 947 948/* 949 * Routine: pmap_extract_and_hold 950 * Function: 951 * Atomically extract and hold the physical page 952 * with the given pmap and virtual address pair 953 * if that mapping permits the given protection. 954 */ 955vm_page_t 956pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 957{ 958 pd_entry_t pde; 959 pt_entry_t pte; 960 vm_page_t m; 961 962 m = NULL; 963 vm_page_lock_queues(); 964 PMAP_LOCK(pmap); 965 pde = *pmap_pde(pmap, va); 966 if (pde != 0) { 967 if (pde & PG_PS) { 968 if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { 969 m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) | 970 (va & PDRMASK)); 971 vm_page_hold(m); 972 } 973 } else { 974 sched_pin(); 975 pte = *pmap_pte_quick(pmap, va); 976 if (pte != 0 && 977 ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { 978 m = PHYS_TO_VM_PAGE(pte & PG_FRAME); 979 vm_page_hold(m); 980 } 981 sched_unpin(); 982 } 983 } 984 vm_page_unlock_queues(); 985 PMAP_UNLOCK(pmap); 986 return (m); 987} 988 989/*************************************************** 990 * Low level mapping routines..... 991 ***************************************************/ 992 993/* 994 * Add a wired page to the kva. 995 * Note: not SMP coherent. 996 */ 997PMAP_INLINE void 998pmap_kenter(vm_offset_t va, vm_paddr_t pa) 999{ 1000 pt_entry_t *pte; 1001 1002 pte = vtopte(va); 1003 pte_store(pte, pa | PG_RW | PG_V | pgeflag); 1004} 1005 1006/* 1007 * Remove a page from the kernel pagetables. 1008 * Note: not SMP coherent. 1009 */ 1010PMAP_INLINE void 1011pmap_kremove(vm_offset_t va) 1012{ 1013 pt_entry_t *pte; 1014 1015 pte = vtopte(va); 1016 pte_clear(pte); 1017} 1018 1019/* 1020 * Used to map a range of physical addresses into kernel 1021 * virtual address space. 1022 * 1023 * The value passed in '*virt' is a suggested virtual address for 1024 * the mapping. Architectures which can support a direct-mapped 1025 * physical to virtual region can return the appropriate address 1026 * within that region, leaving '*virt' unchanged. Other 1027 * architectures should map the pages starting at '*virt' and 1028 * update '*virt' with the first usable address after the mapped 1029 * region. 1030 */ 1031vm_offset_t 1032pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 1033{ 1034 vm_offset_t va, sva; 1035 1036 va = sva = *virt; 1037 while (start < end) { 1038 pmap_kenter(va, start); 1039 va += PAGE_SIZE; 1040 start += PAGE_SIZE; 1041 } 1042 pmap_invalidate_range(kernel_pmap, sva, va); 1043 *virt = va; 1044 return (sva); 1045} 1046 1047 1048/* 1049 * Add a list of wired pages to the kva 1050 * this routine is only used for temporary 1051 * kernel mappings that do not need to have 1052 * page modification or references recorded. 1053 * Note that old mappings are simply written 1054 * over. The page *must* be wired. 1055 * Note: SMP coherent. Uses a ranged shootdown IPI. 
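 * The shootdown is skipped entirely when none of the replaced
 * entries was valid (PG_V), since there is nothing to invalidate.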
1056 */ 1057void 1058pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1059{ 1060 pt_entry_t *endpte, oldpte, *pte; 1061 1062 oldpte = 0; 1063 pte = vtopte(sva); 1064 endpte = pte + count; 1065 while (pte < endpte) { 1066 oldpte |= *pte; 1067 pte_store(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag | PG_RW | PG_V); 1068 pte++; 1069 ma++; 1070 } 1071 if ((oldpte & PG_V) != 0) 1072 pmap_invalidate_range(kernel_pmap, sva, sva + count * 1073 PAGE_SIZE); 1074} 1075 1076/* 1077 * This routine tears out page mappings from the 1078 * kernel -- it is meant only for temporary mappings. 1079 * Note: SMP coherent. Uses a ranged shootdown IPI. 1080 */ 1081void 1082pmap_qremove(vm_offset_t sva, int count) 1083{ 1084 vm_offset_t va; 1085 1086 va = sva; 1087 while (count-- > 0) { 1088 pmap_kremove(va); 1089 va += PAGE_SIZE; 1090 } 1091 pmap_invalidate_range(kernel_pmap, sva, va); 1092} 1093 1094/*************************************************** 1095 * Page table page management routines..... 1096 ***************************************************/ 1097 1098/* 1099 * This routine unholds page table pages, and if the hold count 1100 * drops to zero, then it decrements the wire count. 1101 */ 1102static PMAP_INLINE int 1103pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 1104{ 1105 1106 --m->wire_count; 1107 if (m->wire_count == 0) 1108 return _pmap_unwire_pte_hold(pmap, m); 1109 else 1110 return 0; 1111} 1112 1113static int 1114_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 1115{ 1116 vm_offset_t pteva; 1117 1118 /* 1119 * unmap the page table page 1120 */ 1121 pmap->pm_pdir[m->pindex] = 0; 1122 --pmap->pm_stats.resident_count; 1123 1124 /* 1125 * Do an invltlb to make the invalidated mapping 1126 * take effect immediately. 1127 */ 1128 pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); 1129 pmap_invalidate_page(pmap, pteva); 1130 1131 vm_page_free_zero(m); 1132 atomic_subtract_int(&cnt.v_wire_count, 1); 1133 return 1; 1134} 1135 1136/* 1137 * After removing a page table entry, this routine is used to 1138 * conditionally free the page, and manage the hold/wire counts. 1139 */ 1140static int 1141pmap_unuse_pt(pmap_t pmap, vm_offset_t va) 1142{ 1143 pd_entry_t ptepde; 1144 vm_page_t mpte; 1145 1146 if (va >= VM_MAXUSER_ADDRESS) 1147 return 0; 1148 ptepde = *pmap_pde(pmap, va); 1149 mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); 1150 return pmap_unwire_pte_hold(pmap, mpte); 1151} 1152 1153void 1154pmap_pinit0(pmap_t pmap) 1155{ 1156 1157 PMAP_LOCK_INIT(pmap); 1158 pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); 1159#ifdef PAE 1160 pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); 1161#endif 1162 pmap->pm_active = 0; 1163 PCPU_SET(curpmap, pmap); 1164 TAILQ_INIT(&pmap->pm_pvchunk); 1165 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1166 mtx_lock_spin(&allpmaps_lock); 1167 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1168 mtx_unlock_spin(&allpmaps_lock); 1169} 1170 1171/* 1172 * Initialize a preallocated and zeroed pmap structure, 1173 * such as one in a vmspace structure. 1174 */ 1175void 1176pmap_pinit(pmap_t pmap) 1177{ 1178 vm_page_t m, ptdpg[NPGPTD]; 1179 vm_paddr_t pa; 1180 static int color; 1181 int i; 1182 1183 PMAP_LOCK_INIT(pmap); 1184 1185 /* 1186 * No need to allocate page table space yet but we do need a valid 1187 * page directory table. 
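	 * The KVA for the page directory is allocated only on first use
	 * (pm_pdir == NULL) and is kept across later reinitializations of
	 * the pmap.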
1188 */ 1189 if (pmap->pm_pdir == NULL) { 1190 pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1191 NBPTD); 1192#ifdef PAE 1193 pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); 1194 KASSERT(((vm_offset_t)pmap->pm_pdpt & 1195 ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, 1196 ("pmap_pinit: pdpt misaligned")); 1197 KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), 1198 ("pmap_pinit: pdpt above 4g")); 1199#endif 1200 } 1201 1202 /* 1203 * allocate the page directory page(s) 1204 */ 1205 for (i = 0; i < NPGPTD;) { 1206 m = vm_page_alloc(NULL, color++, 1207 VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1208 VM_ALLOC_ZERO); 1209 if (m == NULL) 1210 VM_WAIT; 1211 else { 1212 ptdpg[i++] = m; 1213 } 1214 } 1215 1216 pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); 1217 1218 for (i = 0; i < NPGPTD; i++) { 1219 if ((ptdpg[i]->flags & PG_ZERO) == 0) 1220 bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); 1221 } 1222 1223 mtx_lock_spin(&allpmaps_lock); 1224 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1225 mtx_unlock_spin(&allpmaps_lock); 1226 /* Wire in kernel global address entries. */ 1227 /* XXX copies current process, does not fill in MPPTDI */ 1228 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); 1229#ifdef SMP 1230 pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; 1231#endif 1232 1233 /* install self-referential address mapping entry(s) */ 1234 for (i = 0; i < NPGPTD; i++) { 1235 pa = VM_PAGE_TO_PHYS(ptdpg[i]); 1236 pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; 1237#ifdef PAE 1238 pmap->pm_pdpt[i] = pa | PG_V; 1239#endif 1240 } 1241 1242 pmap->pm_active = 0; 1243 TAILQ_INIT(&pmap->pm_pvchunk); 1244 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1245} 1246 1247/* 1248 * this routine is called if the page table page is not 1249 * mapped correctly. 1250 */ 1251static vm_page_t 1252_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) 1253{ 1254 vm_paddr_t ptepa; 1255 vm_page_t m; 1256 1257 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1258 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1259 ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1260 1261 /* 1262 * Allocate a page table page. 1263 */ 1264 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1265 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1266 if (flags & M_WAITOK) { 1267 PMAP_UNLOCK(pmap); 1268 vm_page_unlock_queues(); 1269 VM_WAIT; 1270 vm_page_lock_queues(); 1271 PMAP_LOCK(pmap); 1272 } 1273 1274 /* 1275 * Indicate the need to retry. While waiting, the page table 1276 * page may have been allocated. 1277 */ 1278 return (NULL); 1279 } 1280 if ((m->flags & PG_ZERO) == 0) 1281 pmap_zero_page(m); 1282 1283 /* 1284 * Map the pagetable page into the process address space, if 1285 * it isn't already there. 
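	 * The page directory entry is installed with PG_U and PG_RW set;
	 * access is still constrained per page by the individual ptes.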
1286 */ 1287 1288 pmap->pm_stats.resident_count++; 1289 1290 ptepa = VM_PAGE_TO_PHYS(m); 1291 pmap->pm_pdir[ptepindex] = 1292 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); 1293 1294 return m; 1295} 1296 1297static vm_page_t 1298pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1299{ 1300 unsigned ptepindex; 1301 pd_entry_t ptepa; 1302 vm_page_t m; 1303 1304 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1305 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1306 ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1307 1308 /* 1309 * Calculate pagetable page index 1310 */ 1311 ptepindex = va >> PDRSHIFT; 1312retry: 1313 /* 1314 * Get the page directory entry 1315 */ 1316 ptepa = pmap->pm_pdir[ptepindex]; 1317 1318 /* 1319 * This supports switching from a 4MB page to a 1320 * normal 4K page. 1321 */ 1322 if (ptepa & PG_PS) { 1323 pmap->pm_pdir[ptepindex] = 0; 1324 ptepa = 0; 1325 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1326 pmap_invalidate_all(kernel_pmap); 1327 } 1328 1329 /* 1330 * If the page table page is mapped, we just increment the 1331 * hold count, and activate it. 1332 */ 1333 if (ptepa) { 1334 m = PHYS_TO_VM_PAGE(ptepa); 1335 m->wire_count++; 1336 } else { 1337 /* 1338 * Here if the pte page isn't mapped, or if it has 1339 * been deallocated. 1340 */ 1341 m = _pmap_allocpte(pmap, ptepindex, flags); 1342 if (m == NULL && (flags & M_WAITOK)) 1343 goto retry; 1344 } 1345 return (m); 1346} 1347 1348 1349/*************************************************** 1350* Pmap allocation/deallocation routines. 1351 ***************************************************/ 1352 1353#ifdef SMP 1354/* 1355 * Deal with a SMP shootdown of other users of the pmap that we are 1356 * trying to dispose of. This can be a bit hairy. 1357 */ 1358static u_int *lazymask; 1359static u_int lazyptd; 1360static volatile u_int lazywait; 1361 1362void pmap_lazyfix_action(void); 1363 1364void 1365pmap_lazyfix_action(void) 1366{ 1367 u_int mymask = PCPU_GET(cpumask); 1368 1369#ifdef COUNT_IPIS 1370 *ipi_lazypmap_counts[PCPU_GET(cpuid)]++; 1371#endif 1372 if (rcr3() == lazyptd) 1373 load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1374 atomic_clear_int(lazymask, mymask); 1375 atomic_store_rel_int(&lazywait, 1); 1376} 1377 1378static void 1379pmap_lazyfix_self(u_int mymask) 1380{ 1381 1382 if (rcr3() == lazyptd) 1383 load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1384 atomic_clear_int(lazymask, mymask); 1385} 1386 1387 1388static void 1389pmap_lazyfix(pmap_t pmap) 1390{ 1391 u_int mymask; 1392 u_int mask; 1393 u_int spins; 1394 1395 while ((mask = pmap->pm_active) != 0) { 1396 spins = 50000000; 1397 mask = mask & -mask; /* Find least significant set bit */ 1398 mtx_lock_spin(&smp_ipi_mtx); 1399#ifdef PAE 1400 lazyptd = vtophys(pmap->pm_pdpt); 1401#else 1402 lazyptd = vtophys(pmap->pm_pdir); 1403#endif 1404 mymask = PCPU_GET(cpumask); 1405 if (mask == mymask) { 1406 lazymask = &pmap->pm_active; 1407 pmap_lazyfix_self(mymask); 1408 } else { 1409 atomic_store_rel_int((u_int *)&lazymask, 1410 (u_int)&pmap->pm_active); 1411 atomic_store_rel_int(&lazywait, 0); 1412 ipi_selected(mask, IPI_LAZYPMAP); 1413 while (lazywait == 0) { 1414 ia32_pause(); 1415 if (--spins == 0) 1416 break; 1417 } 1418 } 1419 mtx_unlock_spin(&smp_ipi_mtx); 1420 if (spins == 0) 1421 printf("pmap_lazyfix: spun for 50000000\n"); 1422 } 1423} 1424 1425#else /* SMP */ 1426 1427/* 1428 * Cleaning up on uniprocessor is easy. 
For various reasons, we're 1429 * unlikely to have to even execute this code, including the fact 1430 * that the cleanup is deferred until the parent does a wait(2), which 1431 * means that another userland process has run. 1432 */ 1433static void 1434pmap_lazyfix(pmap_t pmap) 1435{ 1436 u_int cr3; 1437 1438 cr3 = vtophys(pmap->pm_pdir); 1439 if (cr3 == rcr3()) { 1440 load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1441 pmap->pm_active &= ~(PCPU_GET(cpumask)); 1442 } 1443} 1444#endif /* SMP */ 1445 1446/* 1447 * Release any resources held by the given physical map. 1448 * Called when a pmap initialized by pmap_pinit is being released. 1449 * Should only be called if the map contains no valid mappings. 1450 */ 1451void 1452pmap_release(pmap_t pmap) 1453{ 1454 vm_page_t m, ptdpg[NPGPTD]; 1455 int i; 1456 1457 KASSERT(pmap->pm_stats.resident_count == 0, 1458 ("pmap_release: pmap resident count %ld != 0", 1459 pmap->pm_stats.resident_count)); 1460 1461 pmap_lazyfix(pmap); 1462 mtx_lock_spin(&allpmaps_lock); 1463 LIST_REMOVE(pmap, pm_list); 1464 mtx_unlock_spin(&allpmaps_lock); 1465 1466 for (i = 0; i < NPGPTD; i++) 1467 ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i]); 1468 1469 bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * 1470 sizeof(*pmap->pm_pdir)); 1471#ifdef SMP 1472 pmap->pm_pdir[MPPTDI] = 0; 1473#endif 1474 1475 pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); 1476 1477 vm_page_lock_queues(); 1478 for (i = 0; i < NPGPTD; i++) { 1479 m = ptdpg[i]; 1480#ifdef PAE 1481 KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), 1482 ("pmap_release: got wrong ptd page")); 1483#endif 1484 m->wire_count--; 1485 atomic_subtract_int(&cnt.v_wire_count, 1); 1486 vm_page_free_zero(m); 1487 } 1488 vm_page_unlock_queues(); 1489 PMAP_LOCK_DESTROY(pmap); 1490} 1491 1492static int 1493kvm_size(SYSCTL_HANDLER_ARGS) 1494{ 1495 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 1496 1497 return sysctl_handle_long(oidp, &ksize, 0, req); 1498} 1499SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1500 0, 0, kvm_size, "IU", "Size of KVM"); 1501 1502static int 1503kvm_free(SYSCTL_HANDLER_ARGS) 1504{ 1505 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1506 1507 return sysctl_handle_long(oidp, &kfree, 0, req); 1508} 1509SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1510 0, 0, kvm_free, "IU", "Amount of KVM free"); 1511 1512/* 1513 * grow the number of kernel page table entries, if needed 1514 */ 1515void 1516pmap_growkernel(vm_offset_t addr) 1517{ 1518 struct pmap *pmap; 1519 vm_paddr_t ptppaddr; 1520 vm_page_t nkpg; 1521 pd_entry_t newpdir; 1522 pt_entry_t *pde; 1523 1524 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1525 if (kernel_vm_end == 0) { 1526 kernel_vm_end = KERNBASE; 1527 nkpt = 0; 1528 while (pdir_pde(PTD, kernel_vm_end)) { 1529 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1530 nkpt++; 1531 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1532 kernel_vm_end = kernel_map->max_offset; 1533 break; 1534 } 1535 } 1536 } 1537 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1538 if (addr - 1 >= kernel_map->max_offset) 1539 addr = kernel_map->max_offset; 1540 while (kernel_vm_end < addr) { 1541 if (pdir_pde(PTD, kernel_vm_end)) { 1542 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1543 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1544 kernel_vm_end = kernel_map->max_offset; 1545 break; 1546 } 1547 continue; 1548 } 1549 1550 /* 1551 * This index is bogus, but out of the way 1552 */ 
1553 nkpg = vm_page_alloc(NULL, nkpt, 1554 VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); 1555 if (!nkpg) 1556 panic("pmap_growkernel: no memory to grow kernel"); 1557 1558 nkpt++; 1559 1560 pmap_zero_page(nkpg); 1561 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1562 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); 1563 pdir_pde(PTD, kernel_vm_end) = newpdir; 1564 1565 mtx_lock_spin(&allpmaps_lock); 1566 LIST_FOREACH(pmap, &allpmaps, pm_list) { 1567 pde = pmap_pde(pmap, kernel_vm_end); 1568 pde_store(pde, newpdir); 1569 } 1570 mtx_unlock_spin(&allpmaps_lock); 1571 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1572 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1573 kernel_vm_end = kernel_map->max_offset; 1574 break; 1575 } 1576 } 1577} 1578 1579 1580/*************************************************** 1581 * page management routines. 1582 ***************************************************/ 1583 1584CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1585CTASSERT(_NPCM == 11); 1586 1587static __inline struct pv_chunk * 1588pv_to_chunk(pv_entry_t pv) 1589{ 1590 1591 return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK); 1592} 1593 1594#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1595 1596#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ 1597#define PC_FREE10 0x0000fffful /* Free values for index 10 */ 1598 1599static uint32_t pc_freemask[11] = { 1600 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1601 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1602 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1603 PC_FREE0_9, PC_FREE10 1604}; 1605 1606SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1607 "Current number of pv entries"); 1608 1609#ifdef PV_STATS 1610static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1611 1612SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1613 "Current number of pv entry chunks"); 1614SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1615 "Current number of pv entry chunks allocated"); 1616SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1617 "Current number of pv entry chunks frees"); 1618SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1619 "Number of times tried to get a chunk page but failed."); 1620 1621static long pv_entry_frees, pv_entry_allocs; 1622static int pv_entry_spare; 1623 1624SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1625 "Current number of pv entry frees"); 1626SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1627 "Current number of pv entry allocs"); 1628SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1629 "Current number of spare pv entries"); 1630 1631static int pmap_collect_inactive, pmap_collect_active; 1632 1633SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0, 1634 "Current number times pmap_collect called on inactive queue"); 1635SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0, 1636 "Current number times pmap_collect called on active queue"); 1637#endif 1638 1639/* 1640 * We are in a serious low memory condition. Resort to 1641 * drastic measures to free some pages so we can allocate 1642 * another pv entry chunk. This is normally called to 1643 * unmap inactive pages, and if necessary, active pages. 
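 * To avoid deadlock with the pmap already locked by the caller, other
 * pmaps are locked unconditionally only when they follow locked_pmap
 * in address order; otherwise they are merely try-locked.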
 */
static void
pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
{
	pmap_t pmap;
	pt_entry_t *pte, tpte;
	pv_entry_t next_pv, pv;
	vm_offset_t va;
	vm_page_t m;

	sched_pin();
	TAILQ_FOREACH(m, &vpq->pl, pageq) {
		if (m->hold_count || m->busy || (m->flags & PG_BUSY))
			continue;
		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
			va = pv->pv_va;
			pmap = PV_PMAP(pv);
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
				continue;
			pmap->pm_stats.resident_count--;
			pte = pmap_pte_quick(pmap, va);
			tpte = pte_load_clear(pte);
			KASSERT((tpte & PG_W) == 0,
			    ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
			if (tpte & PG_A)
				vm_page_flag_set(m, PG_REFERENCED);
			if (tpte & PG_M) {
				KASSERT((tpte & PG_RW),
				    ("pmap_collect: modified page not writable: va: %#x, pte: %#jx",
				    va, (uintmax_t)tpte));
				vm_page_dirty(m);
			}
			pmap_invalidate_page(pmap, va);
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
			if (TAILQ_EMPTY(&m->md.pv_list))
				vm_page_flag_clear(m, PG_WRITEABLE);
			m->md.pv_list_count--;
			pmap_unuse_pt(pmap, va);
			free_pv_entry(pmap, pv);
			if (pmap != locked_pmap)
				PMAP_UNLOCK(pmap);
		}
	}
	sched_unpin();
}


/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	vm_page_t m;
	struct pv_chunk *pc;
	int idx, field, bit;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_frees++);
	PV_STAT(pv_entry_spare++);
	pv_entry_count--;
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 32;
	bit = idx % 32;
	pc->pc_map[field] |= 1ul << bit;
	/* move to head of list */
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	for (idx = 0; idx < _NPCM; idx++)
		if (pc->pc_map[idx] != pc_freemask[idx])
			return;
	PV_STAT(pv_entry_spare -= _NPCPV);
	PV_STAT(pc_chunk_count--);
	PV_STAT(pc_chunk_frees++);
	/* entire chunk is free, return it */
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
	pmap_qremove((vm_offset_t)pc, 1);
	vm_page_unwire(m, 0);
	vm_page_free(m);
	pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
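 * Free slots within a chunk are tracked by the pc_map bitmask; the
 * lowest set bit (bsfl) selects the pv entry that is handed out.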
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, int try)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	static vm_pindex_t colour;
	int bit, field, page_req;
	pv_entry_t pv;
	struct pv_chunk *pc;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PV_STAT(pv_entry_allocs++);
	pv_entry_count++;
	if (pv_entry_count > pv_entry_high_water)
		pagedaemon_wakeup();
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = bsfl(pc->pc_map[field]);
				break;
			}
		}
		if (field < _NPCM) {
			pv = &pc->pc_pventry[field * 32 + bit];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			for (field = 0; field < _NPCM; field++)
				if (pc->pc_map[field] != 0) {
					PV_STAT(pv_entry_spare--);
					return (pv);	/* not full, return */
				}
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
			PV_STAT(pv_entry_spare--);
			return (pv);
		}
	}
	pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
	page_req = try ? VM_ALLOC_NORMAL : VM_ALLOC_SYSTEM;
	m = vm_page_alloc(NULL, colour, page_req |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
	if (m == NULL || pc == NULL) {
		if (try) {
			pv_entry_count--;
			PV_STAT(pc_chunk_tryfail++);
			if (m) {
				vm_page_lock_queues();
				vm_page_unwire(m, 0);
				vm_page_free(m);
				vm_page_unlock_queues();
			}
			if (pc)
				pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
			return (NULL);
		}
		/*
		 * Reclaim pv entries: At first, destroy mappings to
		 * inactive pages.  After that, if a pv chunk entry
		 * is still needed, destroy mappings to active pages.
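		 * Reclamation can free entire pv chunks, so both the page
		 * allocation and the chunk KVA allocation are retried after
		 * each pass.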
1798 */ 1799 if (ratecheck(&lastprint, &printinterval)) 1800 printf("Approaching the limit on PV entries, " 1801 "consider increasing tunables " 1802 "vm.pmap.shpgperproc or " 1803 "vm.pmap.pv_entry_max\n"); 1804 PV_STAT(pmap_collect_inactive++); 1805 pmap_collect(pmap, &vm_page_queues[PQ_INACTIVE]); 1806 if (m == NULL) 1807 m = vm_page_alloc(NULL, colour, VM_ALLOC_SYSTEM | 1808 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); 1809 if (pc == NULL) 1810 pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); 1811 if (m == NULL || pc == NULL) { 1812 PV_STAT(pmap_collect_active++); 1813 pmap_collect(pmap, &vm_page_queues[PQ_ACTIVE]); 1814 if (m == NULL) 1815 m = vm_page_alloc(NULL, colour, 1816 VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ | 1817 VM_ALLOC_WIRED); 1818 if (pc == NULL) 1819 pc = (struct pv_chunk *) 1820 pmap_ptelist_alloc(&pv_vafree); 1821 if (m == NULL || pc == NULL) 1822 panic("get_pv_entry: increase vm.pmap.shpgperproc"); 1823 } 1824 } 1825 PV_STAT(pc_chunk_count++); 1826 PV_STAT(pc_chunk_allocs++); 1827 colour++; 1828 pmap_qenter((vm_offset_t)pc, &m, 1); 1829 pc->pc_pmap = pmap; 1830 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 1831 for (field = 1; field < _NPCM; field++) 1832 pc->pc_map[field] = pc_freemask[field]; 1833 pv = &pc->pc_pventry[0]; 1834 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1835 PV_STAT(pv_entry_spare += _NPCPV - 1); 1836 return (pv); 1837} 1838 1839static void 1840pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1841{ 1842 pv_entry_t pv; 1843 1844 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1845 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1846 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1847 if (pmap == PV_PMAP(pv) && va == pv->pv_va) 1848 break; 1849 } 1850 KASSERT(pv != NULL, ("pmap_remove_entry: pv not found")); 1851 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1852 m->md.pv_list_count--; 1853 if (TAILQ_EMPTY(&m->md.pv_list)) 1854 vm_page_flag_clear(m, PG_WRITEABLE); 1855 free_pv_entry(pmap, pv); 1856} 1857 1858/* 1859 * Create a pv entry for page at pa for 1860 * (pmap, va). 1861 */ 1862static void 1863pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 1864{ 1865 pv_entry_t pv; 1866 1867 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1868 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1869 pv = get_pv_entry(pmap, FALSE); 1870 pv->pv_va = va; 1871 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1872 m->md.pv_list_count++; 1873} 1874 1875/* 1876 * Conditionally create a pv entry. 1877 */ 1878static boolean_t 1879pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 1880{ 1881 pv_entry_t pv; 1882 1883 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1884 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1885 if (pv_entry_count < pv_entry_high_water && 1886 (pv = get_pv_entry(pmap, TRUE)) != NULL) { 1887 pv->pv_va = va; 1888 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1889 m->md.pv_list_count++; 1890 return (TRUE); 1891 } else 1892 return (FALSE); 1893} 1894 1895/* 1896 * pmap_remove_pte: do the things to unmap a page in a process 1897 */ 1898static int 1899pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) 1900{ 1901 pt_entry_t oldpte; 1902 vm_page_t m; 1903 1904 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1905 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1906 oldpte = pte_load_clear(ptq); 1907 if (oldpte & PG_W) 1908 pmap->pm_stats.wired_count -= 1; 1909 /* 1910 * Machines that don't support invlpg, also don't support 1911 * PG_G. 
1912 */ 1913 if (oldpte & PG_G) 1914 pmap_invalidate_page(kernel_pmap, va); 1915 pmap->pm_stats.resident_count -= 1; 1916 if (oldpte & PG_MANAGED) { 1917 m = PHYS_TO_VM_PAGE(oldpte); 1918 if (oldpte & PG_M) { 1919 KASSERT((oldpte & PG_RW), 1920 ("pmap_remove_pte: modified page not writable: va: %#x, pte: %#jx", 1921 va, (uintmax_t)oldpte)); 1922 vm_page_dirty(m); 1923 } 1924 if (oldpte & PG_A) 1925 vm_page_flag_set(m, PG_REFERENCED); 1926 pmap_remove_entry(pmap, m, va); 1927 } 1928 return (pmap_unuse_pt(pmap, va)); 1929} 1930 1931/* 1932 * Remove a single page from a process address space 1933 */ 1934static void 1935pmap_remove_page(pmap_t pmap, vm_offset_t va) 1936{ 1937 pt_entry_t *pte; 1938 1939 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1940 KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 1941 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1942 if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) 1943 return; 1944 pmap_remove_pte(pmap, pte, va); 1945 pmap_invalidate_page(pmap, va); 1946} 1947 1948/* 1949 * Remove the given range of addresses from the specified map. 1950 * 1951 * It is assumed that the start and end are properly 1952 * rounded to the page size. 1953 */ 1954void 1955pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1956{ 1957 vm_offset_t pdnxt; 1958 pd_entry_t ptpaddr; 1959 pt_entry_t *pte; 1960 int anyvalid; 1961 1962 /* 1963 * Perform an unsynchronized read. This is, however, safe. 1964 */ 1965 if (pmap->pm_stats.resident_count == 0) 1966 return; 1967 1968 anyvalid = 0; 1969 1970 vm_page_lock_queues(); 1971 sched_pin(); 1972 PMAP_LOCK(pmap); 1973 1974 /* 1975 * special handling of removing one page. a very 1976 * common operation and easy to short circuit some 1977 * code. 1978 */ 1979 if ((sva + PAGE_SIZE == eva) && 1980 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 1981 pmap_remove_page(pmap, sva); 1982 goto out; 1983 } 1984 1985 for (; sva < eva; sva = pdnxt) { 1986 unsigned pdirindex; 1987 1988 /* 1989 * Calculate index for next page table. 1990 */ 1991 pdnxt = (sva + NBPDR) & ~PDRMASK; 1992 if (pmap->pm_stats.resident_count == 0) 1993 break; 1994 1995 pdirindex = sva >> PDRSHIFT; 1996 ptpaddr = pmap->pm_pdir[pdirindex]; 1997 1998 /* 1999 * Weed out invalid mappings. Note: we assume that the page 2000 * directory table is always allocated, and in kernel virtual. 2001 */ 2002 if (ptpaddr == 0) 2003 continue; 2004 2005 /* 2006 * Check for large page. 2007 */ 2008 if ((ptpaddr & PG_PS) != 0) { 2009 pmap->pm_pdir[pdirindex] = 0; 2010 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2011 anyvalid = 1; 2012 continue; 2013 } 2014 2015 /* 2016 * Limit our scan to either the end of the va represented 2017 * by the current page table page, or to the end of the 2018 * range being removed. 2019 */ 2020 if (pdnxt > eva) 2021 pdnxt = eva; 2022 2023 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2024 sva += PAGE_SIZE) { 2025 if (*pte == 0) 2026 continue; 2027 2028 /* 2029 * The TLB entry for a PG_G mapping is invalidated 2030 * by pmap_remove_pte(). 2031 */ 2032 if ((*pte & PG_G) == 0) 2033 anyvalid = 1; 2034 if (pmap_remove_pte(pmap, pte, sva)) 2035 break; 2036 } 2037 } 2038out: 2039 sched_unpin(); 2040 vm_page_unlock_queues(); 2041 if (anyvalid) 2042 pmap_invalidate_all(pmap); 2043 PMAP_UNLOCK(pmap); 2044} 2045 2046/* 2047 * Routine: pmap_remove_all 2048 * Function: 2049 * Removes this physical page from 2050 * all physical maps in which it resides. 2051 * Reflects back modify bits to the pager. 
2052 * 2053 * Notes: 2054 * Original versions of this routine were very 2055 * inefficient because they iteratively called 2056 * pmap_remove (slow...) 2057 */ 2058 2059void 2060pmap_remove_all(vm_page_t m) 2061{ 2062 pv_entry_t pv; 2063 pmap_t pmap; 2064 pt_entry_t *pte, tpte; 2065 2066#if defined(PMAP_DIAGNOSTIC) 2067 /* 2068 * XXX This makes pmap_remove_all() illegal for non-managed pages! 2069 */ 2070 if (m->flags & PG_FICTITIOUS) { 2071 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", 2072 VM_PAGE_TO_PHYS(m)); 2073 } 2074#endif 2075 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2076 sched_pin(); 2077 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2078 pmap = PV_PMAP(pv); 2079 PMAP_LOCK(pmap); 2080 pmap->pm_stats.resident_count--; 2081 pte = pmap_pte_quick(pmap, pv->pv_va); 2082 tpte = pte_load_clear(pte); 2083 if (tpte & PG_W) 2084 pmap->pm_stats.wired_count--; 2085 if (tpte & PG_A) 2086 vm_page_flag_set(m, PG_REFERENCED); 2087 2088 /* 2089 * Update the vm_page_t clean and reference bits. 2090 */ 2091 if (tpte & PG_M) { 2092 KASSERT((tpte & PG_RW), 2093 ("pmap_remove_all: modified page not writable: va: %#x, pte: %#jx", 2094 pv->pv_va, (uintmax_t)tpte)); 2095 vm_page_dirty(m); 2096 } 2097 pmap_invalidate_page(pmap, pv->pv_va); 2098 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2099 m->md.pv_list_count--; 2100 pmap_unuse_pt(pmap, pv->pv_va); 2101 free_pv_entry(pmap, pv); 2102 PMAP_UNLOCK(pmap); 2103 } 2104 vm_page_flag_clear(m, PG_WRITEABLE); 2105 sched_unpin(); 2106} 2107 2108/* 2109 * Set the physical protection on the 2110 * specified range of this map as requested. 2111 */ 2112void 2113pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2114{ 2115 vm_offset_t pdnxt; 2116 pd_entry_t ptpaddr; 2117 pt_entry_t *pte; 2118 int anychanged; 2119 2120 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2121 pmap_remove(pmap, sva, eva); 2122 return; 2123 } 2124 2125 if (prot & VM_PROT_WRITE) 2126 return; 2127 2128 anychanged = 0; 2129 2130 vm_page_lock_queues(); 2131 sched_pin(); 2132 PMAP_LOCK(pmap); 2133 for (; sva < eva; sva = pdnxt) { 2134 unsigned obits, pbits, pdirindex; 2135 2136 pdnxt = (sva + NBPDR) & ~PDRMASK; 2137 2138 pdirindex = sva >> PDRSHIFT; 2139 ptpaddr = pmap->pm_pdir[pdirindex]; 2140 2141 /* 2142 * Weed out invalid mappings. Note: we assume that the page 2143 * directory table is always allocated, and in kernel virtual. 2144 */ 2145 if (ptpaddr == 0) 2146 continue; 2147 2148 /* 2149 * Check for large page. 2150 */ 2151 if ((ptpaddr & PG_PS) != 0) { 2152 pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); 2153 anychanged = 1; 2154 continue; 2155 } 2156 2157 if (pdnxt > eva) 2158 pdnxt = eva; 2159 2160 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2161 sva += PAGE_SIZE) { 2162 vm_page_t m; 2163 2164retry: 2165 /* 2166 * Regardless of whether a pte is 32 or 64 bits in 2167 * size, PG_RW, PG_A, and PG_M are among the least 2168 * significant 32 bits. 
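 * Consequently, the compare-and-set below only needs to operate on the low 32 bits of the pte, even under PAE.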
2169 */ 2170 obits = pbits = *(u_int *)pte; 2171 if (pbits & PG_MANAGED) { 2172 m = NULL; 2173 if (pbits & PG_A) { 2174 m = PHYS_TO_VM_PAGE(*pte); 2175 vm_page_flag_set(m, PG_REFERENCED); 2176 pbits &= ~PG_A; 2177 } 2178 if ((pbits & PG_M) != 0) { 2179 if (m == NULL) 2180 m = PHYS_TO_VM_PAGE(*pte); 2181 vm_page_dirty(m); 2182 } 2183 } 2184 2185 pbits &= ~(PG_RW | PG_M); 2186 2187 if (pbits != obits) { 2188 if (!atomic_cmpset_int((u_int *)pte, obits, 2189 pbits)) 2190 goto retry; 2191 if (obits & PG_G) 2192 pmap_invalidate_page(pmap, sva); 2193 else 2194 anychanged = 1; 2195 } 2196 } 2197 } 2198 sched_unpin(); 2199 vm_page_unlock_queues(); 2200 if (anychanged) 2201 pmap_invalidate_all(pmap); 2202 PMAP_UNLOCK(pmap); 2203} 2204 2205/* 2206 * Insert the given physical page (p) at 2207 * the specified virtual address (v) in the 2208 * target physical map with the protection requested. 2209 * 2210 * If specified, the page will be wired down, meaning 2211 * that the related pte can not be reclaimed. 2212 * 2213 * NB: This is the only routine which MAY NOT lazy-evaluate 2214 * or lose information. That is, this routine must actually 2215 * insert this page into the given map NOW. 2216 */ 2217void 2218pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2219 boolean_t wired) 2220{ 2221 vm_paddr_t pa; 2222 pd_entry_t *pde; 2223 pt_entry_t *pte; 2224 vm_paddr_t opa; 2225 pt_entry_t origpte, newpte; 2226 vm_page_t mpte, om; 2227 boolean_t invlva; 2228 2229 va &= PG_FRAME; 2230#ifdef PMAP_DIAGNOSTIC 2231 if (va > VM_MAX_KERNEL_ADDRESS) 2232 panic("pmap_enter: toobig"); 2233 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) 2234 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); 2235#endif 2236 2237 mpte = NULL; 2238 2239 vm_page_lock_queues(); 2240 PMAP_LOCK(pmap); 2241 sched_pin(); 2242 2243 /* 2244 * In the case that a page table page is not 2245 * resident, we are creating it here. 2246 */ 2247 if (va < VM_MAXUSER_ADDRESS) { 2248 mpte = pmap_allocpte(pmap, va, M_WAITOK); 2249 } 2250#if 0 && defined(PMAP_DIAGNOSTIC) 2251 else { 2252 pd_entry_t *pdeaddr = pmap_pde(pmap, va); 2253 origpte = *pdeaddr; 2254 if ((origpte & PG_V) == 0) { 2255 panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", 2256 pmap->pm_pdir[PTDPTDI], origpte, va); 2257 } 2258 } 2259#endif 2260 2261 pde = pmap_pde(pmap, va); 2262 if ((*pde & PG_PS) != 0) 2263 panic("pmap_enter: attempted pmap_enter on 4MB page"); 2264 pte = pmap_pte_quick(pmap, va); 2265 2266 /* 2267 * Page Directory table entry not valid, we need a new PT page 2268 */ 2269 if (pte == NULL) { 2270 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", 2271 (uintmax_t)pmap->pm_pdir[PTDPTDI], va); 2272 } 2273 2274 pa = VM_PAGE_TO_PHYS(m); 2275 om = NULL; 2276 origpte = *pte; 2277 opa = origpte & PG_FRAME; 2278 2279 /* 2280 * Mapping has not changed, must be protection or wiring change. 2281 */ 2282 if (origpte && (opa == pa)) { 2283 /* 2284 * Wiring change, just update stats. We don't worry about 2285 * wiring PT pages as they remain resident as long as there 2286 * are valid mappings in them. Hence, if a user page is wired, 2287 * the PT page will be also. 
2288 */ 2289 if (wired && ((origpte & PG_W) == 0)) 2290 pmap->pm_stats.wired_count++; 2291 else if (!wired && (origpte & PG_W)) 2292 pmap->pm_stats.wired_count--; 2293 2294 /* 2295 * Remove extra pte reference 2296 */ 2297 if (mpte) 2298 mpte->wire_count--; 2299 2300 /* 2301 * We might be turning off write access to the page, 2302 * so we go ahead and sense modify status. 2303 */ 2304 if (origpte & PG_MANAGED) { 2305 om = m; 2306 pa |= PG_MANAGED; 2307 } 2308 goto validate; 2309 } 2310 /* 2311 * Mapping has changed, invalidate old range and fall through to 2312 * handle validating new mapping. 2313 */ 2314 if (opa) { 2315 if (origpte & PG_W) 2316 pmap->pm_stats.wired_count--; 2317 if (origpte & PG_MANAGED) { 2318 om = PHYS_TO_VM_PAGE(opa); 2319 pmap_remove_entry(pmap, om, va); 2320 } 2321 if (mpte != NULL) { 2322 mpte->wire_count--; 2323 KASSERT(mpte->wire_count > 0, 2324 ("pmap_enter: missing reference to page table page," 2325 " va: 0x%x", va)); 2326 } 2327 } else 2328 pmap->pm_stats.resident_count++; 2329 2330 /* 2331 * Enter on the PV list if part of our managed memory. 2332 */ 2333 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 2334 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 2335 ("pmap_enter: managed mapping within the clean submap")); 2336 pmap_insert_entry(pmap, va, m); 2337 pa |= PG_MANAGED; 2338 } 2339 2340 /* 2341 * Increment counters 2342 */ 2343 if (wired) 2344 pmap->pm_stats.wired_count++; 2345 2346validate: 2347 /* 2348 * Now validate mapping with desired protection/wiring. 2349 */ 2350 newpte = (pt_entry_t)(pa | PG_V); 2351 if ((prot & VM_PROT_WRITE) != 0) 2352 newpte |= PG_RW; 2353 if (wired) 2354 newpte |= PG_W; 2355 if (va < VM_MAXUSER_ADDRESS) 2356 newpte |= PG_U; 2357 if (pmap == kernel_pmap) 2358 newpte |= pgeflag; 2359 2360 /* 2361 * if the mapping or permission bits are different, we need 2362 * to update the pte. 2363 */ 2364 if ((origpte & ~(PG_M|PG_A)) != newpte) { 2365 if (origpte & PG_V) { 2366 invlva = FALSE; 2367 origpte = pte_load_store(pte, newpte | PG_A); 2368 if (origpte & PG_A) { 2369 if (origpte & PG_MANAGED) 2370 vm_page_flag_set(om, PG_REFERENCED); 2371 if (opa != VM_PAGE_TO_PHYS(m)) 2372 invlva = TRUE; 2373 } 2374 if (origpte & PG_M) { 2375 KASSERT((origpte & PG_RW), 2376 ("pmap_enter: modified page not writable: va: %#x, pte: %#jx", 2377 va, (uintmax_t)origpte)); 2378 if ((origpte & PG_MANAGED) != 0) 2379 vm_page_dirty(om); 2380 if ((prot & VM_PROT_WRITE) == 0) 2381 invlva = TRUE; 2382 } 2383 if (invlva) 2384 pmap_invalidate_page(pmap, va); 2385 } else 2386 pte_store(pte, newpte | PG_A); 2387 } 2388 sched_unpin(); 2389 vm_page_unlock_queues(); 2390 PMAP_UNLOCK(pmap); 2391} 2392 2393/* 2394 * Maps a sequence of resident pages belonging to the same object. 2395 * The sequence begins with the given page m_start. This page is 2396 * mapped at the given virtual address start. Each subsequent page is 2397 * mapped at a virtual address that is offset from start by the same 2398 * amount as the page is offset from m_start within the object. The 2399 * last page in the sequence is the page with the largest offset from 2400 * m_start that can be mapped at a virtual address less than the given 2401 * virtual address end. Not every virtual page between start and end 2402 * is mapped; only those for which a resident page exists with the 2403 * corresponding offset from m_start are mapped. 
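 * All mappings are created read-only and unwired by pmap_enter_quick_locked(); a page whose page table page or pv entry cannot be allocated without sleeping is simply skipped.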
2404 */ 2405void 2406pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2407 vm_page_t m_start, vm_prot_t prot) 2408{ 2409 vm_page_t m, mpte; 2410 vm_pindex_t diff, psize; 2411 2412 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 2413 psize = atop(end - start); 2414 mpte = NULL; 2415 m = m_start; 2416 PMAP_LOCK(pmap); 2417 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2418 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m, 2419 prot, mpte); 2420 m = TAILQ_NEXT(m, listq); 2421 } 2422 PMAP_UNLOCK(pmap); 2423} 2424 2425/* 2426 * this code makes some *MAJOR* assumptions: 2427 * 1. Current pmap & pmap exists. 2428 * 2. Not wired. 2429 * 3. Read access. 2430 * 4. No page table pages. 2431 * but is *MUCH* faster than pmap_enter... 2432 */ 2433 2434void 2435pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2436{ 2437 2438 PMAP_LOCK(pmap); 2439 (void) pmap_enter_quick_locked(pmap, va, m, prot, NULL); 2440 PMAP_UNLOCK(pmap); 2441} 2442 2443static vm_page_t 2444pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2445 vm_prot_t prot, vm_page_t mpte) 2446{ 2447 pt_entry_t *pte; 2448 vm_paddr_t pa; 2449 2450 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2451 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 2452 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2453 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2454 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2455 2456 /* 2457 * In the case that a page table page is not 2458 * resident, we are creating it here. 2459 */ 2460 if (va < VM_MAXUSER_ADDRESS) { 2461 unsigned ptepindex; 2462 pd_entry_t ptepa; 2463 2464 /* 2465 * Calculate pagetable page index 2466 */ 2467 ptepindex = va >> PDRSHIFT; 2468 if (mpte && (mpte->pindex == ptepindex)) { 2469 mpte->wire_count++; 2470 } else { 2471 /* 2472 * Get the page directory entry 2473 */ 2474 ptepa = pmap->pm_pdir[ptepindex]; 2475 2476 /* 2477 * If the page table page is mapped, we just increment 2478 * the hold count, and activate it. 2479 */ 2480 if (ptepa) { 2481 if (ptepa & PG_PS) 2482 panic("pmap_enter_quick: unexpected mapping into 4MB page"); 2483 mpte = PHYS_TO_VM_PAGE(ptepa); 2484 mpte->wire_count++; 2485 } else { 2486 mpte = _pmap_allocpte(pmap, ptepindex, 2487 M_NOWAIT); 2488 if (mpte == NULL) 2489 return (mpte); 2490 } 2491 } 2492 } else { 2493 mpte = NULL; 2494 } 2495 2496 /* 2497 * This call to vtopte makes the assumption that we are 2498 * entering the page into the current pmap. In order to support 2499 * quick entry into any pmap, one would likely use pmap_pte_quick. 2500 * But that isn't as quick as vtopte. 2501 */ 2502 pte = vtopte(va); 2503 if (*pte) { 2504 if (mpte != NULL) { 2505 pmap_unwire_pte_hold(pmap, mpte); 2506 mpte = NULL; 2507 } 2508 return (mpte); 2509 } 2510 2511 /* 2512 * Enter on the PV list if part of our managed memory. 2513 */ 2514 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && 2515 !pmap_try_insert_pv_entry(pmap, va, m)) { 2516 if (mpte != NULL) { 2517 pmap_unwire_pte_hold(pmap, mpte); 2518 mpte = NULL; 2519 } 2520 return (mpte); 2521 } 2522 2523 /* 2524 * Increment counters 2525 */ 2526 pmap->pm_stats.resident_count++; 2527 2528 pa = VM_PAGE_TO_PHYS(m); 2529 2530 /* 2531 * Now validate mapping with RO protection 2532 */ 2533 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2534 pte_store(pte, pa | PG_V | PG_U); 2535 else 2536 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 2537 return mpte; 2538} 2539 2540/* 2541 * Make a temporary mapping for a physical address. 
This is only intended 2542 * to be used for panic dumps. 2543 */ 2544void * 2545pmap_kenter_temporary(vm_paddr_t pa, int i) 2546{ 2547 vm_offset_t va; 2548 2549 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 2550 pmap_kenter(va, pa); 2551 invlpg(va); 2552 return ((void *)crashdumpmap); 2553} 2554 2555/* 2556 * This code maps large physical mmap regions into the 2557 * processor address space. Note that some shortcuts 2558 * are taken, but the code works. 2559 */ 2560void 2561pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2562 vm_object_t object, vm_pindex_t pindex, 2563 vm_size_t size) 2564{ 2565 vm_page_t p; 2566 2567 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2568 KASSERT(object->type == OBJT_DEVICE, 2569 ("pmap_object_init_pt: non-device object")); 2570 if (pseflag && 2571 ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { 2572 int i; 2573 vm_page_t m[1]; 2574 unsigned int ptepindex; 2575 int npdes; 2576 pd_entry_t ptepa; 2577 2578 PMAP_LOCK(pmap); 2579 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) 2580 goto out; 2581 PMAP_UNLOCK(pmap); 2582retry: 2583 p = vm_page_lookup(object, pindex); 2584 if (p != NULL) { 2585 vm_page_lock_queues(); 2586 if (vm_page_sleep_if_busy(p, FALSE, "init4p")) 2587 goto retry; 2588 } else { 2589 p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); 2590 if (p == NULL) 2591 return; 2592 m[0] = p; 2593 2594 if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { 2595 vm_page_lock_queues(); 2596 vm_page_free(p); 2597 vm_page_unlock_queues(); 2598 return; 2599 } 2600 2601 p = vm_page_lookup(object, pindex); 2602 vm_page_lock_queues(); 2603 vm_page_wakeup(p); 2604 } 2605 vm_page_unlock_queues(); 2606 2607 ptepa = VM_PAGE_TO_PHYS(p); 2608 if (ptepa & (NBPDR - 1)) 2609 return; 2610 2611 p->valid = VM_PAGE_BITS_ALL; 2612 2613 PMAP_LOCK(pmap); 2614 pmap->pm_stats.resident_count += size >> PAGE_SHIFT; 2615 npdes = size >> PDRSHIFT; 2616 for(i = 0; i < npdes; i++) { 2617 pde_store(&pmap->pm_pdir[ptepindex], 2618 ptepa | PG_U | PG_RW | PG_V | PG_PS); 2619 ptepa += NBPDR; 2620 ptepindex += 1; 2621 } 2622 pmap_invalidate_all(pmap); 2623out: 2624 PMAP_UNLOCK(pmap); 2625 } 2626} 2627 2628/* 2629 * Routine: pmap_change_wiring 2630 * Function: Change the wiring attribute for a map/virtual-address 2631 * pair. 2632 * In/out conditions: 2633 * The mapping must already exist in the pmap. 2634 */ 2635void 2636pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 2637{ 2638 pt_entry_t *pte; 2639 2640 PMAP_LOCK(pmap); 2641 pte = pmap_pte(pmap, va); 2642 2643 if (wired && !pmap_pte_w(pte)) 2644 pmap->pm_stats.wired_count++; 2645 else if (!wired && pmap_pte_w(pte)) 2646 pmap->pm_stats.wired_count--; 2647 2648 /* 2649 * Wiring is not a hardware characteristic so there is no need to 2650 * invalidate TLB. 2651 */ 2652 pmap_pte_set_w(pte, wired); 2653 pmap_pte_release(pte); 2654 PMAP_UNLOCK(pmap); 2655} 2656 2657 2658 2659/* 2660 * Copy the range specified by src_addr/len 2661 * from the source map to the range dst_addr/len 2662 * in the destination map. 2663 * 2664 * This routine is only advisory and need not do anything. 
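 * This implementation copies only managed mappings and gives up quietly whenever a destination page table page or pv entry cannot be allocated without sleeping.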
2665 */ 2666 2667void 2668pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2669 vm_offset_t src_addr) 2670{ 2671 vm_offset_t addr; 2672 vm_offset_t end_addr = src_addr + len; 2673 vm_offset_t pdnxt; 2674 2675 if (dst_addr != src_addr) 2676 return; 2677 2678 if (!pmap_is_current(src_pmap)) 2679 return; 2680 2681 vm_page_lock_queues(); 2682 if (dst_pmap < src_pmap) { 2683 PMAP_LOCK(dst_pmap); 2684 PMAP_LOCK(src_pmap); 2685 } else { 2686 PMAP_LOCK(src_pmap); 2687 PMAP_LOCK(dst_pmap); 2688 } 2689 sched_pin(); 2690 for (addr = src_addr; addr < end_addr; addr = pdnxt) { 2691 pt_entry_t *src_pte, *dst_pte; 2692 vm_page_t dstmpte, srcmpte; 2693 pd_entry_t srcptepaddr; 2694 unsigned ptepindex; 2695 2696 if (addr >= UPT_MIN_ADDRESS) 2697 panic("pmap_copy: invalid to pmap_copy page tables"); 2698 2699 pdnxt = (addr + NBPDR) & ~PDRMASK; 2700 ptepindex = addr >> PDRSHIFT; 2701 2702 srcptepaddr = src_pmap->pm_pdir[ptepindex]; 2703 if (srcptepaddr == 0) 2704 continue; 2705 2706 if (srcptepaddr & PG_PS) { 2707 if (dst_pmap->pm_pdir[ptepindex] == 0) { 2708 dst_pmap->pm_pdir[ptepindex] = srcptepaddr & 2709 ~PG_W; 2710 dst_pmap->pm_stats.resident_count += 2711 NBPDR / PAGE_SIZE; 2712 } 2713 continue; 2714 } 2715 2716 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); 2717 if (srcmpte->wire_count == 0) 2718 panic("pmap_copy: source page table page is unused"); 2719 2720 if (pdnxt > end_addr) 2721 pdnxt = end_addr; 2722 2723 src_pte = vtopte(addr); 2724 while (addr < pdnxt) { 2725 pt_entry_t ptetemp; 2726 ptetemp = *src_pte; 2727 /* 2728 * we only virtual copy managed pages 2729 */ 2730 if ((ptetemp & PG_MANAGED) != 0) { 2731 dstmpte = pmap_allocpte(dst_pmap, addr, 2732 M_NOWAIT); 2733 if (dstmpte == NULL) 2734 break; 2735 dst_pte = pmap_pte_quick(dst_pmap, addr); 2736 if (*dst_pte == 0 && 2737 pmap_try_insert_pv_entry(dst_pmap, addr, 2738 PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { 2739 /* 2740 * Clear the wired, modified, and 2741 * accessed (referenced) bits 2742 * during the copy. 2743 */ 2744 *dst_pte = ptetemp & ~(PG_W | PG_M | 2745 PG_A); 2746 dst_pmap->pm_stats.resident_count++; 2747 } else 2748 pmap_unwire_pte_hold(dst_pmap, dstmpte); 2749 if (dstmpte->wire_count >= srcmpte->wire_count) 2750 break; 2751 } 2752 addr += PAGE_SIZE; 2753 src_pte++; 2754 } 2755 } 2756 sched_unpin(); 2757 vm_page_unlock_queues(); 2758 PMAP_UNLOCK(src_pmap); 2759 PMAP_UNLOCK(dst_pmap); 2760} 2761 2762static __inline void 2763pagezero(void *page) 2764{ 2765#if defined(I686_CPU) 2766 if (cpu_class == CPUCLASS_686) { 2767#if defined(CPU_ENABLE_SSE) 2768 if (cpu_feature & CPUID_SSE2) 2769 sse2_pagezero(page); 2770 else 2771#endif 2772 i686_pagezero(page); 2773 } else 2774#endif 2775 bzero(page, PAGE_SIZE); 2776} 2777 2778/* 2779 * pmap_zero_page zeros the specified hardware page by mapping 2780 * the page into KVM and using bzero to clear its contents. 2781 */ 2782void 2783pmap_zero_page(vm_page_t m) 2784{ 2785 struct sysmaps *sysmaps; 2786 2787 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 2788 mtx_lock(&sysmaps->lock); 2789 if (*sysmaps->CMAP2) 2790 panic("pmap_zero_page: CMAP2 busy"); 2791 sched_pin(); 2792 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; 2793 invlcaddr(sysmaps->CADDR2); 2794 pagezero(sysmaps->CADDR2); 2795 *sysmaps->CMAP2 = 0; 2796 sched_unpin(); 2797 mtx_unlock(&sysmaps->lock); 2798} 2799 2800/* 2801 * pmap_zero_page_area zeros the specified hardware page by mapping 2802 * the page into KVM and using bzero to clear its contents. 
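 * Only the byte range [off, off + size) within the page is cleared.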
2803 * 2804 * off and size may not cover an area beyond a single hardware page. 2805 */ 2806void 2807pmap_zero_page_area(vm_page_t m, int off, int size) 2808{ 2809 struct sysmaps *sysmaps; 2810 2811 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 2812 mtx_lock(&sysmaps->lock); 2813 if (*sysmaps->CMAP2) 2814 panic("pmap_zero_page_area: CMAP2 busy"); 2815 sched_pin(); 2816 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; 2817 invlcaddr(sysmaps->CADDR2); 2818 if (off == 0 && size == PAGE_SIZE) 2819 pagezero(sysmaps->CADDR2); 2820 else 2821 bzero((char *)sysmaps->CADDR2 + off, size); 2822 *sysmaps->CMAP2 = 0; 2823 sched_unpin(); 2824 mtx_unlock(&sysmaps->lock); 2825} 2826 2827/* 2828 * pmap_zero_page_idle zeros the specified hardware page by mapping 2829 * the page into KVM and using bzero to clear its contents. This 2830 * is intended to be called from the vm_pagezero process only and 2831 * outside of Giant. 2832 */ 2833void 2834pmap_zero_page_idle(vm_page_t m) 2835{ 2836 2837 if (*CMAP3) 2838 panic("pmap_zero_page_idle: CMAP3 busy"); 2839 sched_pin(); 2840 *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; 2841 invlcaddr(CADDR3); 2842 pagezero(CADDR3); 2843 *CMAP3 = 0; 2844 sched_unpin(); 2845} 2846 2847/* 2848 * pmap_copy_page copies the specified (machine independent) 2849 * page by mapping the page into virtual memory and using 2850 * bcopy to copy the page, one machine dependent page at a 2851 * time. 2852 */ 2853void 2854pmap_copy_page(vm_page_t src, vm_page_t dst) 2855{ 2856 struct sysmaps *sysmaps; 2857 2858 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 2859 mtx_lock(&sysmaps->lock); 2860 if (*sysmaps->CMAP1) 2861 panic("pmap_copy_page: CMAP1 busy"); 2862 if (*sysmaps->CMAP2) 2863 panic("pmap_copy_page: CMAP2 busy"); 2864 sched_pin(); 2865 invlpg((u_int)sysmaps->CADDR1); 2866 invlpg((u_int)sysmaps->CADDR2); 2867 *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A; 2868 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M; 2869 bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); 2870 *sysmaps->CMAP1 = 0; 2871 *sysmaps->CMAP2 = 0; 2872 sched_unpin(); 2873 mtx_unlock(&sysmaps->lock); 2874} 2875 2876/* 2877 * Returns true if the pmap's pv is one of the first 2878 * 16 pvs linked to from this page. This count may 2879 * be changed upwards or downwards in the future; it 2880 * is only necessary that true be returned for a small 2881 * subset of pmaps for proper page aging. 2882 */ 2883boolean_t 2884pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2885{ 2886 pv_entry_t pv; 2887 int loops = 0; 2888 2889 if (m->flags & PG_FICTITIOUS) 2890 return (FALSE); 2891 2892 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2893 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2894 if (PV_PMAP(pv) == pmap) { 2895 return (TRUE); 2896 } 2897 loops++; 2898 if (loops >= 16) 2899 break; 2900 } 2901 return (FALSE); 2902} 2903 2904/* 2905 * Remove all pages from the specified address space; 2906 * this aids process exit speeds. Also, this code 2907 * is special cased for current process only, but 2908 * can have the more generic (and slightly slower) 2909 * mode enabled. This is much faster than pmap_remove 2910 * in the case of running down an entire address space.
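 * Wired mappings are skipped, and each pv chunk is returned to the system once every entry in it has been freed.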
2911 */ 2912void 2913pmap_remove_pages(pmap_t pmap) 2914{ 2915 pt_entry_t *pte, tpte; 2916 vm_page_t m; 2917 pv_entry_t pv; 2918 struct pv_chunk *pc, *npc; 2919 int field, idx; 2920 int32_t bit; 2921 uint32_t inuse, bitmask; 2922 int allfree; 2923 2924 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 2925 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2926 return; 2927 } 2928 vm_page_lock_queues(); 2929 PMAP_LOCK(pmap); 2930 sched_pin(); 2931 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2932 allfree = 1; 2933 for (field = 0; field < _NPCM; field++) { 2934 inuse = (~(pc->pc_map[field])) & pc_freemask[field]; 2935 while (inuse != 0) { 2936 bit = bsfl(inuse); 2937 bitmask = 1UL << bit; 2938 idx = field * 32 + bit; 2939 pv = &pc->pc_pventry[idx]; 2940 inuse &= ~bitmask; 2941 2942 pte = vtopte(pv->pv_va); 2943 tpte = *pte; 2944 2945 if (tpte == 0) { 2946 printf( 2947 "TPTE at %p IS ZERO @ VA %08x\n", 2948 pte, pv->pv_va); 2949 panic("bad pte"); 2950 } 2951 2952/* 2953 * We cannot remove wired pages from a process' mapping at this time 2954 */ 2955 if (tpte & PG_W) { 2956 allfree = 0; 2957 continue; 2958 } 2959 2960 m = PHYS_TO_VM_PAGE(tpte); 2961 KASSERT(m->phys_addr == (tpte & PG_FRAME), 2962 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 2963 m, (uintmax_t)m->phys_addr, 2964 (uintmax_t)tpte)); 2965 2966 KASSERT(m < &vm_page_array[vm_page_array_size], 2967 ("pmap_remove_pages: bad tpte %#jx", 2968 (uintmax_t)tpte)); 2969 2970 pmap->pm_stats.resident_count--; 2971 2972 pte_clear(pte); 2973 2974 /* 2975 * Update the vm_page_t clean/reference bits. 2976 */ 2977 if (tpte & PG_M) 2978 vm_page_dirty(m); 2979 2980 /* Mark free */ 2981 PV_STAT(pv_entry_frees++); 2982 PV_STAT(pv_entry_spare++); 2983 pv_entry_count--; 2984 pc->pc_map[field] |= bitmask; 2985 m->md.pv_list_count--; 2986 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2987 if (TAILQ_EMPTY(&m->md.pv_list)) 2988 vm_page_flag_clear(m, PG_WRITEABLE); 2989 2990 pmap_unuse_pt(pmap, pv->pv_va); 2991 } 2992 } 2993 if (allfree) { 2994 PV_STAT(pv_entry_spare -= _NPCPV); 2995 PV_STAT(pc_chunk_count--); 2996 PV_STAT(pc_chunk_frees++); 2997 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2998 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2999 pmap_qremove((vm_offset_t)pc, 1); 3000 vm_page_unwire(m, 0); 3001 vm_page_free(m); 3002 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 3003 } 3004 } 3005 sched_unpin(); 3006 vm_page_unlock_queues(); 3007 pmap_invalidate_all(pmap); 3008 PMAP_UNLOCK(pmap); 3009} 3010 3011/* 3012 * pmap_is_modified: 3013 * 3014 * Return whether or not the specified physical page was modified 3015 * in any physical maps. 3016 */ 3017boolean_t 3018pmap_is_modified(vm_page_t m) 3019{ 3020 pv_entry_t pv; 3021 pt_entry_t *pte; 3022 pmap_t pmap; 3023 boolean_t rv; 3024 3025 rv = FALSE; 3026 if (m->flags & PG_FICTITIOUS) 3027 return (rv); 3028 3029 sched_pin(); 3030 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3031 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3032 pmap = PV_PMAP(pv); 3033 PMAP_LOCK(pmap); 3034 pte = pmap_pte_quick(pmap, pv->pv_va); 3035 rv = (*pte & PG_M) != 0; 3036 PMAP_UNLOCK(pmap); 3037 if (rv) 3038 break; 3039 } 3040 sched_unpin(); 3041 return (rv); 3042} 3043 3044/* 3045 * pmap_is_prefaultable: 3046 * 3047 * Return whether or not the specified virtual address is eligible 3048 * for prefault.
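 * An address is considered prefaultable when its page table page is resident but the pte itself is still zero.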
3049 */ 3050boolean_t 3051pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 3052{ 3053 pt_entry_t *pte; 3054 boolean_t rv; 3055 3056 rv = FALSE; 3057 PMAP_LOCK(pmap); 3058 if (*pmap_pde(pmap, addr)) { 3059 pte = vtopte(addr); 3060 rv = *pte == 0; 3061 } 3062 PMAP_UNLOCK(pmap); 3063 return (rv); 3064} 3065 3066/* 3067 * Clear the given bit in each of the given page's ptes. The bit is 3068 * expressed as a 32-bit mask. Consequently, if the pte is 64 bits in 3069 * size, only a bit within the least significant 32 can be cleared. 3070 */ 3071static __inline void 3072pmap_clear_ptes(vm_page_t m, int bit) 3073{ 3074 pv_entry_t pv; 3075 pmap_t pmap; 3076 pt_entry_t pbits, *pte; 3077 3078 if ((m->flags & PG_FICTITIOUS) || 3079 (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) 3080 return; 3081 3082 sched_pin(); 3083 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3084 /* 3085 * Loop over all current mappings setting/clearing as appropos If 3086 * setting RO do we need to clear the VAC? 3087 */ 3088 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3089 pmap = PV_PMAP(pv); 3090 PMAP_LOCK(pmap); 3091 pte = pmap_pte_quick(pmap, pv->pv_va); 3092retry: 3093 pbits = *pte; 3094 if (pbits & bit) { 3095 if (bit == PG_RW) { 3096 /* 3097 * Regardless of whether a pte is 32 or 64 bits 3098 * in size, PG_RW and PG_M are among the least 3099 * significant 32 bits. 3100 */ 3101 if (!atomic_cmpset_int((u_int *)pte, pbits, 3102 pbits & ~(PG_RW | PG_M))) 3103 goto retry; 3104 if (pbits & PG_M) { 3105 vm_page_dirty(m); 3106 } 3107 } else { 3108 atomic_clear_int((u_int *)pte, bit); 3109 } 3110 pmap_invalidate_page(pmap, pv->pv_va); 3111 } 3112 PMAP_UNLOCK(pmap); 3113 } 3114 if (bit == PG_RW) 3115 vm_page_flag_clear(m, PG_WRITEABLE); 3116 sched_unpin(); 3117} 3118 3119/* 3120 * pmap_page_protect: 3121 * 3122 * Lower the permission for all mappings to a given page. 3123 */ 3124void 3125pmap_page_protect(vm_page_t m, vm_prot_t prot) 3126{ 3127 if ((prot & VM_PROT_WRITE) == 0) { 3128 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3129 pmap_clear_ptes(m, PG_RW); 3130 } else { 3131 pmap_remove_all(m); 3132 } 3133 } 3134} 3135 3136/* 3137 * pmap_ts_referenced: 3138 * 3139 * Return a count of reference bits for a page, clearing those bits. 3140 * It is not necessary for every reference bit to be cleared, but it 3141 * is necessary that 0 only be returned when there are truly no 3142 * reference bits set. 3143 * 3144 * XXX: The exact number of bits to check and clear is a matter that 3145 * should be tested and standardized at some point in the future for 3146 * optimal aging of shared pages. 3147 */ 3148int 3149pmap_ts_referenced(vm_page_t m) 3150{ 3151 pv_entry_t pv, pvf, pvn; 3152 pmap_t pmap; 3153 pt_entry_t *pte; 3154 int rtval = 0; 3155 3156 if (m->flags & PG_FICTITIOUS) 3157 return (rtval); 3158 sched_pin(); 3159 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3160 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3161 pvf = pv; 3162 do { 3163 pvn = TAILQ_NEXT(pv, pv_list); 3164 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3165 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3166 pmap = PV_PMAP(pv); 3167 PMAP_LOCK(pmap); 3168 pte = pmap_pte_quick(pmap, pv->pv_va); 3169 if ((*pte & PG_A) != 0) { 3170 atomic_clear_int((u_int *)pte, PG_A); 3171 pmap_invalidate_page(pmap, pv->pv_va); 3172 rtval++; 3173 if (rtval > 4) 3174 pvn = NULL; 3175 } 3176 PMAP_UNLOCK(pmap); 3177 } while ((pv = pvn) != NULL && pv != pvf); 3178 } 3179 sched_unpin(); 3180 return (rtval); 3181} 3182 3183/* 3184 * Clear the modify bits on the specified physical page. 
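 * Like pmap_clear_reference() below, this is a thin wrapper around pmap_clear_ptes().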
3185 */ 3186void 3187pmap_clear_modify(vm_page_t m) 3188{ 3189 pmap_clear_ptes(m, PG_M); 3190} 3191 3192/* 3193 * pmap_clear_reference: 3194 * 3195 * Clear the reference bit on the specified physical page. 3196 */ 3197void 3198pmap_clear_reference(vm_page_t m) 3199{ 3200 pmap_clear_ptes(m, PG_A); 3201} 3202 3203/* 3204 * Miscellaneous support routines follow 3205 */ 3206 3207/* 3208 * Map a set of physical memory pages into the kernel virtual 3209 * address space. Return a pointer to where it is mapped. This 3210 * routine is intended to be used for mapping device memory, 3211 * NOT real memory. 3212 */ 3213void * 3214pmap_mapdev(vm_paddr_t pa, vm_size_t size) 3215{ 3216 vm_offset_t va, tmpva, offset; 3217 3218 offset = pa & PAGE_MASK; 3219 size = roundup(offset + size, PAGE_SIZE); 3220 pa = pa & PG_FRAME; 3221 3222 if (pa < KERNLOAD && pa + size <= KERNLOAD) 3223 va = KERNBASE + pa; 3224 else 3225 va = kmem_alloc_nofault(kernel_map, size); 3226 if (!va) 3227 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 3228 3229 for (tmpva = va; size > 0; ) { 3230 pmap_kenter(tmpva, pa); 3231 size -= PAGE_SIZE; 3232 tmpva += PAGE_SIZE; 3233 pa += PAGE_SIZE; 3234 } 3235 pmap_invalidate_range(kernel_pmap, va, tmpva); 3236 return ((void *)(va + offset)); 3237} 3238 3239void 3240pmap_unmapdev(vm_offset_t va, vm_size_t size) 3241{ 3242 vm_offset_t base, offset, tmpva; 3243 3244 if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) 3245 return; 3246 base = va & PG_FRAME; 3247 offset = va & PAGE_MASK; 3248 size = roundup(offset + size, PAGE_SIZE); 3249 for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) 3250 pmap_kremove(tmpva); 3251 pmap_invalidate_range(kernel_pmap, va, tmpva); 3252 kmem_free(kernel_map, base, size); 3253} 3254 3255/* 3256 * perform the pmap work for mincore 3257 */ 3258int 3259pmap_mincore(pmap_t pmap, vm_offset_t addr) 3260{ 3261 pt_entry_t *ptep, pte; 3262 vm_page_t m; 3263 int val = 0; 3264 3265 PMAP_LOCK(pmap); 3266 ptep = pmap_pte(pmap, addr); 3267 pte = (ptep != NULL) ? 
*ptep : 0; 3268 pmap_pte_release(ptep); 3269 PMAP_UNLOCK(pmap); 3270 3271 if (pte != 0) { 3272 vm_paddr_t pa; 3273 3274 val = MINCORE_INCORE; 3275 if ((pte & PG_MANAGED) == 0) 3276 return val; 3277 3278 pa = pte & PG_FRAME; 3279 3280 m = PHYS_TO_VM_PAGE(pa); 3281 3282 /* 3283 * Modified by us 3284 */ 3285 if (pte & PG_M) 3286 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3287 else { 3288 /* 3289 * Modified by someone else 3290 */ 3291 vm_page_lock_queues(); 3292 if (m->dirty || pmap_is_modified(m)) 3293 val |= MINCORE_MODIFIED_OTHER; 3294 vm_page_unlock_queues(); 3295 } 3296 /* 3297 * Referenced by us 3298 */ 3299 if (pte & PG_A) 3300 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3301 else { 3302 /* 3303 * Referenced by someone else 3304 */ 3305 vm_page_lock_queues(); 3306 if ((m->flags & PG_REFERENCED) || 3307 pmap_ts_referenced(m)) { 3308 val |= MINCORE_REFERENCED_OTHER; 3309 vm_page_flag_set(m, PG_REFERENCED); 3310 } 3311 vm_page_unlock_queues(); 3312 } 3313 } 3314 return val; 3315} 3316 3317void 3318pmap_activate(struct thread *td) 3319{ 3320 pmap_t pmap, oldpmap; 3321 u_int32_t cr3; 3322 3323 critical_enter(); 3324 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3325 oldpmap = PCPU_GET(curpmap); 3326#if defined(SMP) 3327 atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); 3328 atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); 3329#else 3330 oldpmap->pm_active &= ~1; 3331 pmap->pm_active |= 1; 3332#endif 3333#ifdef PAE 3334 cr3 = vtophys(pmap->pm_pdpt); 3335#else 3336 cr3 = vtophys(pmap->pm_pdir); 3337#endif 3338 /* 3339 * pmap_activate is for the current thread on the current cpu 3340 */ 3341 td->td_pcb->pcb_cr3 = cr3; 3342 load_cr3(cr3); 3343 PCPU_SET(curpmap, pmap); 3344 critical_exit(); 3345} 3346 3347vm_offset_t 3348pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3349{ 3350 3351 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3352 return addr; 3353 } 3354 3355 addr = (addr + PDRMASK) & ~PDRMASK; 3356 return addr; 3357} 3358 3359 3360#if defined(PMAP_DEBUG) 3361pmap_pid_dump(int pid) 3362{ 3363 pmap_t pmap; 3364 struct proc *p; 3365 int npte = 0; 3366 int index; 3367 3368 sx_slock(&allproc_lock); 3369 LIST_FOREACH(p, &allproc, p_list) { 3370 if (p->p_pid != pid) 3371 continue; 3372 3373 if (p->p_vmspace) { 3374 int i,j; 3375 index = 0; 3376 pmap = vmspace_pmap(p->p_vmspace); 3377 for (i = 0; i < NPDEPTD; i++) { 3378 pd_entry_t *pde; 3379 pt_entry_t *pte; 3380 vm_offset_t base = i << PDRSHIFT; 3381 3382 pde = &pmap->pm_pdir[i]; 3383 if (pde && pmap_pde_v(pde)) { 3384 for (j = 0; j < NPTEPG; j++) { 3385 vm_offset_t va = base + (j << PAGE_SHIFT); 3386 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 3387 if (index) { 3388 index = 0; 3389 printf("\n"); 3390 } 3391 sx_sunlock(&allproc_lock); 3392 return npte; 3393 } 3394 pte = pmap_pte(pmap, va); 3395 if (pte && pmap_pte_v(pte)) { 3396 pt_entry_t pa; 3397 vm_page_t m; 3398 pa = *pte; 3399 m = PHYS_TO_VM_PAGE(pa); 3400 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 3401 va, pa, m->hold_count, m->wire_count, m->flags); 3402 npte++; 3403 index++; 3404 if (index >= 2) { 3405 index = 0; 3406 printf("\n"); 3407 } else { 3408 printf(" "); 3409 } 3410 } 3411 } 3412 } 3413 } 3414 } 3415 } 3416 sx_sunlock(&allproc_lock); 3417 return npte; 3418} 3419#endif 3420 3421#if defined(DEBUG) 3422 3423static void pads(pmap_t pm); 3424void pmap_pvdump(vm_offset_t pa); 3425 3426/* print address space of pmap*/ 3427static void 3428pads(pmap_t pm) 3429{ 3430 int i, j; 3431 vm_paddr_t va; 3432 pt_entry_t 
*ptep; 3433 3434 if (pm == kernel_pmap) 3435 return; 3436 for (i = 0; i < NPDEPTD; i++) 3437 if (pm->pm_pdir[i]) 3438 for (j = 0; j < NPTEPG; j++) { 3439 va = (i << PDRSHIFT) + (j << PAGE_SHIFT); 3440 if (pm == kernel_pmap && va < KERNBASE) 3441 continue; 3442 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 3443 continue; 3444 ptep = pmap_pte(pm, va); 3445 if (pmap_pte_v(ptep)) 3446 printf("%x:%x ", va, *ptep); 3447 }; 3448 3449} 3450 3451void 3452pmap_pvdump(vm_paddr_t pa) 3453{ 3454 pv_entry_t pv; 3455 pmap_t pmap; 3456 vm_page_t m; 3457 3458 printf("pa %x", pa); 3459 m = PHYS_TO_VM_PAGE(pa); 3460 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3461 pmap = PV_PMAP(pv); 3462 printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); 3463 pads(pmap); 3464 } 3465 printf(" "); 3466} 3467#endif 3468