pmap.c revision 88245
1169689Skan/* 2169689Skan * Copyright (c) 1991 Regents of the University of California. 3169689Skan * All rights reserved. 4169689Skan * Copyright (c) 1994 John S. Dyson 5169689Skan * All rights reserved. 6169689Skan * Copyright (c) 1994 David Greenman 7169689Skan * All rights reserved. 8169689Skan * 9169689Skan * This code is derived from software contributed to Berkeley by 10169689Skan * the Systems Programming Group of the University of Utah Computer 11169689Skan * Science Department and William Jolitz of UUNET Technologies Inc. 12169689Skan * 13169689Skan * Redistribution and use in source and binary forms, with or without 14169689Skan * modification, are permitted provided that the following conditions 15169689Skan * are met: 16169689Skan * 1. Redistributions of source code must retain the above copyright 17169689Skan * notice, this list of conditions and the following disclaimer. 18169689Skan * 2. Redistributions in binary form must reproduce the above copyright 19169689Skan * notice, this list of conditions and the following disclaimer in the 20169689Skan * documentation and/or other materials provided with the distribution. 21169689Skan * 3. All advertising materials mentioning features or use of this software 22169689Skan * must display the following acknowledgement: 23169689Skan * This product includes software developed by the University of 24169689Skan * California, Berkeley and its contributors. 25169689Skan * 4. Neither the name of the University nor the names of its contributors 26169689Skan * may be used to endorse or promote products derived from this software 27169689Skan * without specific prior written permission. 28169689Skan * 29169689Skan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30169689Skan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31169689Skan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32169689Skan * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33169689Skan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34169689Skan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35169689Skan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36169689Skan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37169689Skan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38169689Skan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39169689Skan * SUCH DAMAGE. 40169689Skan * 41169689Skan * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 42169689Skan * $FreeBSD: head/sys/i386/i386/pmap.c 88245 2001-12-20 05:29:59Z peter $ 43169689Skan */ 44169689Skan 45169689Skan/* 46169689Skan * Manages physical address maps. 47169689Skan * 48169689Skan * In addition to hardware address maps, this 49169689Skan * module is called upon to provide software-use-only 50169689Skan * maps which may or may not be stored in the same 51169689Skan * form as hardware maps. These pseudo-maps are 52169689Skan * used to store intermediate results from copy 53169689Skan * operations to and from address spaces. 54169689Skan * 55169689Skan * Since the information managed by this module is 56169689Skan * also stored by the logical address mapping module, 57169689Skan * this module may throw away valid virtual-to-physical 58169689Skan * mappings at almost any time. However, invalidations 59169689Skan * of virtual-to-physical mappings must be done as 60169689Skan * requested. 61169689Skan * 62169689Skan * In order to cope with hardware architectures which 63169689Skan * make virtual-to-physical map invalidates expensive, 64169689Skan * this module may delay invalidate or reduced protection 65169689Skan * operations until such time as they are actually 66169689Skan * necessary. This module is given full information as 67169689Skan * to which processors are currently using which maps, 68169689Skan * and to when physical maps must be made correct. 69169689Skan */ 70169689Skan 71169689Skan#include "opt_disable_pse.h" 72169689Skan#include "opt_pmap.h" 73169689Skan#include "opt_msgbuf.h" 74169689Skan#include "opt_kstack_pages.h" 75169689Skan 76169689Skan#include <sys/param.h> 77169689Skan#include <sys/systm.h> 78169689Skan#include <sys/kernel.h> 79169689Skan#include <sys/lock.h> 80169689Skan#include <sys/mman.h> 81169689Skan#include <sys/msgbuf.h> 82169689Skan#include <sys/mutex.h> 83169689Skan#include <sys/proc.h> 84169689Skan#include <sys/sx.h> 85169689Skan#include <sys/user.h> 86169689Skan#include <sys/vmmeter.h> 87169689Skan#include <sys/sysctl.h> 88169689Skan 89169689Skan#include <vm/vm.h> 90169689Skan#include <vm/vm_param.h> 91169689Skan#include <vm/vm_kern.h> 92169689Skan#include <vm/vm_page.h> 93169689Skan#include <vm/vm_map.h> 94169689Skan#include <vm/vm_object.h> 95169689Skan#include <vm/vm_extern.h> 96169689Skan#include <vm/vm_pageout.h> 97169689Skan#include <vm/vm_pager.h> 98169689Skan#include <vm/vm_zone.h> 99169689Skan 100169689Skan#include <machine/cputypes.h> 101169689Skan#include <machine/md_var.h> 102169689Skan#include <machine/specialreg.h> 103169689Skan#if defined(SMP) || defined(APIC_IO) 104169689Skan#include <machine/smp.h> 105169689Skan#include <machine/apic.h> 106169689Skan#include <machine/segments.h> 107169689Skan#include <machine/tss.h> 108169689Skan#endif /* SMP || APIC_IO */ 109169689Skan 110169689Skan#define PMAP_KEEP_PDIRS 111169689Skan#ifndef PMAP_SHPGPERPROC 112169689Skan#define PMAP_SHPGPERPROC 200 113169689Skan#endif 114169689Skan 115169689Skan#if defined(DIAGNOSTIC) 116169689Skan#define PMAP_DIAGNOSTIC 117169689Skan#endif 118169689Skan 119169689Skan#define MINPV 2048 120169689Skan 121169689Skan#if !defined(PMAP_DIAGNOSTIC) 122169689Skan#define PMAP_INLINE __inline 123169689Skan#else 124169689Skan#define PMAP_INLINE 125169689Skan#endif 126169689Skan 127169689Skan/* 128169689Skan * Get PDEs and PTEs for user/kernel address space 129169689Skan */ 130169689Skan#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) 131169689Skan#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 132169689Skan 133169689Skan#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) 134169689Skan#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) 135169689Skan#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) 136169689Skan#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) 137169689Skan#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) 138169689Skan 139169689Skan#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) 140169689Skan#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 141169689Skan 142169689Skan/* 143169689Skan * Given a map and a machine independent protection code, 144169689Skan * convert to a vax protection code. 145169689Skan */ 146169689Skan#define pte_prot(m, p) (protection_codes[p]) 147169689Skanstatic int protection_codes[8]; 148169689Skan 149169689Skanstatic struct pmap kernel_pmap_store; 150169689Skanpmap_t kernel_pmap; 151169689SkanLIST_HEAD(pmaplist, pmap); 152169689Skanstruct pmaplist allpmaps; 153169689Skan 154169689Skanvm_offset_t avail_start; /* PA of first available physical page */ 155169689Skanvm_offset_t avail_end; /* PA of last available physical page */ 156169689Skanvm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 157169689Skanvm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 158169689Skanstatic boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ 159169689Skanstatic int pgeflag; /* PG_G or-in */ 160169689Skanstatic int pseflag; /* PG_PS or-in */ 161169689Skan 162169689Skanstatic vm_object_t kptobj; 163169689Skan 164169689Skanstatic int nkpt; 165169689Skanvm_offset_t kernel_vm_end; 166 167/* 168 * Data for the pv entry allocation mechanism 169 */ 170static vm_zone_t pvzone; 171static struct vm_zone pvzone_store; 172static struct vm_object pvzone_obj; 173static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 174static int pmap_pagedaemon_waken = 0; 175static struct pv_entry *pvinit; 176 177/* 178 * All those kernel PT submaps that BSD is so fond of 179 */ 180pt_entry_t *CMAP1 = 0; 181static pt_entry_t *CMAP2, *ptmmap; 182caddr_t CADDR1 = 0, ptvmmap = 0; 183static caddr_t CADDR2; 184static pt_entry_t *msgbufmap; 185struct msgbuf *msgbufp = 0; 186 187/* 188 * Crashdump maps. 189 */ 190static pt_entry_t *pt_crashdumpmap; 191static caddr_t crashdumpmap; 192 193#ifdef SMP 194extern pt_entry_t *SMPpt; 195#endif 196static pt_entry_t *PMAP1 = 0; 197static pt_entry_t *PADDR1 = 0; 198 199static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv)); 200static unsigned * get_ptbase __P((pmap_t pmap)); 201static pv_entry_t get_pv_entry __P((void)); 202static void i386_protection_init __P((void)); 203static __inline void pmap_changebit __P((vm_page_t m, int bit, boolean_t setem)); 204 205static void pmap_remove_all __P((vm_page_t m)); 206static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, 207 vm_page_t m, vm_page_t mpte)); 208static int pmap_remove_pte __P((pmap_t pmap, unsigned *ptq, vm_offset_t sva)); 209static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); 210static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m, 211 vm_offset_t va)); 212static boolean_t pmap_testbit __P((vm_page_t m, int bit)); 213static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, 214 vm_page_t mpte, vm_page_t m)); 215 216static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); 217 218static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); 219static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex)); 220static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); 221static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex)); 222static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); 223static vm_offset_t pmap_kmem_choose(vm_offset_t addr); 224 225static unsigned pdir4mb; 226 227/* 228 * Routine: pmap_pte 229 * Function: 230 * Extract the page table entry associated 231 * with the given map/virtual_address pair. 232 */ 233 234PMAP_INLINE unsigned * 235pmap_pte(pmap, va) 236 register pmap_t pmap; 237 vm_offset_t va; 238{ 239 pd_entry_t *pdeaddr; 240 241 if (pmap) { 242 pdeaddr = pmap_pde(pmap, va); 243 if (*pdeaddr & PG_PS) 244 return pdeaddr; 245 if (*pdeaddr) { 246 return get_ptbase(pmap) + i386_btop(va); 247 } 248 } 249 return (0); 250} 251 252/* 253 * Move the kernel virtual free pointer to the next 254 * 4MB. This is used to help improve performance 255 * by using a large (4MB) page for much of the kernel 256 * (.text, .data, .bss) 257 */ 258static vm_offset_t 259pmap_kmem_choose(vm_offset_t addr) 260{ 261 vm_offset_t newaddr = addr; 262#ifndef DISABLE_PSE 263 if (cpu_feature & CPUID_PSE) { 264 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 265 } 266#endif 267 return newaddr; 268} 269 270/* 271 * Bootstrap the system enough to run with virtual memory. 272 * 273 * On the i386 this is called after mapping has already been enabled 274 * and just syncs the pmap module with what has already been done. 275 * [We can't call it easily with mapping off since the kernel is not 276 * mapped with PA == VA, hence we would have to relocate every address 277 * from the linked base (virtual) address "KERNBASE" to the actual 278 * (physical) address starting relative to 0] 279 */ 280void 281pmap_bootstrap(firstaddr, loadaddr) 282 vm_offset_t firstaddr; 283 vm_offset_t loadaddr; 284{ 285 vm_offset_t va; 286 pt_entry_t *pte; 287 int i; 288 289 avail_start = firstaddr; 290 291 /* 292 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too 293 * large. It should instead be correctly calculated in locore.s and 294 * not based on 'first' (which is a physical address, not a virtual 295 * address, for the start of unused physical memory). The kernel 296 * page tables are NOT double mapped and thus should not be included 297 * in this calculation. 298 */ 299 virtual_avail = (vm_offset_t) KERNBASE + firstaddr; 300 virtual_avail = pmap_kmem_choose(virtual_avail); 301 302 virtual_end = VM_MAX_KERNEL_ADDRESS; 303 304 /* 305 * Initialize protection array. 306 */ 307 i386_protection_init(); 308 309 /* 310 * The kernel's pmap is statically allocated so we don't have to use 311 * pmap_create, which is unlikely to work correctly at this part of 312 * the boot sequence (XXX and which no longer exists). 313 */ 314 kernel_pmap = &kernel_pmap_store; 315 316 kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); 317 kernel_pmap->pm_count = 1; 318 kernel_pmap->pm_active = -1; /* don't allow deactivation */ 319 TAILQ_INIT(&kernel_pmap->pm_pvlist); 320 LIST_INIT(&allpmaps); 321 LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); 322 nkpt = NKPT; 323 324 /* 325 * Reserve some special page table entries/VA space for temporary 326 * mapping of pages. 327 */ 328#define SYSMAP(c, p, v, n) \ 329 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 330 331 va = virtual_avail; 332 pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); 333 334 /* 335 * CMAP1/CMAP2 are used for zeroing and copying pages. 336 */ 337 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 338 SYSMAP(caddr_t, CMAP2, CADDR2, 1) 339 340 /* 341 * Crashdump maps. 342 */ 343 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); 344 345 /* 346 * ptvmmap is used for reading arbitrary physical pages via /dev/mem. 347 * XXX ptmmap is not used. 348 */ 349 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 350 351 /* 352 * msgbufp is used to map the system message buffer. 353 * XXX msgbufmap is not used. 354 */ 355 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 356 atop(round_page(MSGBUF_SIZE))) 357 358 /* 359 * ptemap is used for pmap_pte_quick 360 */ 361 SYSMAP(unsigned *, PMAP1, PADDR1, 1); 362 363 virtual_avail = va; 364 365 *CMAP1 = *CMAP2 = 0; 366 for (i = 0; i < NKPT; i++) 367 PTD[i] = 0; 368 369 pgeflag = 0; 370#if !defined(SMP) /* XXX - see also mp_machdep.c */ 371 if (cpu_feature & CPUID_PGE) { 372 pgeflag = PG_G; 373 } 374#endif 375 376/* 377 * Initialize the 4MB page size flag 378 */ 379 pseflag = 0; 380/* 381 * The 4MB page version of the initial 382 * kernel page mapping. 383 */ 384 pdir4mb = 0; 385 386#if !defined(DISABLE_PSE) 387 if (cpu_feature & CPUID_PSE) { 388 unsigned ptditmp; 389 /* 390 * Note that we have enabled PSE mode 391 */ 392 pseflag = PG_PS; 393 ptditmp = *(PTmap + i386_btop(KERNBASE)); 394 ptditmp &= ~(NBPDR - 1); 395 ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; 396 pdir4mb = ptditmp; 397 398#if !defined(SMP) 399 /* 400 * Enable the PSE mode. 401 */ 402 load_cr4(rcr4() | CR4_PSE); 403 404 /* 405 * We can do the mapping here for the single processor 406 * case. We simply ignore the old page table page from 407 * now on. 408 */ 409 /* 410 * For SMP, we still need 4K pages to bootstrap APs, 411 * PSE will be enabled as soon as all APs are up. 412 */ 413 PTD[KPTDI] = (pd_entry_t) ptditmp; 414 kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; 415 invltlb(); 416#endif 417 } 418#endif 419 420#ifdef SMP 421 if (cpu_apic_address == 0) 422 panic("pmap_bootstrap: no local apic! (non-SMP hardware?)"); 423 424 /* local apic is mapped on last page */ 425 SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | 426 (cpu_apic_address & PG_FRAME)); 427#endif 428 429 invltlb(); 430} 431 432#ifdef SMP 433/* 434 * Set 4mb pdir for mp startup 435 */ 436void 437pmap_set_opt(void) 438{ 439 if (pseflag && (cpu_feature & CPUID_PSE)) { 440 load_cr4(rcr4() | CR4_PSE); 441 if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */ 442 kernel_pmap->pm_pdir[KPTDI] = 443 PTD[KPTDI] = (pd_entry_t)pdir4mb; 444 cpu_invltlb(); 445 } 446 } 447} 448#endif 449 450/* 451 * Initialize the pmap module. 452 * Called by vm_init, to initialize any structures that the pmap 453 * system needs to map virtual memory. 454 * pmap_init has been enhanced to support in a fairly consistant 455 * way, discontiguous physical memory. 456 */ 457void 458pmap_init(phys_start, phys_end) 459 vm_offset_t phys_start, phys_end; 460{ 461 int i; 462 int initial_pvs; 463 464 /* 465 * object for kernel page table pages 466 */ 467 kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE); 468 469 /* 470 * Allocate memory for random pmap data structures. Includes the 471 * pv_head_table. 472 */ 473 474 for(i = 0; i < vm_page_array_size; i++) { 475 vm_page_t m; 476 477 m = &vm_page_array[i]; 478 TAILQ_INIT(&m->md.pv_list); 479 m->md.pv_list_count = 0; 480 } 481 482 /* 483 * init the pv free list 484 */ 485 initial_pvs = vm_page_array_size; 486 if (initial_pvs < MINPV) 487 initial_pvs = MINPV; 488 pvzone = &pvzone_store; 489 pvinit = (struct pv_entry *) kmem_alloc(kernel_map, 490 initial_pvs * sizeof (struct pv_entry)); 491 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, 492 vm_page_array_size); 493 494 /* 495 * Now it is safe to enable pv_table recording. 496 */ 497 pmap_initialized = TRUE; 498} 499 500/* 501 * Initialize the address space (zone) for the pv_entries. Set a 502 * high water mark so that the system can recover from excessive 503 * numbers of pv entries. 504 */ 505void 506pmap_init2() 507{ 508 int shpgperproc = PMAP_SHPGPERPROC; 509 510 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 511 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 512 pv_entry_high_water = 9 * (pv_entry_max / 10); 513 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); 514} 515 516 517/*************************************************** 518 * Low level helper routines..... 519 ***************************************************/ 520 521#if defined(PMAP_DIAGNOSTIC) 522 523/* 524 * This code checks for non-writeable/modified pages. 525 * This should be an invalid condition. 526 */ 527static int 528pmap_nw_modified(pt_entry_t ptea) 529{ 530 int pte; 531 532 pte = (int) ptea; 533 534 if ((pte & (PG_M|PG_RW)) == PG_M) 535 return 1; 536 else 537 return 0; 538} 539#endif 540 541 542/* 543 * this routine defines the region(s) of memory that should 544 * not be tested for the modified bit. 545 */ 546static PMAP_INLINE int 547pmap_track_modified(vm_offset_t va) 548{ 549 if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) 550 return 1; 551 else 552 return 0; 553} 554 555static PMAP_INLINE void 556invltlb_1pg(vm_offset_t va) 557{ 558#ifdef I386_CPU 559 invltlb(); 560#else 561 invlpg(va); 562#endif 563} 564 565static __inline void 566pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 567{ 568#if defined(SMP) 569 if (pmap->pm_active & (1 << PCPU_GET(cpuid))) 570 cpu_invlpg((void *)va); 571 if (pmap->pm_active & PCPU_GET(other_cpus)) 572 smp_invltlb(); 573#else 574 if (pmap->pm_active) 575 invltlb_1pg(va); 576#endif 577} 578 579static __inline void 580pmap_invalidate_all(pmap_t pmap) 581{ 582#if defined(SMP) 583 if (pmap->pm_active & (1 << PCPU_GET(cpuid))) 584 cpu_invltlb(); 585 if (pmap->pm_active & PCPU_GET(other_cpus)) 586 smp_invltlb(); 587#else 588 if (pmap->pm_active) 589 invltlb(); 590#endif 591} 592 593/* 594 * Return an address which is the base of the Virtual mapping of 595 * all the PTEs for the given pmap. Note this doesn't say that 596 * all the PTEs will be present or that the pages there are valid. 597 * The PTEs are made available by the recursive mapping trick. 598 * It will map in the alternate PTE space if needed. 599 */ 600static pt_entry_t * 601get_ptbase(pmap) 602 pmap_t pmap; 603{ 604 unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 605 606 /* are we current address space or kernel? */ 607 if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { 608 return PTmap; 609 } 610 /* otherwise, we are alternate address space */ 611 if (frame != (((unsigned) APTDpde) & PG_FRAME)) { 612 APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); 613#if defined(SMP) 614 /* The page directory is not shared between CPUs */ 615 cpu_invltlb(); 616#else 617 invltlb(); 618#endif 619 } 620 return APTmap; 621} 622 623/* 624 * Super fast pmap_pte routine best used when scanning 625 * the pv lists. This eliminates many coarse-grained 626 * invltlb calls. Note that many of the pv list 627 * scans are across different pmaps. It is very wasteful 628 * to do an entire invltlb for checking a single mapping. 629 */ 630 631static pt_entry_t * 632pmap_pte_quick(pmap, va) 633 register pmap_t pmap; 634 vm_offset_t va; 635{ 636 pd_entry_t pde, newpf; 637 if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) { 638 pd_entry_t frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 639 unsigned index = i386_btop(va); 640 /* are we current address space or kernel? */ 641 if ((pmap == kernel_pmap) || 642 (frame == (((unsigned) PTDpde) & PG_FRAME))) { 643 return PTmap + index; 644 } 645 newpf = pde & PG_FRAME; 646 if (((*PMAP1) & PG_FRAME) != newpf) { 647 *PMAP1 = newpf | PG_RW | PG_V; 648 invltlb_1pg((vm_offset_t) PADDR1); 649 } 650 return PADDR1 + (index & (NPTEPG - 1)); 651 } 652 return (0); 653} 654 655/* 656 * Routine: pmap_extract 657 * Function: 658 * Extract the physical page address associated 659 * with the given map/virtual_address pair. 660 */ 661vm_offset_t 662pmap_extract(pmap, va) 663 register pmap_t pmap; 664 vm_offset_t va; 665{ 666 vm_offset_t rtval; /* XXX FIXME */ 667 vm_offset_t pdirindex; 668 pdirindex = va >> PDRSHIFT; 669 if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) { 670 pt_entry_t *pte; 671 if ((rtval & PG_PS) != 0) { 672 rtval &= ~(NBPDR - 1); 673 rtval |= va & (NBPDR - 1); 674 return rtval; 675 } 676 pte = get_ptbase(pmap) + i386_btop(va); 677 rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); 678 return rtval; 679 } 680 return 0; 681 682} 683 684/*************************************************** 685 * Low level mapping routines..... 686 ***************************************************/ 687 688/* 689 * add a wired page to the kva 690 * note that in order for the mapping to take effect -- you 691 * should do a invltlb after doing the pmap_kenter... 692 */ 693PMAP_INLINE void 694pmap_kenter(vm_offset_t va, vm_offset_t pa) 695{ 696 pt_entry_t *pte; 697 pt_entry_t npte, opte; 698 699 npte = pa | PG_RW | PG_V | pgeflag; 700 pte = vtopte(va); 701 opte = *pte; 702 *pte = npte; 703 /*if (opte)*/ 704 invltlb_1pg(va); /* XXX what about SMP? */ 705} 706 707/* 708 * remove a page from the kernel pagetables 709 */ 710PMAP_INLINE void 711pmap_kremove(vm_offset_t va) 712{ 713 register pt_entry_t *pte; 714 715 pte = vtopte(va); 716 *pte = 0; 717 invltlb_1pg(va); /* XXX what about SMP? */ 718} 719 720/* 721 * Used to map a range of physical addresses into kernel 722 * virtual address space. 723 * 724 * The value passed in '*virt' is a suggested virtual address for 725 * the mapping. Architectures which can support a direct-mapped 726 * physical to virtual region can return the appropriate address 727 * within that region, leaving '*virt' unchanged. Other 728 * architectures should map the pages starting at '*virt' and 729 * update '*virt' with the first usable address after the mapped 730 * region. 731 */ 732vm_offset_t 733pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) 734{ 735 vm_offset_t sva = *virt; 736 vm_offset_t va = sva; 737 while (start < end) { 738 pmap_kenter(va, start); 739 va += PAGE_SIZE; 740 start += PAGE_SIZE; 741 } 742 *virt = va; 743 return (sva); 744} 745 746 747/* 748 * Add a list of wired pages to the kva 749 * this routine is only used for temporary 750 * kernel mappings that do not need to have 751 * page modification or references recorded. 752 * Note that old mappings are simply written 753 * over. The page *must* be wired. 754 */ 755void 756pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 757{ 758 vm_offset_t end_va; 759 760 end_va = va + count * PAGE_SIZE; 761 762 while (va < end_va) { 763 pt_entry_t *pte; 764 765 pte = vtopte(va); 766 *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; 767#ifdef SMP 768 cpu_invlpg((void *)va); 769#else 770 invltlb_1pg(va); 771#endif 772 va += PAGE_SIZE; 773 m++; 774 } 775#ifdef SMP 776 smp_invltlb(); 777#endif 778} 779 780/* 781 * this routine jerks page mappings from the 782 * kernel -- it is meant only for temporary mappings. 783 */ 784void 785pmap_qremove(vm_offset_t va, int count) 786{ 787 vm_offset_t end_va; 788 789 end_va = va + count*PAGE_SIZE; 790 791 while (va < end_va) { 792 pt_entry_t *pte; 793 794 pte = vtopte(va); 795 *pte = 0; 796#ifdef SMP 797 cpu_invlpg((void *)va); 798#else 799 invltlb_1pg(va); 800#endif 801 va += PAGE_SIZE; 802 } 803#ifdef SMP 804 smp_invltlb(); 805#endif 806} 807 808static vm_page_t 809pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 810{ 811 vm_page_t m; 812retry: 813 m = vm_page_lookup(object, pindex); 814 if (m && vm_page_sleep_busy(m, FALSE, "pplookp")) 815 goto retry; 816 return m; 817} 818 819/* 820 * Create the Uarea stack for a new process. 821 * This routine directly affects the fork perf for a process. 822 */ 823void 824pmap_new_proc(struct proc *p) 825{ 826#ifdef I386_CPU 827 int updateneeded = 0; 828#endif 829 int i; 830 vm_object_t upobj; 831 vm_offset_t up; 832 vm_page_t m; 833 pt_entry_t *ptek, oldpte; 834 835 /* 836 * allocate object for the upages 837 */ 838 upobj = p->p_upages_obj; 839 if (upobj == NULL) { 840 upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES); 841 p->p_upages_obj = upobj; 842 } 843 844 /* get a kernel virtual address for the U area for this thread */ 845 up = (vm_offset_t)p->p_uarea; 846 if (up == 0) { 847 up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE); 848 if (up == 0) 849 panic("pmap_new_proc: upage allocation failed"); 850 p->p_uarea = (struct user *)up; 851 } 852 853 ptek = vtopte(up); 854 855 for (i = 0; i < UAREA_PAGES; i++) { 856 /* 857 * Get a kernel stack page 858 */ 859 m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 860 861 /* 862 * Wire the page 863 */ 864 m->wire_count++; 865 cnt.v_wire_count++; 866 867 oldpte = *(ptek + i); 868 /* 869 * Enter the page into the kernel address space. 870 */ 871 *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; 872 if (oldpte) { 873#ifdef I386_CPU 874 updateneeded = 1; 875#else 876 invlpg(up + i * PAGE_SIZE); 877#endif 878 } 879 880 vm_page_wakeup(m); 881 vm_page_flag_clear(m, PG_ZERO); 882 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 883 m->valid = VM_PAGE_BITS_ALL; 884 } 885#ifdef I386_CPU 886 if (updateneeded) 887 invltlb(); 888#endif 889} 890 891/* 892 * Dispose the U-Area for a process that has exited. 893 * This routine directly impacts the exit perf of a process. 894 */ 895void 896pmap_dispose_proc(p) 897 struct proc *p; 898{ 899 int i; 900 vm_object_t upobj; 901 vm_offset_t up; 902 vm_page_t m; 903 pt_entry_t *ptek, oldpte; 904 905 upobj = p->p_upages_obj; 906 up = (vm_offset_t)p->p_uarea; 907 ptek = vtopte(up); 908 for (i = 0; i < UAREA_PAGES; i++) { 909 m = vm_page_lookup(upobj, i); 910 if (m == NULL) 911 panic("pmap_dispose_proc: upage already missing?"); 912 vm_page_busy(m); 913 oldpte = *(ptek + i); 914 *(ptek + i) = 0; 915#ifndef I386_CPU 916 invlpg(up + i * PAGE_SIZE); 917#endif 918 vm_page_unwire(m, 0); 919 vm_page_free(m); 920 } 921#ifdef I386_CPU 922 invltlb(); 923#endif 924} 925 926/* 927 * Allow the U_AREA for a process to be prejudicially paged out. 928 */ 929void 930pmap_swapout_proc(p) 931 struct proc *p; 932{ 933 int i; 934 vm_object_t upobj; 935 vm_offset_t up; 936 vm_page_t m; 937 938 upobj = p->p_upages_obj; 939 up = (vm_offset_t)p->p_uarea; 940 for (i = 0; i < UAREA_PAGES; i++) { 941 m = vm_page_lookup(upobj, i); 942 if (m == NULL) 943 panic("pmap_swapout_proc: upage already missing?"); 944 vm_page_dirty(m); 945 vm_page_unwire(m, 0); 946 pmap_kremove(up + i * PAGE_SIZE); 947 } 948} 949 950/* 951 * Bring the U-Area for a specified process back in. 952 */ 953void 954pmap_swapin_proc(p) 955 struct proc *p; 956{ 957 int i, rv; 958 vm_object_t upobj; 959 vm_offset_t up; 960 vm_page_t m; 961 962 upobj = p->p_upages_obj; 963 up = (vm_offset_t)p->p_uarea; 964 for (i = 0; i < UAREA_PAGES; i++) { 965 m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 966 pmap_kenter(up + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); 967 if (m->valid != VM_PAGE_BITS_ALL) { 968 rv = vm_pager_get_pages(upobj, &m, 1, 0); 969 if (rv != VM_PAGER_OK) 970 panic("pmap_swapin_proc: cannot get upage for proc: %d\n", p->p_pid); 971 m = vm_page_lookup(upobj, i); 972 m->valid = VM_PAGE_BITS_ALL; 973 } 974 vm_page_wire(m); 975 vm_page_wakeup(m); 976 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 977 } 978} 979 980/* 981 * Create the kernel stack (including pcb for i386) for a new thread. 982 * This routine directly affects the fork perf for a process and 983 * create performance for a thread. 984 */ 985void 986pmap_new_thread(struct thread *td) 987{ 988#ifdef I386_CPU 989 int updateneeded = 0; 990#endif 991 int i; 992 vm_object_t ksobj; 993 vm_page_t m; 994 vm_offset_t ks; 995 pt_entry_t *ptek, oldpte; 996 997 /* 998 * allocate object for the kstack 999 */ 1000 ksobj = td->td_kstack_obj; 1001 if (ksobj == NULL) { 1002 ksobj = vm_object_allocate(OBJT_DEFAULT, KSTACK_PAGES); 1003 td->td_kstack_obj = ksobj; 1004 } 1005 1006#ifdef KSTACK_GUARD 1007 /* get a kernel virtual address for the kstack for this thread */ 1008 ks = td->td_kstack; 1009 if (ks == 0) { 1010 ks = kmem_alloc_nofault(kernel_map, 1011 (KSTACK_PAGES + 1) * PAGE_SIZE); 1012 if (ks == 0) 1013 panic("pmap_new_thread: kstack allocation failed"); 1014 ks += PAGE_SIZE; 1015 td->td_kstack = ks; 1016 } 1017 1018 ptek = vtopte(ks - PAGE_SIZE); 1019 oldpte = *ptek; 1020 *ptek = 0; 1021 if (oldpte) { 1022#ifdef I386_CPU 1023 updateneeded = 1; 1024#else 1025 invlpg(ks - PAGE_SIZE); 1026#endif 1027 } 1028 ptek++; 1029#else 1030 /* get a kernel virtual address for the kstack for this thread */ 1031 ks = td->td_kstack; 1032 if (ks == 0) { 1033 ks = kmem_alloc_nofault(kernel_map, KSTACK_PAGES * PAGE_SIZE); 1034 if (ks == 0) 1035 panic("pmap_new_thread: kstack allocation failed"); 1036 td->td_kstack = ks; 1037 } 1038 ptek = vtopte(ks); 1039#endif 1040 for (i = 0; i < KSTACK_PAGES; i++) { 1041 /* 1042 * Get a kernel stack page 1043 */ 1044 m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 1045 1046 /* 1047 * Wire the page 1048 */ 1049 m->wire_count++; 1050 cnt.v_wire_count++; 1051 1052 oldpte = *(ptek + i); 1053 /* 1054 * Enter the page into the kernel address space. 1055 */ 1056 *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; 1057 if (oldpte) { 1058#ifdef I386_CPU 1059 updateneeded = 1; 1060#else 1061 invlpg(ks + i * PAGE_SIZE); 1062#endif 1063 } 1064 1065 vm_page_wakeup(m); 1066 vm_page_flag_clear(m, PG_ZERO); 1067 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 1068 m->valid = VM_PAGE_BITS_ALL; 1069 } 1070#ifdef I386_CPU 1071 if (updateneeded) 1072 invltlb(); 1073#endif 1074} 1075 1076/* 1077 * Dispose the kernel stack for a thread that has exited. 1078 * This routine directly impacts the exit perf of a process and thread. 1079 */ 1080void 1081pmap_dispose_thread(td) 1082 struct thread *td; 1083{ 1084 int i; 1085 vm_object_t ksobj; 1086 vm_offset_t ks; 1087 vm_page_t m; 1088 pt_entry_t *ptek, oldpte; 1089 1090 ksobj = td->td_kstack_obj; 1091 ks = td->td_kstack; 1092 ptek = vtopte(ks); 1093 for (i = 0; i < KSTACK_PAGES; i++) { 1094 m = vm_page_lookup(ksobj, i); 1095 if (m == NULL) 1096 panic("pmap_dispose_thread: kstack already missing?"); 1097 vm_page_busy(m); 1098 oldpte = *(ptek + i); 1099 *(ptek + i) = 0; 1100#ifndef I386_CPU 1101 invlpg(ks + i * PAGE_SIZE); 1102#endif 1103 vm_page_unwire(m, 0); 1104 vm_page_free(m); 1105 } 1106#ifdef I386_CPU 1107 invltlb(); 1108#endif 1109} 1110 1111/* 1112 * Allow the Kernel stack for a thread to be prejudicially paged out. 1113 */ 1114void 1115pmap_swapout_thread(td) 1116 struct thread *td; 1117{ 1118 int i; 1119 vm_object_t ksobj; 1120 vm_offset_t ks; 1121 vm_page_t m; 1122 1123 ksobj = td->td_kstack_obj; 1124 ks = td->td_kstack; 1125 for (i = 0; i < KSTACK_PAGES; i++) { 1126 m = vm_page_lookup(ksobj, i); 1127 if (m == NULL) 1128 panic("pmap_swapout_thread: kstack already missing?"); 1129 vm_page_dirty(m); 1130 vm_page_unwire(m, 0); 1131 pmap_kremove(ks + i * PAGE_SIZE); 1132 } 1133} 1134 1135/* 1136 * Bring the kernel stack for a specified thread back in. 1137 */ 1138void 1139pmap_swapin_thread(td) 1140 struct thread *td; 1141{ 1142 int i, rv; 1143 vm_object_t ksobj; 1144 vm_offset_t ks; 1145 vm_page_t m; 1146 1147 ksobj = td->td_kstack_obj; 1148 ks = td->td_kstack; 1149 for (i = 0; i < KSTACK_PAGES; i++) { 1150 m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 1151 pmap_kenter(ks + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); 1152 if (m->valid != VM_PAGE_BITS_ALL) { 1153 rv = vm_pager_get_pages(ksobj, &m, 1, 0); 1154 if (rv != VM_PAGER_OK) 1155 panic("pmap_swapin_thread: cannot get kstack for proc: %d\n", td->td_proc->p_pid); 1156 m = vm_page_lookup(ksobj, i); 1157 m->valid = VM_PAGE_BITS_ALL; 1158 } 1159 vm_page_wire(m); 1160 vm_page_wakeup(m); 1161 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 1162 } 1163} 1164 1165/*************************************************** 1166 * Page table page management routines..... 1167 ***************************************************/ 1168 1169/* 1170 * This routine unholds page table pages, and if the hold count 1171 * drops to zero, then it decrements the wire count. 1172 */ 1173static int 1174_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 1175{ 1176 1177 while (vm_page_sleep_busy(m, FALSE, "pmuwpt")) 1178 ; 1179 1180 if (m->hold_count == 0) { 1181 vm_offset_t pteva; 1182 /* 1183 * unmap the page table page 1184 */ 1185 pmap->pm_pdir[m->pindex] = 0; 1186 --pmap->pm_stats.resident_count; 1187 if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == 1188 (PTDpde & PG_FRAME)) { 1189 /* 1190 * Do a invltlb to make the invalidated mapping 1191 * take effect immediately. 1192 */ 1193 pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); 1194 pmap_invalidate_page(pmap, pteva); 1195 } 1196 1197 if (pmap->pm_ptphint == m) 1198 pmap->pm_ptphint = NULL; 1199 1200 /* 1201 * If the page is finally unwired, simply free it. 1202 */ 1203 --m->wire_count; 1204 if (m->wire_count == 0) { 1205 1206 vm_page_flash(m); 1207 vm_page_busy(m); 1208 vm_page_free_zero(m); 1209 --cnt.v_wire_count; 1210 } 1211 return 1; 1212 } 1213 return 0; 1214} 1215 1216static PMAP_INLINE int 1217pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 1218{ 1219 vm_page_unhold(m); 1220 if (m->hold_count == 0) 1221 return _pmap_unwire_pte_hold(pmap, m); 1222 else 1223 return 0; 1224} 1225 1226/* 1227 * After removing a page table entry, this routine is used to 1228 * conditionally free the page, and manage the hold/wire counts. 1229 */ 1230static int 1231pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1232{ 1233 unsigned ptepindex; 1234 if (va >= VM_MAXUSER_ADDRESS) 1235 return 0; 1236 1237 if (mpte == NULL) { 1238 ptepindex = (va >> PDRSHIFT); 1239 if (pmap->pm_ptphint && 1240 (pmap->pm_ptphint->pindex == ptepindex)) { 1241 mpte = pmap->pm_ptphint; 1242 } else { 1243 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1244 pmap->pm_ptphint = mpte; 1245 } 1246 } 1247 1248 return pmap_unwire_pte_hold(pmap, mpte); 1249} 1250 1251void 1252pmap_pinit0(pmap) 1253 struct pmap *pmap; 1254{ 1255 pmap->pm_pdir = 1256 (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); 1257 pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD); 1258 pmap->pm_count = 1; 1259 pmap->pm_ptphint = NULL; 1260 pmap->pm_active = 0; 1261 TAILQ_INIT(&pmap->pm_pvlist); 1262 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1263 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1264} 1265 1266/* 1267 * Initialize a preallocated and zeroed pmap structure, 1268 * such as one in a vmspace structure. 1269 */ 1270void 1271pmap_pinit(pmap) 1272 register struct pmap *pmap; 1273{ 1274 vm_page_t ptdpg; 1275 1276 /* 1277 * No need to allocate page table space yet but we do need a valid 1278 * page directory table. 1279 */ 1280 if (pmap->pm_pdir == NULL) 1281 pmap->pm_pdir = 1282 (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); 1283 1284 /* 1285 * allocate object for the ptes 1286 */ 1287 if (pmap->pm_pteobj == NULL) 1288 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1); 1289 1290 /* 1291 * allocate the page directory page 1292 */ 1293 ptdpg = vm_page_grab(pmap->pm_pteobj, PTDPTDI, 1294 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 1295 1296 ptdpg->wire_count = 1; 1297 ++cnt.v_wire_count; 1298 1299 1300 vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/ 1301 ptdpg->valid = VM_PAGE_BITS_ALL; 1302 1303 pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); 1304 if ((ptdpg->flags & PG_ZERO) == 0) 1305 bzero(pmap->pm_pdir, PAGE_SIZE); 1306 1307 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1308 /* Wire in kernel global address entries. */ 1309 /* XXX copies current process, does not fill in MPPTDI */ 1310 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); 1311#ifdef SMP 1312 pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; 1313#endif 1314 1315 /* install self-referential address mapping entry */ 1316 pmap->pm_pdir[PTDPTDI] = 1317 VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M; 1318 1319 pmap->pm_count = 1; 1320 pmap->pm_active = 0; 1321 pmap->pm_ptphint = NULL; 1322 TAILQ_INIT(&pmap->pm_pvlist); 1323 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1324} 1325 1326/* 1327 * Wire in kernel global address entries. To avoid a race condition 1328 * between pmap initialization and pmap_growkernel, this procedure 1329 * should be called after the vmspace is attached to the process 1330 * but before this pmap is activated. 1331 */ 1332void 1333pmap_pinit2(pmap) 1334 struct pmap *pmap; 1335{ 1336 /* XXX: Remove this stub when no longer called */ 1337} 1338 1339static int 1340pmap_release_free_page(pmap_t pmap, vm_page_t p) 1341{ 1342 pd_entry_t *pde = pmap->pm_pdir; 1343 /* 1344 * This code optimizes the case of freeing non-busy 1345 * page-table pages. Those pages are zero now, and 1346 * might as well be placed directly into the zero queue. 1347 */ 1348 if (vm_page_sleep_busy(p, FALSE, "pmaprl")) 1349 return 0; 1350 1351 vm_page_busy(p); 1352 1353 /* 1354 * Remove the page table page from the processes address space. 1355 */ 1356 pde[p->pindex] = 0; 1357 pmap->pm_stats.resident_count--; 1358 1359 if (p->hold_count) { 1360 panic("pmap_release: freeing held page table page"); 1361 } 1362 /* 1363 * Page directory pages need to have the kernel 1364 * stuff cleared, so they can go into the zero queue also. 1365 */ 1366 if (p->pindex == PTDPTDI) { 1367 bzero(pde + KPTDI, nkpt * PTESIZE); 1368#ifdef SMP 1369 pde[MPPTDI] = 0; 1370#endif 1371 pde[APTDPTDI] = 0; 1372 pmap_kremove((vm_offset_t) pmap->pm_pdir); 1373 } 1374 1375 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) 1376 pmap->pm_ptphint = NULL; 1377 1378 p->wire_count--; 1379 cnt.v_wire_count--; 1380 vm_page_free_zero(p); 1381 return 1; 1382} 1383 1384/* 1385 * this routine is called if the page table page is not 1386 * mapped correctly. 1387 */ 1388static vm_page_t 1389_pmap_allocpte(pmap, ptepindex) 1390 pmap_t pmap; 1391 unsigned ptepindex; 1392{ 1393 vm_offset_t pteva, ptepa; /* XXXPA */ 1394 vm_page_t m; 1395 1396 /* 1397 * Find or fabricate a new pagetable page 1398 */ 1399 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1400 VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1401 1402 KASSERT(m->queue == PQ_NONE, 1403 ("_pmap_allocpte: %p->queue != PQ_NONE", m)); 1404 1405 if (m->wire_count == 0) 1406 cnt.v_wire_count++; 1407 m->wire_count++; 1408 1409 /* 1410 * Increment the hold count for the page table page 1411 * (denoting a new mapping.) 1412 */ 1413 m->hold_count++; 1414 1415 /* 1416 * Map the pagetable page into the process address space, if 1417 * it isn't already there. 1418 */ 1419 1420 pmap->pm_stats.resident_count++; 1421 1422 ptepa = VM_PAGE_TO_PHYS(m); 1423 pmap->pm_pdir[ptepindex] = 1424 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); 1425 1426 /* 1427 * Set the page table hint 1428 */ 1429 pmap->pm_ptphint = m; 1430 1431 /* 1432 * Try to use the new mapping, but if we cannot, then 1433 * do it with the routine that maps the page explicitly. 1434 */ 1435 if ((m->flags & PG_ZERO) == 0) { 1436 if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == 1437 (PTDpde & PG_FRAME)) { 1438 pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex); 1439 bzero((caddr_t) pteva, PAGE_SIZE); 1440 } else { 1441 pmap_zero_page(ptepa); 1442 } 1443 } 1444 1445 m->valid = VM_PAGE_BITS_ALL; 1446 vm_page_flag_clear(m, PG_ZERO); 1447 vm_page_flag_set(m, PG_MAPPED); 1448 vm_page_wakeup(m); 1449 1450 return m; 1451} 1452 1453static vm_page_t 1454pmap_allocpte(pmap_t pmap, vm_offset_t va) 1455{ 1456 unsigned ptepindex; 1457 pd_entry_t ptepa; 1458 vm_page_t m; 1459 1460 /* 1461 * Calculate pagetable page index 1462 */ 1463 ptepindex = va >> PDRSHIFT; 1464 1465 /* 1466 * Get the page directory entry 1467 */ 1468 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; 1469 1470 /* 1471 * This supports switching from a 4MB page to a 1472 * normal 4K page. 1473 */ 1474 if (ptepa & PG_PS) { 1475 pmap->pm_pdir[ptepindex] = 0; 1476 ptepa = 0; 1477 invltlb(); 1478 } 1479 1480 /* 1481 * If the page table page is mapped, we just increment the 1482 * hold count, and activate it. 1483 */ 1484 if (ptepa) { 1485 /* 1486 * In order to get the page table page, try the 1487 * hint first. 1488 */ 1489 if (pmap->pm_ptphint && 1490 (pmap->pm_ptphint->pindex == ptepindex)) { 1491 m = pmap->pm_ptphint; 1492 } else { 1493 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1494 pmap->pm_ptphint = m; 1495 } 1496 m->hold_count++; 1497 return m; 1498 } 1499 /* 1500 * Here if the pte page isn't mapped, or if it has been deallocated. 1501 */ 1502 return _pmap_allocpte(pmap, ptepindex); 1503} 1504 1505 1506/*************************************************** 1507* Pmap allocation/deallocation routines. 1508 ***************************************************/ 1509 1510/* 1511 * Release any resources held by the given physical map. 1512 * Called when a pmap initialized by pmap_pinit is being released. 1513 * Should only be called if the map contains no valid mappings. 1514 */ 1515void 1516pmap_release(pmap_t pmap) 1517{ 1518 vm_page_t p,n,ptdpg; 1519 vm_object_t object = pmap->pm_pteobj; 1520 int curgeneration; 1521 1522#if defined(DIAGNOSTIC) 1523 if (object->ref_count != 1) 1524 panic("pmap_release: pteobj reference count != 1"); 1525#endif 1526 1527 ptdpg = NULL; 1528 LIST_REMOVE(pmap, pm_list); 1529retry: 1530 curgeneration = object->generation; 1531 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { 1532 n = TAILQ_NEXT(p, listq); 1533 if (p->pindex == PTDPTDI) { 1534 ptdpg = p; 1535 continue; 1536 } 1537 while (1) { 1538 if (!pmap_release_free_page(pmap, p) && 1539 (object->generation != curgeneration)) 1540 goto retry; 1541 } 1542 } 1543 1544 if (ptdpg && !pmap_release_free_page(pmap, ptdpg)) 1545 goto retry; 1546} 1547 1548static int 1549kvm_size(SYSCTL_HANDLER_ARGS) 1550{ 1551 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 1552 1553 return sysctl_handle_long(oidp, &ksize, 0, req); 1554} 1555SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1556 0, 0, kvm_size, "IU", "Size of KVM"); 1557 1558static int 1559kvm_free(SYSCTL_HANDLER_ARGS) 1560{ 1561 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1562 1563 return sysctl_handle_long(oidp, &kfree, 0, req); 1564} 1565SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1566 0, 0, kvm_free, "IU", "Amount of KVM free"); 1567 1568/* 1569 * grow the number of kernel page table entries, if needed 1570 */ 1571void 1572pmap_growkernel(vm_offset_t addr) 1573{ 1574 struct pmap *pmap; 1575 int s; 1576 vm_offset_t ptppaddr; 1577 vm_page_t nkpg; 1578 pd_entry_t newpdir; 1579 1580 s = splhigh(); 1581 if (kernel_vm_end == 0) { 1582 kernel_vm_end = KERNBASE; 1583 nkpt = 0; 1584 while (pdir_pde(PTD, kernel_vm_end)) { 1585 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1586 nkpt++; 1587 } 1588 } 1589 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1590 while (kernel_vm_end < addr) { 1591 if (pdir_pde(PTD, kernel_vm_end)) { 1592 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1593 continue; 1594 } 1595 1596 /* 1597 * This index is bogus, but out of the way 1598 */ 1599 nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM); 1600 if (!nkpg) 1601 panic("pmap_growkernel: no memory to grow kernel"); 1602 1603 nkpt++; 1604 1605 vm_page_wire(nkpg); 1606 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1607 pmap_zero_page(ptppaddr); 1608 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); 1609 pdir_pde(PTD, kernel_vm_end) = newpdir; 1610 1611 LIST_FOREACH(pmap, &allpmaps, pm_list) { 1612 *pmap_pde(pmap, kernel_vm_end) = newpdir; 1613 } 1614 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1615 } 1616 splx(s); 1617} 1618 1619/* 1620 * Retire the given physical map from service. 1621 * Should only be called if the map contains 1622 * no valid mappings. 1623 */ 1624void 1625pmap_destroy(pmap_t pmap) 1626{ 1627 int count; 1628 1629 if (pmap == NULL) 1630 return; 1631 1632 count = --pmap->pm_count; 1633 if (count == 0) { 1634 pmap_release(pmap); 1635 panic("destroying a pmap is not yet implemented"); 1636 } 1637} 1638 1639/* 1640 * Add a reference to the specified pmap. 1641 */ 1642void 1643pmap_reference(pmap_t pmap) 1644{ 1645 if (pmap != NULL) { 1646 pmap->pm_count++; 1647 } 1648} 1649 1650/*************************************************** 1651* page management routines. 1652 ***************************************************/ 1653 1654/* 1655 * free the pv_entry back to the free list 1656 */ 1657static PMAP_INLINE void 1658free_pv_entry(pv_entry_t pv) 1659{ 1660 pv_entry_count--; 1661 zfree(pvzone, pv); 1662} 1663 1664/* 1665 * get a new pv_entry, allocating a block from the system 1666 * when needed. 1667 * the memory allocation is performed bypassing the malloc code 1668 * because of the possibility of allocations at interrupt time. 1669 */ 1670static pv_entry_t 1671get_pv_entry(void) 1672{ 1673 pv_entry_count++; 1674 if (pv_entry_high_water && 1675 (pv_entry_count > pv_entry_high_water) && 1676 (pmap_pagedaemon_waken == 0)) { 1677 pmap_pagedaemon_waken = 1; 1678 wakeup (&vm_pages_needed); 1679 } 1680 return zalloc(pvzone); 1681} 1682 1683/* 1684 * This routine is very drastic, but can save the system 1685 * in a pinch. 1686 */ 1687void 1688pmap_collect() 1689{ 1690 int i; 1691 vm_page_t m; 1692 static int warningdone = 0; 1693 1694 if (pmap_pagedaemon_waken == 0) 1695 return; 1696 1697 if (warningdone < 5) { 1698 printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); 1699 warningdone++; 1700 } 1701 1702 for(i = 0; i < vm_page_array_size; i++) { 1703 m = &vm_page_array[i]; 1704 if (m->wire_count || m->hold_count || m->busy || 1705 (m->flags & (PG_BUSY | PG_UNMANAGED))) 1706 continue; 1707 pmap_remove_all(m); 1708 } 1709 pmap_pagedaemon_waken = 0; 1710} 1711 1712 1713/* 1714 * If it is the first entry on the list, it is actually 1715 * in the header and we must copy the following entry up 1716 * to the header. Otherwise we must search the list for 1717 * the entry. In either case we free the now unused entry. 1718 */ 1719 1720static int 1721pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1722{ 1723 pv_entry_t pv; 1724 int rtval; 1725 int s; 1726 1727 s = splvm(); 1728 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1729 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1730 if (pmap == pv->pv_pmap && va == pv->pv_va) 1731 break; 1732 } 1733 } else { 1734 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1735 if (va == pv->pv_va) 1736 break; 1737 } 1738 } 1739 1740 rtval = 0; 1741 if (pv) { 1742 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1743 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1744 m->md.pv_list_count--; 1745 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 1746 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1747 1748 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1749 free_pv_entry(pv); 1750 } 1751 1752 splx(s); 1753 return rtval; 1754} 1755 1756/* 1757 * Create a pv entry for page at pa for 1758 * (pmap, va). 1759 */ 1760static void 1761pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) 1762{ 1763 1764 int s; 1765 pv_entry_t pv; 1766 1767 s = splvm(); 1768 pv = get_pv_entry(); 1769 pv->pv_va = va; 1770 pv->pv_pmap = pmap; 1771 pv->pv_ptem = mpte; 1772 1773 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1774 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1775 m->md.pv_list_count++; 1776 1777 splx(s); 1778} 1779 1780/* 1781 * pmap_remove_pte: do the things to unmap a page in a process 1782 */ 1783static int 1784pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) 1785{ 1786 pt_entry_t oldpte; 1787 vm_page_t m; 1788 1789 oldpte = atomic_readandclear_int(ptq); 1790 if (oldpte & PG_W) 1791 pmap->pm_stats.wired_count -= 1; 1792 /* 1793 * Machines that don't support invlpg, also don't support 1794 * PG_G. 1795 */ 1796 if (oldpte & PG_G) 1797 invlpg(va); 1798 pmap->pm_stats.resident_count -= 1; 1799 if (oldpte & PG_MANAGED) { 1800 m = PHYS_TO_VM_PAGE(oldpte); 1801 if (oldpte & PG_M) { 1802#if defined(PMAP_DIAGNOSTIC) 1803 if (pmap_nw_modified((pt_entry_t) oldpte)) { 1804 printf( 1805 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", 1806 va, oldpte); 1807 } 1808#endif 1809 if (pmap_track_modified(va)) 1810 vm_page_dirty(m); 1811 } 1812 if (oldpte & PG_A) 1813 vm_page_flag_set(m, PG_REFERENCED); 1814 return pmap_remove_entry(pmap, m, va); 1815 } else { 1816 return pmap_unuse_pt(pmap, va, NULL); 1817 } 1818 1819 return 0; 1820} 1821 1822/* 1823 * Remove a single page from a process address space 1824 */ 1825static void 1826pmap_remove_page(pmap_t pmap, vm_offset_t va) 1827{ 1828 register pt_entry_t *ptq; 1829 1830 /* 1831 * if there is no pte for this address, just skip it!!! 1832 */ 1833 if (*pmap_pde(pmap, va) == 0) { 1834 return; 1835 } 1836 1837 /* 1838 * get a local va for mappings for this pmap. 1839 */ 1840 ptq = get_ptbase(pmap) + i386_btop(va); 1841 if (*ptq) { 1842 (void) pmap_remove_pte(pmap, ptq, va); 1843 pmap_invalidate_page(pmap, va); 1844 } 1845 return; 1846} 1847 1848/* 1849 * Remove the given range of addresses from the specified map. 1850 * 1851 * It is assumed that the start and end are properly 1852 * rounded to the page size. 1853 */ 1854void 1855pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1856{ 1857 register pt_entry_t *ptbase; 1858 vm_offset_t pdnxt; 1859 pd_entry_t ptpaddr; 1860 vm_offset_t sindex, eindex; 1861 int anyvalid; 1862 1863 if (pmap == NULL) 1864 return; 1865 1866 if (pmap->pm_stats.resident_count == 0) 1867 return; 1868 1869 /* 1870 * special handling of removing one page. a very 1871 * common operation and easy to short circuit some 1872 * code. 1873 */ 1874 if ((sva + PAGE_SIZE == eva) && 1875 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 1876 pmap_remove_page(pmap, sva); 1877 return; 1878 } 1879 1880 anyvalid = 0; 1881 1882 /* 1883 * Get a local virtual address for the mappings that are being 1884 * worked with. 1885 */ 1886 ptbase = get_ptbase(pmap); 1887 1888 sindex = i386_btop(sva); 1889 eindex = i386_btop(eva); 1890 1891 for (; sindex < eindex; sindex = pdnxt) { 1892 unsigned pdirindex; 1893 1894 /* 1895 * Calculate index for next page table. 1896 */ 1897 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); 1898 if (pmap->pm_stats.resident_count == 0) 1899 break; 1900 1901 pdirindex = sindex / NPDEPG; 1902 ptpaddr = pmap->pm_pdir[pdirindex]; 1903 if ((ptpaddr & PG_PS) != 0) { 1904 pmap->pm_pdir[pdirindex] = 0; 1905 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1906 anyvalid++; 1907 continue; 1908 } 1909 1910 /* 1911 * Weed out invalid mappings. Note: we assume that the page 1912 * directory table is always allocated, and in kernel virtual. 1913 */ 1914 if (ptpaddr == 0) 1915 continue; 1916 1917 /* 1918 * Limit our scan to either the end of the va represented 1919 * by the current page table page, or to the end of the 1920 * range being removed. 1921 */ 1922 if (pdnxt > eindex) { 1923 pdnxt = eindex; 1924 } 1925 1926 for (; sindex != pdnxt; sindex++) { 1927 vm_offset_t va; 1928 if (ptbase[sindex] == 0) { 1929 continue; 1930 } 1931 va = i386_ptob(sindex); 1932 1933 anyvalid++; 1934 if (pmap_remove_pte(pmap, 1935 ptbase + sindex, va)) 1936 break; 1937 } 1938 } 1939 1940 if (anyvalid) 1941 pmap_invalidate_all(pmap); 1942} 1943 1944/* 1945 * Routine: pmap_remove_all 1946 * Function: 1947 * Removes this physical page from 1948 * all physical maps in which it resides. 1949 * Reflects back modify bits to the pager. 1950 * 1951 * Notes: 1952 * Original versions of this routine were very 1953 * inefficient because they iteratively called 1954 * pmap_remove (slow...) 1955 */ 1956 1957static void 1958pmap_remove_all(vm_page_t m) 1959{ 1960 register pv_entry_t pv; 1961 pt_entry_t *pte, tpte; 1962 int s; 1963 1964#if defined(PMAP_DIAGNOSTIC) 1965 /* 1966 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 1967 * pages! 1968 */ 1969 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 1970 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); 1971 } 1972#endif 1973 1974 s = splvm(); 1975 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1976 pv->pv_pmap->pm_stats.resident_count--; 1977 1978 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 1979 1980 tpte = atomic_readandclear_int(pte); 1981 if (tpte & PG_W) 1982 pv->pv_pmap->pm_stats.wired_count--; 1983 1984 if (tpte & PG_A) 1985 vm_page_flag_set(m, PG_REFERENCED); 1986 1987 /* 1988 * Update the vm_page_t clean and reference bits. 1989 */ 1990 if (tpte & PG_M) { 1991#if defined(PMAP_DIAGNOSTIC) 1992 if (pmap_nw_modified((pt_entry_t) tpte)) { 1993 printf( 1994 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", 1995 pv->pv_va, tpte); 1996 } 1997#endif 1998 if (pmap_track_modified(pv->pv_va)) 1999 vm_page_dirty(m); 2000 } 2001 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 2002 2003 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2004 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2005 m->md.pv_list_count--; 2006 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2007 free_pv_entry(pv); 2008 } 2009 2010 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2011 2012 splx(s); 2013} 2014 2015/* 2016 * Set the physical protection on the 2017 * specified range of this map as requested. 2018 */ 2019void 2020pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2021{ 2022 register pt_entry_t *ptbase; 2023 vm_offset_t pdnxt; 2024 pd_entry_t ptpaddr; 2025 vm_pindex_t sindex, eindex; 2026 int anychanged; 2027 2028 if (pmap == NULL) 2029 return; 2030 2031 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2032 pmap_remove(pmap, sva, eva); 2033 return; 2034 } 2035 2036 if (prot & VM_PROT_WRITE) 2037 return; 2038 2039 anychanged = 0; 2040 2041 ptbase = get_ptbase(pmap); 2042 2043 sindex = i386_btop(sva); 2044 eindex = i386_btop(eva); 2045 2046 for (; sindex < eindex; sindex = pdnxt) { 2047 2048 unsigned pdirindex; 2049 2050 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); 2051 2052 pdirindex = sindex / NPDEPG; 2053 ptpaddr = pmap->pm_pdir[pdirindex]; 2054 if ((ptpaddr & PG_PS) != 0) { 2055 pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); 2056 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2057 anychanged++; 2058 continue; 2059 } 2060 2061 /* 2062 * Weed out invalid mappings. Note: we assume that the page 2063 * directory table is always allocated, and in kernel virtual. 2064 */ 2065 if (ptpaddr == 0) 2066 continue; 2067 2068 if (pdnxt > eindex) { 2069 pdnxt = eindex; 2070 } 2071 2072 for (; sindex != pdnxt; sindex++) { 2073 2074 pt_entry_t pbits; 2075 vm_page_t m; 2076 2077 pbits = ptbase[sindex]; 2078 2079 if (pbits & PG_MANAGED) { 2080 m = NULL; 2081 if (pbits & PG_A) { 2082 m = PHYS_TO_VM_PAGE(pbits); 2083 vm_page_flag_set(m, PG_REFERENCED); 2084 pbits &= ~PG_A; 2085 } 2086 if (pbits & PG_M) { 2087 if (pmap_track_modified(i386_ptob(sindex))) { 2088 if (m == NULL) 2089 m = PHYS_TO_VM_PAGE(pbits); 2090 vm_page_dirty(m); 2091 pbits &= ~PG_M; 2092 } 2093 } 2094 } 2095 2096 pbits &= ~PG_RW; 2097 2098 if (pbits != ptbase[sindex]) { 2099 ptbase[sindex] = pbits; 2100 anychanged = 1; 2101 } 2102 } 2103 } 2104 if (anychanged) 2105 pmap_invalidate_all(pmap); 2106} 2107 2108/* 2109 * Insert the given physical page (p) at 2110 * the specified virtual address (v) in the 2111 * target physical map with the protection requested. 2112 * 2113 * If specified, the page will be wired down, meaning 2114 * that the related pte can not be reclaimed. 2115 * 2116 * NB: This is the only routine which MAY NOT lazy-evaluate 2117 * or lose information. That is, this routine must actually 2118 * insert this page into the given map NOW. 2119 */ 2120void 2121pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2122 boolean_t wired) 2123{ 2124 vm_offset_t pa; 2125 register pt_entry_t *pte; 2126 vm_offset_t opa; 2127 pt_entry_t origpte, newpte; 2128 vm_page_t mpte; 2129 2130 if (pmap == NULL) 2131 return; 2132 2133 va &= PG_FRAME; 2134#ifdef PMAP_DIAGNOSTIC 2135 if (va > VM_MAX_KERNEL_ADDRESS) 2136 panic("pmap_enter: toobig"); 2137 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) 2138 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); 2139#endif 2140 2141 mpte = NULL; 2142 /* 2143 * In the case that a page table page is not 2144 * resident, we are creating it here. 2145 */ 2146 if (va < VM_MAXUSER_ADDRESS) { 2147 mpte = pmap_allocpte(pmap, va); 2148 } 2149#if 0 && defined(PMAP_DIAGNOSTIC) 2150 else { 2151 pd_entry_t *pdeaddr = pmap_pde(pmap, va); 2152 if (((origpte = *pdeaddr) & PG_V) == 0) { 2153 panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", 2154 pmap->pm_pdir[PTDPTDI], origpte, va); 2155 } 2156 } 2157#endif 2158 2159 pte = pmap_pte(pmap, va); 2160 2161 /* 2162 * Page Directory table entry not valid, we need a new PT page 2163 */ 2164 if (pte == NULL) { 2165 panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n", 2166 (void *)pmap->pm_pdir[PTDPTDI], va); 2167 } 2168 2169 pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; 2170 origpte = *(vm_offset_t *)pte; 2171 opa = origpte & PG_FRAME; 2172 2173 if (origpte & PG_PS) 2174 panic("pmap_enter: attempted pmap_enter on 4MB page"); 2175 2176 /* 2177 * Mapping has not changed, must be protection or wiring change. 2178 */ 2179 if (origpte && (opa == pa)) { 2180 /* 2181 * Wiring change, just update stats. We don't worry about 2182 * wiring PT pages as they remain resident as long as there 2183 * are valid mappings in them. Hence, if a user page is wired, 2184 * the PT page will be also. 2185 */ 2186 if (wired && ((origpte & PG_W) == 0)) 2187 pmap->pm_stats.wired_count++; 2188 else if (!wired && (origpte & PG_W)) 2189 pmap->pm_stats.wired_count--; 2190 2191#if defined(PMAP_DIAGNOSTIC) 2192 if (pmap_nw_modified((pt_entry_t) origpte)) { 2193 printf( 2194 "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", 2195 va, origpte); 2196 } 2197#endif 2198 2199 /* 2200 * Remove extra pte reference 2201 */ 2202 if (mpte) 2203 mpte->hold_count--; 2204 2205 if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { 2206 if ((origpte & PG_RW) == 0) { 2207 *pte |= PG_RW; 2208#ifdef SMP 2209 cpu_invlpg((void *)va); 2210 if (pmap->pm_active & PCPU_GET(other_cpus)) 2211 smp_invltlb(); 2212#else 2213 invltlb_1pg(va); 2214#endif 2215 } 2216 return; 2217 } 2218 2219 /* 2220 * We might be turning off write access to the page, 2221 * so we go ahead and sense modify status. 2222 */ 2223 if (origpte & PG_MANAGED) { 2224 if ((origpte & PG_M) && pmap_track_modified(va)) { 2225 vm_page_t om; 2226 om = PHYS_TO_VM_PAGE(opa); 2227 vm_page_dirty(om); 2228 } 2229 pa |= PG_MANAGED; 2230 } 2231 goto validate; 2232 } 2233 /* 2234 * Mapping has changed, invalidate old range and fall through to 2235 * handle validating new mapping. 2236 */ 2237 if (opa) { 2238 int err; 2239 err = pmap_remove_pte(pmap, pte, va); 2240 if (err) 2241 panic("pmap_enter: pte vanished, va: 0x%x", va); 2242 } 2243 2244 /* 2245 * Enter on the PV list if part of our managed memory. Note that we 2246 * raise IPL while manipulating pv_table since pmap_enter can be 2247 * called at interrupt time. 2248 */ 2249 if (pmap_initialized && 2250 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2251 pmap_insert_entry(pmap, va, mpte, m); 2252 pa |= PG_MANAGED; 2253 } 2254 2255 /* 2256 * Increment counters 2257 */ 2258 pmap->pm_stats.resident_count++; 2259 if (wired) 2260 pmap->pm_stats.wired_count++; 2261 2262validate: 2263 /* 2264 * Now validate mapping with desired protection/wiring. 2265 */ 2266 newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); 2267 2268 if (wired) 2269 newpte |= PG_W; 2270 if (va < VM_MAXUSER_ADDRESS) 2271 newpte |= PG_U; 2272 if (pmap == kernel_pmap) 2273 newpte |= pgeflag; 2274 2275 /* 2276 * if the mapping or permission bits are different, we need 2277 * to update the pte. 2278 */ 2279 if ((origpte & ~(PG_M|PG_A)) != newpte) { 2280 *pte = newpte | PG_A; 2281 /*if (origpte)*/ { 2282#ifdef SMP 2283 cpu_invlpg((void *)va); 2284 if (pmap->pm_active & PCPU_GET(other_cpus)) 2285 smp_invltlb(); 2286#else 2287 invltlb_1pg(va); 2288#endif 2289 } 2290 } 2291} 2292 2293/* 2294 * this code makes some *MAJOR* assumptions: 2295 * 1. Current pmap & pmap exists. 2296 * 2. Not wired. 2297 * 3. Read access. 2298 * 4. No page table pages. 2299 * 5. Tlbflush is deferred to calling procedure. 2300 * 6. Page IS managed. 2301 * but is *MUCH* faster than pmap_enter... 2302 */ 2303 2304static vm_page_t 2305pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) 2306{ 2307 pt_entry_t *pte; 2308 vm_offset_t pa; 2309 2310 /* 2311 * In the case that a page table page is not 2312 * resident, we are creating it here. 2313 */ 2314 if (va < VM_MAXUSER_ADDRESS) { 2315 unsigned ptepindex; 2316 pd_entry_t ptepa; 2317 2318 /* 2319 * Calculate pagetable page index 2320 */ 2321 ptepindex = va >> PDRSHIFT; 2322 if (mpte && (mpte->pindex == ptepindex)) { 2323 mpte->hold_count++; 2324 } else { 2325retry: 2326 /* 2327 * Get the page directory entry 2328 */ 2329 ptepa = pmap->pm_pdir[ptepindex]; 2330 2331 /* 2332 * If the page table page is mapped, we just increment 2333 * the hold count, and activate it. 2334 */ 2335 if (ptepa) { 2336 if (ptepa & PG_PS) 2337 panic("pmap_enter_quick: unexpected mapping into 4MB page"); 2338 if (pmap->pm_ptphint && 2339 (pmap->pm_ptphint->pindex == ptepindex)) { 2340 mpte = pmap->pm_ptphint; 2341 } else { 2342 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 2343 pmap->pm_ptphint = mpte; 2344 } 2345 if (mpte == NULL) 2346 goto retry; 2347 mpte->hold_count++; 2348 } else { 2349 mpte = _pmap_allocpte(pmap, ptepindex); 2350 } 2351 } 2352 } else { 2353 mpte = NULL; 2354 } 2355 2356 /* 2357 * This call to vtopte makes the assumption that we are 2358 * entering the page into the current pmap. In order to support 2359 * quick entry into any pmap, one would likely use pmap_pte_quick. 2360 * But that isn't as quick as vtopte. 2361 */ 2362 pte = vtopte(va); 2363 if (*pte) { 2364 if (mpte) 2365 pmap_unwire_pte_hold(pmap, mpte); 2366 return 0; 2367 } 2368 2369 /* 2370 * Enter on the PV list if part of our managed memory. Note that we 2371 * raise IPL while manipulating pv_table since pmap_enter can be 2372 * called at interrupt time. 2373 */ 2374 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) 2375 pmap_insert_entry(pmap, va, mpte, m); 2376 2377 /* 2378 * Increment counters 2379 */ 2380 pmap->pm_stats.resident_count++; 2381 2382 pa = VM_PAGE_TO_PHYS(m); 2383 2384 /* 2385 * Now validate mapping with RO protection 2386 */ 2387 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2388 *pte = pa | PG_V | PG_U; 2389 else 2390 *pte = pa | PG_V | PG_U | PG_MANAGED; 2391 2392 return mpte; 2393} 2394 2395/* 2396 * Make a temporary mapping for a physical address. This is only intended 2397 * to be used for panic dumps. 2398 */ 2399void * 2400pmap_kenter_temporary(vm_offset_t pa, int i) 2401{ 2402 pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); 2403 return ((void *)crashdumpmap); 2404} 2405 2406#define MAX_INIT_PT (96) 2407/* 2408 * pmap_object_init_pt preloads the ptes for a given object 2409 * into the specified pmap. This eliminates the blast of soft 2410 * faults on process startup and immediately after an mmap. 2411 */ 2412void 2413pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2414 vm_object_t object, vm_pindex_t pindex, 2415 vm_size_t size, int limit) 2416{ 2417 vm_offset_t tmpidx; 2418 int psize; 2419 vm_page_t p, mpte; 2420 int objpgs; 2421 2422 if (pmap == NULL || object == NULL) 2423 return; 2424 2425 /* 2426 * This code maps large physical mmap regions into the 2427 * processor address space. Note that some shortcuts 2428 * are taken, but the code works. 2429 */ 2430 if (pseflag && (object->type == OBJT_DEVICE) && 2431 ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { 2432 int i; 2433 vm_page_t m[1]; 2434 unsigned int ptepindex; 2435 int npdes; 2436 pd_entry_t ptepa; 2437 2438 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) 2439 return; 2440 2441retry: 2442 p = vm_page_lookup(object, pindex); 2443 if (p && vm_page_sleep_busy(p, FALSE, "init4p")) 2444 goto retry; 2445 2446 if (p == NULL) { 2447 p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); 2448 if (p == NULL) 2449 return; 2450 m[0] = p; 2451 2452 if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { 2453 vm_page_free(p); 2454 return; 2455 } 2456 2457 p = vm_page_lookup(object, pindex); 2458 vm_page_wakeup(p); 2459 } 2460 2461 ptepa = VM_PAGE_TO_PHYS(p); 2462 if (ptepa & (NBPDR - 1)) { 2463 return; 2464 } 2465 2466 p->valid = VM_PAGE_BITS_ALL; 2467 2468 pmap->pm_stats.resident_count += size >> PAGE_SHIFT; 2469 npdes = size >> PDRSHIFT; 2470 for(i = 0; i < npdes; i++) { 2471 pmap->pm_pdir[ptepindex] = 2472 ptepa | PG_U | PG_RW | PG_V | PG_PS; 2473 ptepa += NBPDR; 2474 ptepindex += 1; 2475 } 2476 vm_page_flag_set(p, PG_MAPPED); 2477 invltlb(); 2478 return; 2479 } 2480 2481 psize = i386_btop(size); 2482 2483 if ((object->type != OBJT_VNODE) || 2484 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2485 (object->resident_page_count > MAX_INIT_PT))) { 2486 return; 2487 } 2488 2489 if (psize + pindex > object->size) { 2490 if (object->size < pindex) 2491 return; 2492 psize = object->size - pindex; 2493 } 2494 2495 mpte = NULL; 2496 /* 2497 * if we are processing a major portion of the object, then scan the 2498 * entire thing. 2499 */ 2500 if (psize > (object->resident_page_count >> 2)) { 2501 objpgs = psize; 2502 2503 for (p = TAILQ_FIRST(&object->memq); 2504 ((objpgs > 0) && (p != NULL)); 2505 p = TAILQ_NEXT(p, listq)) { 2506 2507 tmpidx = p->pindex; 2508 if (tmpidx < pindex) { 2509 continue; 2510 } 2511 tmpidx -= pindex; 2512 if (tmpidx >= psize) { 2513 continue; 2514 } 2515 /* 2516 * don't allow an madvise to blow away our really 2517 * free pages allocating pv entries. 2518 */ 2519 if ((limit & MAP_PREFAULT_MADVISE) && 2520 cnt.v_free_count < cnt.v_free_reserved) { 2521 break; 2522 } 2523 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2524 (p->busy == 0) && 2525 (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2526 if ((p->queue - p->pc) == PQ_CACHE) 2527 vm_page_deactivate(p); 2528 vm_page_busy(p); 2529 mpte = pmap_enter_quick(pmap, 2530 addr + i386_ptob(tmpidx), p, mpte); 2531 vm_page_flag_set(p, PG_MAPPED); 2532 vm_page_wakeup(p); 2533 } 2534 objpgs -= 1; 2535 } 2536 } else { 2537 /* 2538 * else lookup the pages one-by-one. 2539 */ 2540 for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { 2541 /* 2542 * don't allow an madvise to blow away our really 2543 * free pages allocating pv entries. 2544 */ 2545 if ((limit & MAP_PREFAULT_MADVISE) && 2546 cnt.v_free_count < cnt.v_free_reserved) { 2547 break; 2548 } 2549 p = vm_page_lookup(object, tmpidx + pindex); 2550 if (p && 2551 ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2552 (p->busy == 0) && 2553 (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2554 if ((p->queue - p->pc) == PQ_CACHE) 2555 vm_page_deactivate(p); 2556 vm_page_busy(p); 2557 mpte = pmap_enter_quick(pmap, 2558 addr + i386_ptob(tmpidx), p, mpte); 2559 vm_page_flag_set(p, PG_MAPPED); 2560 vm_page_wakeup(p); 2561 } 2562 } 2563 } 2564 return; 2565} 2566 2567/* 2568 * pmap_prefault provides a quick way of clustering 2569 * pagefaults into a processes address space. It is a "cousin" 2570 * of pmap_object_init_pt, except it runs at page fault time instead 2571 * of mmap time. 2572 */ 2573#define PFBAK 4 2574#define PFFOR 4 2575#define PAGEORDER_SIZE (PFBAK+PFFOR) 2576 2577static int pmap_prefault_pageorder[] = { 2578 -PAGE_SIZE, PAGE_SIZE, 2579 -2 * PAGE_SIZE, 2 * PAGE_SIZE, 2580 -3 * PAGE_SIZE, 3 * PAGE_SIZE 2581 -4 * PAGE_SIZE, 4 * PAGE_SIZE 2582}; 2583 2584void 2585pmap_prefault(pmap, addra, entry) 2586 pmap_t pmap; 2587 vm_offset_t addra; 2588 vm_map_entry_t entry; 2589{ 2590 int i; 2591 vm_offset_t starta; 2592 vm_offset_t addr; 2593 vm_pindex_t pindex; 2594 vm_page_t m, mpte; 2595 vm_object_t object; 2596 2597 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) 2598 return; 2599 2600 object = entry->object.vm_object; 2601 2602 starta = addra - PFBAK * PAGE_SIZE; 2603 if (starta < entry->start) { 2604 starta = entry->start; 2605 } else if (starta > addra) { 2606 starta = 0; 2607 } 2608 2609 mpte = NULL; 2610 for (i = 0; i < PAGEORDER_SIZE; i++) { 2611 vm_object_t lobject; 2612 pt_entry_t *pte; 2613 2614 addr = addra + pmap_prefault_pageorder[i]; 2615 if (addr > addra + (PFFOR * PAGE_SIZE)) 2616 addr = 0; 2617 2618 if (addr < starta || addr >= entry->end) 2619 continue; 2620 2621 if ((*pmap_pde(pmap, addr)) == NULL) 2622 continue; 2623 2624 pte = vtopte(addr); 2625 if (*pte) 2626 continue; 2627 2628 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; 2629 lobject = object; 2630 for (m = vm_page_lookup(lobject, pindex); 2631 (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); 2632 lobject = lobject->backing_object) { 2633 if (lobject->backing_object_offset & PAGE_MASK) 2634 break; 2635 pindex += (lobject->backing_object_offset >> PAGE_SHIFT); 2636 m = vm_page_lookup(lobject->backing_object, pindex); 2637 } 2638 2639 /* 2640 * give-up when a page is not in memory 2641 */ 2642 if (m == NULL) 2643 break; 2644 2645 if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2646 (m->busy == 0) && 2647 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2648 2649 if ((m->queue - m->pc) == PQ_CACHE) { 2650 vm_page_deactivate(m); 2651 } 2652 vm_page_busy(m); 2653 mpte = pmap_enter_quick(pmap, addr, m, mpte); 2654 vm_page_flag_set(m, PG_MAPPED); 2655 vm_page_wakeup(m); 2656 } 2657 } 2658} 2659 2660/* 2661 * Routine: pmap_change_wiring 2662 * Function: Change the wiring attribute for a map/virtual-address 2663 * pair. 2664 * In/out conditions: 2665 * The mapping must already exist in the pmap. 2666 */ 2667void 2668pmap_change_wiring(pmap, va, wired) 2669 register pmap_t pmap; 2670 vm_offset_t va; 2671 boolean_t wired; 2672{ 2673 register pt_entry_t *pte; 2674 2675 if (pmap == NULL) 2676 return; 2677 2678 pte = pmap_pte(pmap, va); 2679 2680 if (wired && !pmap_pte_w(pte)) 2681 pmap->pm_stats.wired_count++; 2682 else if (!wired && pmap_pte_w(pte)) 2683 pmap->pm_stats.wired_count--; 2684 2685 /* 2686 * Wiring is not a hardware characteristic so there is no need to 2687 * invalidate TLB. 2688 */ 2689 pmap_pte_set_w(pte, wired); 2690} 2691 2692 2693 2694/* 2695 * Copy the range specified by src_addr/len 2696 * from the source map to the range dst_addr/len 2697 * in the destination map. 2698 * 2699 * This routine is only advisory and need not do anything. 2700 */ 2701 2702void 2703pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2704 vm_offset_t src_addr) 2705{ 2706 vm_offset_t addr; 2707 vm_offset_t end_addr = src_addr + len; 2708 vm_offset_t pdnxt; 2709 pd_entry_t src_frame, dst_frame; 2710 vm_page_t m; 2711 pd_entry_t saved_pde; 2712 2713 if (dst_addr != src_addr) 2714 return; 2715 2716 src_frame = src_pmap->pm_pdir[PTDPTDI] & PG_FRAME; 2717 if (src_frame != (PTDpde & PG_FRAME)) 2718 return; 2719 2720 dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; 2721 if (dst_frame != (APTDpde & PG_FRAME)) { 2722 APTDpde = dst_frame | PG_RW | PG_V; 2723#if defined(SMP) 2724 /* The page directory is not shared between CPUs */ 2725 cpu_invltlb(); 2726#else 2727 invltlb(); 2728#endif 2729 } 2730 saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V); 2731 for(addr = src_addr; addr < end_addr; addr = pdnxt) { 2732 pt_entry_t *src_pte, *dst_pte; 2733 vm_page_t dstmpte, srcmpte; 2734 pd_entry_t srcptepaddr; 2735 unsigned ptepindex; 2736 2737 if (addr >= UPT_MIN_ADDRESS) 2738 panic("pmap_copy: invalid to pmap_copy page tables\n"); 2739 2740 /* 2741 * Don't let optional prefaulting of pages make us go 2742 * way below the low water mark of free pages or way 2743 * above high water mark of used pv entries. 2744 */ 2745 if (cnt.v_free_count < cnt.v_free_reserved || 2746 pv_entry_count > pv_entry_high_water) 2747 break; 2748 2749 pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); 2750 ptepindex = addr >> PDRSHIFT; 2751 2752 srcptepaddr = src_pmap->pm_pdir[ptepindex]; 2753 if (srcptepaddr == 0) 2754 continue; 2755 2756 if (srcptepaddr & PG_PS) { 2757 if (dst_pmap->pm_pdir[ptepindex] == 0) { 2758 dst_pmap->pm_pdir[ptepindex] = srcptepaddr; 2759 dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; 2760 } 2761 continue; 2762 } 2763 2764 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); 2765 if ((srcmpte == NULL) || 2766 (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) 2767 continue; 2768 2769 if (pdnxt > end_addr) 2770 pdnxt = end_addr; 2771 2772 src_pte = vtopte(addr); 2773 dst_pte = avtopte(addr); 2774 while (addr < pdnxt) { 2775 pt_entry_t ptetemp; 2776 ptetemp = *src_pte; 2777 /* 2778 * we only virtual copy managed pages 2779 */ 2780 if ((ptetemp & PG_MANAGED) != 0) { 2781 /* 2782 * We have to check after allocpte for the 2783 * pte still being around... allocpte can 2784 * block. 2785 */ 2786 dstmpte = pmap_allocpte(dst_pmap, addr); 2787 if ((APTDpde & PG_FRAME) != 2788 (saved_pde & PG_FRAME)) { 2789 APTDpde = saved_pde; 2790printf ("IT HAPPENNED!"); 2791#if defined(SMP) 2792 cpu_invltlb(); 2793#else 2794 invltlb(); 2795#endif 2796 } 2797 if ((*dst_pte == 0) && (ptetemp = *src_pte)) { 2798 /* 2799 * Clear the modified and 2800 * accessed (referenced) bits 2801 * during the copy. 2802 */ 2803 m = PHYS_TO_VM_PAGE(ptetemp); 2804 *dst_pte = ptetemp & ~(PG_M | PG_A); 2805 dst_pmap->pm_stats.resident_count++; 2806 pmap_insert_entry(dst_pmap, addr, 2807 dstmpte, m); 2808 } else { 2809 pmap_unwire_pte_hold(dst_pmap, dstmpte); 2810 } 2811 if (dstmpte->hold_count >= srcmpte->hold_count) 2812 break; 2813 } 2814 addr += PAGE_SIZE; 2815 src_pte++; 2816 dst_pte++; 2817 } 2818 } 2819} 2820 2821/* 2822 * Routine: pmap_kernel 2823 * Function: 2824 * Returns the physical map handle for the kernel. 2825 */ 2826pmap_t 2827pmap_kernel() 2828{ 2829 return (kernel_pmap); 2830} 2831 2832/* 2833 * pmap_zero_page zeros the specified hardware page by mapping 2834 * the page into KVM and using bzero to clear its contents. 2835 */ 2836void 2837pmap_zero_page(vm_offset_t phys) 2838{ 2839 2840 if (*CMAP2) 2841 panic("pmap_zero_page: CMAP2 busy"); 2842 2843 *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; 2844 invltlb_1pg((vm_offset_t)CADDR2); 2845 2846#if defined(I686_CPU) 2847 if (cpu_class == CPUCLASS_686) 2848 i686_pagezero(CADDR2); 2849 else 2850#endif 2851 bzero(CADDR2, PAGE_SIZE); 2852 *CMAP2 = 0; 2853} 2854 2855/* 2856 * pmap_zero_page_area zeros the specified hardware page by mapping 2857 * the page into KVM and using bzero to clear its contents. 2858 * 2859 * off and size may not cover an area beyond a single hardware page. 2860 */ 2861void 2862pmap_zero_page_area(vm_offset_t phys, int off, int size) 2863{ 2864 2865 if (*CMAP2) 2866 panic("pmap_zero_page: CMAP2 busy"); 2867 2868 *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; 2869 invltlb_1pg((vm_offset_t)CADDR2); 2870 2871#if defined(I686_CPU) 2872 if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) 2873 i686_pagezero(CADDR2); 2874 else 2875#endif 2876 bzero((char *)CADDR2 + off, size); 2877 *CMAP2 = 0; 2878} 2879 2880/* 2881 * pmap_copy_page copies the specified (machine independent) 2882 * page by mapping the page into virtual memory and using 2883 * bcopy to copy the page, one machine dependent page at a 2884 * time. 2885 */ 2886void 2887pmap_copy_page(vm_offset_t src, vm_offset_t dst) 2888{ 2889 2890 if (*CMAP1) 2891 panic("pmap_copy_page: CMAP1 busy"); 2892 if (*CMAP2) 2893 panic("pmap_copy_page: CMAP2 busy"); 2894 2895 *CMAP1 = PG_V | (src & PG_FRAME) | PG_A; 2896 *CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; 2897#ifdef I386_CPU 2898 invltlb(); 2899#else 2900 invlpg((u_int)CADDR1); 2901 invlpg((u_int)CADDR2); 2902#endif 2903 2904 bcopy(CADDR1, CADDR2, PAGE_SIZE); 2905 2906 *CMAP1 = 0; 2907 *CMAP2 = 0; 2908} 2909 2910 2911/* 2912 * Routine: pmap_pageable 2913 * Function: 2914 * Make the specified pages (by pmap, offset) 2915 * pageable (or not) as requested. 2916 * 2917 * A page which is not pageable may not take 2918 * a fault; therefore, its page table entry 2919 * must remain valid for the duration. 2920 * 2921 * This routine is merely advisory; pmap_enter 2922 * will specify that these pages are to be wired 2923 * down (or not) as appropriate. 2924 */ 2925void 2926pmap_pageable(pmap, sva, eva, pageable) 2927 pmap_t pmap; 2928 vm_offset_t sva, eva; 2929 boolean_t pageable; 2930{ 2931} 2932 2933/* 2934 * this routine returns true if a physical page resides 2935 * in the given pmap. 2936 */ 2937boolean_t 2938pmap_page_exists(pmap, m) 2939 pmap_t pmap; 2940 vm_page_t m; 2941{ 2942 register pv_entry_t pv; 2943 int s; 2944 2945 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2946 return FALSE; 2947 2948 s = splvm(); 2949 2950 /* 2951 * Not found, check current mappings returning immediately if found. 2952 */ 2953 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2954 if (pv->pv_pmap == pmap) { 2955 splx(s); 2956 return TRUE; 2957 } 2958 } 2959 splx(s); 2960 return (FALSE); 2961} 2962 2963#define PMAP_REMOVE_PAGES_CURPROC_ONLY 2964/* 2965 * Remove all pages from specified address space 2966 * this aids process exit speeds. Also, this code 2967 * is special cased for current process only, but 2968 * can have the more generic (and slightly slower) 2969 * mode enabled. This is much faster than pmap_remove 2970 * in the case of running down an entire address space. 2971 */ 2972void 2973pmap_remove_pages(pmap, sva, eva) 2974 pmap_t pmap; 2975 vm_offset_t sva, eva; 2976{ 2977 pt_entry_t *pte, tpte; 2978 vm_page_t m; 2979 pv_entry_t pv, npv; 2980 int s; 2981 2982#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2983 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { 2984 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2985 return; 2986 } 2987#endif 2988 2989 s = splvm(); 2990 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2991 2992 if (pv->pv_va >= eva || pv->pv_va < sva) { 2993 npv = TAILQ_NEXT(pv, pv_plist); 2994 continue; 2995 } 2996 2997#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2998 pte = vtopte(pv->pv_va); 2999#else 3000 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3001#endif 3002 tpte = *pte; 3003 3004 if (tpte == 0) { 3005 printf("TPTE at %p IS ZERO @ VA %08x\n", 3006 pte, pv->pv_va); 3007 panic("bad pte"); 3008 } 3009 3010/* 3011 * We cannot remove wired pages from a process' mapping at this time 3012 */ 3013 if (tpte & PG_W) { 3014 npv = TAILQ_NEXT(pv, pv_plist); 3015 continue; 3016 } 3017 3018 m = PHYS_TO_VM_PAGE(tpte); 3019 KASSERT(m->phys_addr == (tpte & PG_FRAME), 3020 ("vm_page_t %p phys_addr mismatch %08x %08x", 3021 m, m->phys_addr, tpte)); 3022 3023 KASSERT(m < &vm_page_array[vm_page_array_size], 3024 ("pmap_remove_pages: bad tpte %x", tpte)); 3025 3026 pv->pv_pmap->pm_stats.resident_count--; 3027 3028 *pte = 0; 3029 3030 /* 3031 * Update the vm_page_t clean and reference bits. 3032 */ 3033 if (tpte & PG_M) { 3034 vm_page_dirty(m); 3035 } 3036 3037 npv = TAILQ_NEXT(pv, pv_plist); 3038 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 3039 3040 m->md.pv_list_count--; 3041 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3042 if (TAILQ_FIRST(&m->md.pv_list) == NULL) { 3043 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 3044 } 3045 3046 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 3047 free_pv_entry(pv); 3048 } 3049 splx(s); 3050 pmap_invalidate_all(pmap); 3051} 3052 3053/* 3054 * pmap_testbit tests bits in pte's 3055 * note that the testbit/changebit routines are inline, 3056 * and a lot of things compile-time evaluate. 3057 */ 3058static boolean_t 3059pmap_testbit(m, bit) 3060 vm_page_t m; 3061 int bit; 3062{ 3063 pv_entry_t pv; 3064 pt_entry_t *pte; 3065 int s; 3066 3067 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3068 return FALSE; 3069 3070 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 3071 return FALSE; 3072 3073 s = splvm(); 3074 3075 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3076 /* 3077 * if the bit being tested is the modified bit, then 3078 * mark clean_map and ptes as never 3079 * modified. 3080 */ 3081 if (bit & (PG_A|PG_M)) { 3082 if (!pmap_track_modified(pv->pv_va)) 3083 continue; 3084 } 3085 3086#if defined(PMAP_DIAGNOSTIC) 3087 if (!pv->pv_pmap) { 3088 printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); 3089 continue; 3090 } 3091#endif 3092 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3093 if (*pte & bit) { 3094 splx(s); 3095 return TRUE; 3096 } 3097 } 3098 splx(s); 3099 return (FALSE); 3100} 3101 3102/* 3103 * this routine is used to modify bits in ptes 3104 */ 3105static __inline void 3106pmap_changebit(vm_page_t m, int bit, boolean_t setem) 3107{ 3108 register pv_entry_t pv; 3109 register pt_entry_t *pte; 3110 int s; 3111 3112 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3113 return; 3114 3115 s = splvm(); 3116 3117 /* 3118 * Loop over all current mappings setting/clearing as appropos If 3119 * setting RO do we need to clear the VAC? 3120 */ 3121 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3122 /* 3123 * don't write protect pager mappings 3124 */ 3125 if (!setem && (bit == PG_RW)) { 3126 if (!pmap_track_modified(pv->pv_va)) 3127 continue; 3128 } 3129 3130#if defined(PMAP_DIAGNOSTIC) 3131 if (!pv->pv_pmap) { 3132 printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); 3133 continue; 3134 } 3135#endif 3136 3137 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3138 3139 if (setem) { 3140 *pte |= bit; 3141 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3142 } else { 3143 pt_entry_t pbits = *pte; 3144 if (pbits & bit) { 3145 if (bit == PG_RW) { 3146 if (pbits & PG_M) { 3147 vm_page_dirty(m); 3148 } 3149 *pte = pbits & ~(PG_M|PG_RW); 3150 } else { 3151 *pte = pbits & ~bit; 3152 } 3153 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3154 } 3155 } 3156 } 3157 splx(s); 3158} 3159 3160/* 3161 * pmap_page_protect: 3162 * 3163 * Lower the permission for all mappings to a given page. 3164 */ 3165void 3166pmap_page_protect(vm_page_t m, vm_prot_t prot) 3167{ 3168 if ((prot & VM_PROT_WRITE) == 0) { 3169 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3170 pmap_changebit(m, PG_RW, FALSE); 3171 } else { 3172 pmap_remove_all(m); 3173 } 3174 } 3175} 3176 3177vm_offset_t 3178pmap_phys_address(ppn) 3179 int ppn; 3180{ 3181 return (i386_ptob(ppn)); 3182} 3183 3184/* 3185 * pmap_ts_referenced: 3186 * 3187 * Return the count of reference bits for a page, clearing all of them. 3188 */ 3189int 3190pmap_ts_referenced(vm_page_t m) 3191{ 3192 register pv_entry_t pv, pvf, pvn; 3193 pt_entry_t *pte; 3194 int s; 3195 int rtval = 0; 3196 3197 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3198 return (rtval); 3199 3200 s = splvm(); 3201 3202 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3203 3204 pvf = pv; 3205 3206 do { 3207 pvn = TAILQ_NEXT(pv, pv_list); 3208 3209 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3210 3211 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3212 3213 if (!pmap_track_modified(pv->pv_va)) 3214 continue; 3215 3216 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3217 3218 if (pte && (*pte & PG_A)) { 3219 *pte &= ~PG_A; 3220 3221 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3222 3223 rtval++; 3224 if (rtval > 4) { 3225 break; 3226 } 3227 } 3228 } while ((pv = pvn) != NULL && pv != pvf); 3229 } 3230 splx(s); 3231 3232 return (rtval); 3233} 3234 3235/* 3236 * pmap_is_modified: 3237 * 3238 * Return whether or not the specified physical page was modified 3239 * in any physical maps. 3240 */ 3241boolean_t 3242pmap_is_modified(vm_page_t m) 3243{ 3244 return pmap_testbit(m, PG_M); 3245} 3246 3247/* 3248 * Clear the modify bits on the specified physical page. 3249 */ 3250void 3251pmap_clear_modify(vm_page_t m) 3252{ 3253 pmap_changebit(m, PG_M, FALSE); 3254} 3255 3256/* 3257 * pmap_clear_reference: 3258 * 3259 * Clear the reference bit on the specified physical page. 3260 */ 3261void 3262pmap_clear_reference(vm_page_t m) 3263{ 3264 pmap_changebit(m, PG_A, FALSE); 3265} 3266 3267/* 3268 * Miscellaneous support routines follow 3269 */ 3270 3271static void 3272i386_protection_init() 3273{ 3274 register int *kp, prot; 3275 3276 kp = protection_codes; 3277 for (prot = 0; prot < 8; prot++) { 3278 switch (prot) { 3279 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: 3280 /* 3281 * Read access is also 0. There isn't any execute bit, 3282 * so just make it readable. 3283 */ 3284 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: 3285 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: 3286 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: 3287 *kp++ = 0; 3288 break; 3289 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: 3290 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: 3291 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: 3292 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: 3293 *kp++ = PG_RW; 3294 break; 3295 } 3296 } 3297} 3298 3299/* 3300 * Map a set of physical memory pages into the kernel virtual 3301 * address space. Return a pointer to where it is mapped. This 3302 * routine is intended to be used for mapping device memory, 3303 * NOT real memory. 3304 */ 3305void * 3306pmap_mapdev(pa, size) 3307 vm_offset_t pa; 3308 vm_size_t size; 3309{ 3310 vm_offset_t va, tmpva, offset; 3311 pt_entry_t *pte; 3312 3313 offset = pa & PAGE_MASK; 3314 size = roundup(offset + size, PAGE_SIZE); 3315 3316 GIANT_REQUIRED; 3317 3318 va = kmem_alloc_pageable(kernel_map, size); 3319 if (!va) 3320 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 3321 3322 pa = pa & PG_FRAME; 3323 for (tmpva = va; size > 0;) { 3324 pte = vtopte(tmpva); 3325 *pte = pa | PG_RW | PG_V | pgeflag; 3326 size -= PAGE_SIZE; 3327 tmpva += PAGE_SIZE; 3328 pa += PAGE_SIZE; 3329 } 3330 invltlb(); 3331 3332 return ((void *)(va + offset)); 3333} 3334 3335void 3336pmap_unmapdev(va, size) 3337 vm_offset_t va; 3338 vm_size_t size; 3339{ 3340 vm_offset_t base, offset; 3341 3342 base = va & PG_FRAME; 3343 offset = va & PAGE_MASK; 3344 size = roundup(offset + size, PAGE_SIZE); 3345 kmem_free(kernel_map, base, size); 3346} 3347 3348/* 3349 * perform the pmap work for mincore 3350 */ 3351int 3352pmap_mincore(pmap, addr) 3353 pmap_t pmap; 3354 vm_offset_t addr; 3355{ 3356 pt_entry_t *ptep, pte; 3357 vm_page_t m; 3358 int val = 0; 3359 3360 ptep = pmap_pte(pmap, addr); 3361 if (ptep == 0) { 3362 return 0; 3363 } 3364 3365 if ((pte = *ptep) != 0) { 3366 vm_offset_t pa; 3367 3368 val = MINCORE_INCORE; 3369 if ((pte & PG_MANAGED) == 0) 3370 return val; 3371 3372 pa = pte & PG_FRAME; 3373 3374 m = PHYS_TO_VM_PAGE(pa); 3375 3376 /* 3377 * Modified by us 3378 */ 3379 if (pte & PG_M) 3380 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3381 /* 3382 * Modified by someone 3383 */ 3384 else if (m->dirty || pmap_is_modified(m)) 3385 val |= MINCORE_MODIFIED_OTHER; 3386 /* 3387 * Referenced by us 3388 */ 3389 if (pte & PG_A) 3390 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3391 3392 /* 3393 * Referenced by someone 3394 */ 3395 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { 3396 val |= MINCORE_REFERENCED_OTHER; 3397 vm_page_flag_set(m, PG_REFERENCED); 3398 } 3399 } 3400 return val; 3401} 3402 3403void 3404pmap_activate(struct thread *td) 3405{ 3406 struct proc *p = td->td_proc; 3407 pmap_t pmap; 3408 u_int32_t cr3; 3409 3410 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3411#if defined(SMP) 3412 pmap->pm_active |= 1 << PCPU_GET(cpuid); 3413#else 3414 pmap->pm_active |= 1; 3415#endif 3416#if defined(SWTCH_OPTIM_STATS) 3417 tlb_flush_count++; 3418#endif 3419 cr3 = vtophys(pmap->pm_pdir); 3420 /* XXXKSE this is wrong. 3421 * pmap_activate is for the current thread on the current cpu 3422 */ 3423 if (p->p_flag & P_KSES) { 3424 /* Make sure all other cr3 entries are updated. */ 3425 /* what if they are running? XXXKSE (maybe abort them) */ 3426 FOREACH_THREAD_IN_PROC(p, td) { 3427 td->td_pcb->pcb_cr3 = cr3; 3428 } 3429 } else { 3430 td->td_pcb->pcb_cr3 = cr3; 3431 } 3432 load_cr3(cr3); 3433} 3434 3435vm_offset_t 3436pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3437{ 3438 3439 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3440 return addr; 3441 } 3442 3443 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 3444 return addr; 3445} 3446 3447 3448#if defined(PMAP_DEBUG) 3449pmap_pid_dump(int pid) 3450{ 3451 pmap_t pmap; 3452 struct proc *p; 3453 int npte = 0; 3454 int index; 3455 3456 sx_slock(&allproc_lock); 3457 LIST_FOREACH(p, &allproc, p_list) { 3458 if (p->p_pid != pid) 3459 continue; 3460 3461 if (p->p_vmspace) { 3462 int i,j; 3463 index = 0; 3464 pmap = vmspace_pmap(p->p_vmspace); 3465 for (i = 0; i < NPDEPG; i++) { 3466 pd_entry_t *pde; 3467 pt_entry_t *pte; 3468 vm_offset_t base = i << PDRSHIFT; 3469 3470 pde = &pmap->pm_pdir[i]; 3471 if (pde && pmap_pde_v(pde)) { 3472 for (j = 0; j < NPTEPG; j++) { 3473 vm_offset_t va = base + (j << PAGE_SHIFT); 3474 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 3475 if (index) { 3476 index = 0; 3477 printf("\n"); 3478 } 3479 sx_sunlock(&allproc_lock); 3480 return npte; 3481 } 3482 pte = pmap_pte_quick(pmap, va); 3483 if (pte && pmap_pte_v(pte)) { 3484 pt_entry_t pa; 3485 vm_page_t m; 3486 pa = *pte; 3487 m = PHYS_TO_VM_PAGE(pa); 3488 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 3489 va, pa, m->hold_count, m->wire_count, m->flags); 3490 npte++; 3491 index++; 3492 if (index >= 2) { 3493 index = 0; 3494 printf("\n"); 3495 } else { 3496 printf(" "); 3497 } 3498 } 3499 } 3500 } 3501 } 3502 } 3503 } 3504 sx_sunlock(&allproc_lock); 3505 return npte; 3506} 3507#endif 3508 3509#if defined(DEBUG) 3510 3511static void pads __P((pmap_t pm)); 3512void pmap_pvdump __P((vm_offset_t pa)); 3513 3514/* print address space of pmap*/ 3515static void 3516pads(pm) 3517 pmap_t pm; 3518{ 3519 int i, j; 3520 vm_offset_t va; 3521 pt_entry_t *ptep; 3522 3523 if (pm == kernel_pmap) 3524 return; 3525 for (i = 0; i < NPDEPG; i++) 3526 if (pm->pm_pdir[i]) 3527 for (j = 0; j < NPTEPG; j++) { 3528 va = (i << PDRSHIFT) + (j << PAGE_SHIFT); 3529 if (pm == kernel_pmap && va < KERNBASE) 3530 continue; 3531 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 3532 continue; 3533 ptep = pmap_pte_quick(pm, va); 3534 if (pmap_pte_v(ptep)) 3535 printf("%x:%x ", va, *ptep); 3536 }; 3537 3538} 3539 3540void 3541pmap_pvdump(pa) 3542 vm_offset_t pa; 3543{ 3544 pv_entry_t pv; 3545 vm_page_t m; 3546 3547 printf("pa %x", pa); 3548 m = PHYS_TO_VM_PAGE(pa); 3549 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3550 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3551 pads(pv->pv_pmap); 3552 } 3553 printf(" "); 3554} 3555#endif 3556