pmap.c revision 38807
1/* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department and William Jolitz of UUNET Technologies Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 42 * $Id: pmap.c,v 1.207 1998/08/23 10:16:25 bde Exp $ 43 */ 44 45/* 46 * Manages physical address maps. 47 * 48 * In addition to hardware address maps, this 49 * module is called upon to provide software-use-only 50 * maps which may or may not be stored in the same 51 * form as hardware maps. These pseudo-maps are 52 * used to store intermediate results from copy 53 * operations to and from address spaces. 54 * 55 * Since the information managed by this module is 56 * also stored by the logical address mapping module, 57 * this module may throw away valid virtual-to-physical 58 * mappings at almost any time. However, invalidations 59 * of virtual-to-physical mappings must be done as 60 * requested. 61 * 62 * In order to cope with hardware architectures which 63 * make virtual-to-physical map invalidates expensive, 64 * this module may delay invalidate or reduced protection 65 * operations until such time as they are actually 66 * necessary. This module is given full information as 67 * to which processors are currently using which maps, 68 * and to when physical maps must be made correct. 69 */ 70 71#include "opt_disable_pse.h" 72#include "opt_pmap.h" 73#include "opt_msgbuf.h" 74 75#include <sys/param.h> 76#include <sys/systm.h> 77#include <sys/proc.h> 78#include <sys/msgbuf.h> 79#include <sys/vmmeter.h> 80#include <sys/mman.h> 81 82#include <vm/vm.h> 83#include <vm/vm_param.h> 84#include <vm/vm_prot.h> 85#include <sys/lock.h> 86#include <vm/vm_kern.h> 87#include <vm/vm_page.h> 88#include <vm/vm_map.h> 89#include <vm/vm_object.h> 90#include <vm/vm_extern.h> 91#include <vm/vm_pageout.h> 92#include <vm/vm_pager.h> 93#include <vm/vm_zone.h> 94 95#include <sys/user.h> 96 97#include <machine/cputypes.h> 98#include <machine/md_var.h> 99#include <machine/specialreg.h> 100#if defined(SMP) || defined(APIC_IO) 101#include <machine/smp.h> 102#include <machine/apic.h> 103#endif /* SMP || APIC_IO */ 104 105#define PMAP_KEEP_PDIRS 106#ifndef PMAP_SHPGPERPROC 107#define PMAP_SHPGPERPROC 200 108#endif 109 110#if defined(DIAGNOSTIC) 111#define PMAP_DIAGNOSTIC 112#endif 113 114#define MINPV 2048 115 116#if !defined(PMAP_DIAGNOSTIC) 117#define PMAP_INLINE __inline 118#else 119#define PMAP_INLINE 120#endif 121 122/* 123 * Get PDEs and PTEs for user/kernel address space 124 */ 125#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) 126#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 127 128#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) 129#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) 130#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) 131#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) 132#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) 133 134#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) 135#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 136 137/* 138 * Given a map and a machine independent protection code, 139 * convert to a vax protection code. 140 */ 141#define pte_prot(m, p) (protection_codes[p]) 142static int protection_codes[8]; 143 144#define pa_index(pa) atop((pa) - vm_first_phys) 145#define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) 146 147static struct pmap kernel_pmap_store; 148pmap_t kernel_pmap; 149extern pd_entry_t my_idlePTD; 150 151vm_offset_t avail_start; /* PA of first available physical page */ 152vm_offset_t avail_end; /* PA of last available physical page */ 153vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 154vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 155static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ 156static vm_offset_t vm_first_phys; 157static int pgeflag; /* PG_G or-in */ 158static int pseflag; /* PG_PS or-in */ 159static int pv_npg; 160 161static vm_object_t kptobj; 162 163static int nkpt; 164vm_offset_t kernel_vm_end; 165 166/* 167 * Data for the pv entry allocation mechanism 168 */ 169static vm_zone_t pvzone; 170static struct vm_zone pvzone_store; 171static struct vm_object pvzone_obj; 172static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; 173static int pmap_pagedaemon_waken = 0; 174static struct pv_entry *pvinit; 175 176/* 177 * All those kernel PT submaps that BSD is so fond of 178 */ 179pt_entry_t *CMAP1 = 0; 180static pt_entry_t *CMAP2, *ptmmap; 181static pv_table_t *pv_table; 182caddr_t CADDR1 = 0, ptvmmap = 0; 183static caddr_t CADDR2; 184static pt_entry_t *msgbufmap; 185struct msgbuf *msgbufp=0; 186 187#ifdef SMP 188extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[]; 189extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3; 190extern pd_entry_t *IdlePTDS[]; 191extern pt_entry_t SMP_prvpt[]; 192#endif 193 194#ifdef SMP 195extern unsigned int prv_PPAGE1[]; 196extern pt_entry_t *prv_PMAP1; 197#else 198static pt_entry_t *PMAP1 = 0; 199static unsigned *PADDR1 = 0; 200#endif 201 202static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv)); 203static unsigned * get_ptbase __P((pmap_t pmap)); 204static pv_entry_t get_pv_entry __P((void)); 205static void i386_protection_init __P((void)); 206static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); 207 208static PMAP_INLINE int pmap_is_managed __P((vm_offset_t pa)); 209static void pmap_remove_all __P((vm_offset_t pa)); 210static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, 211 vm_offset_t pa, vm_page_t mpte)); 212static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq, 213 vm_offset_t sva)); 214static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); 215static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv, 216 vm_offset_t va)); 217static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); 218static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, 219 vm_page_t mpte, vm_offset_t pa)); 220 221static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); 222 223static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); 224static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex)); 225static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); 226static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex)); 227static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); 228static vm_offset_t pmap_kmem_choose(vm_offset_t addr); 229void pmap_collect(void); 230 231static unsigned pdir4mb; 232 233/* 234 * Routine: pmap_pte 235 * Function: 236 * Extract the page table entry associated 237 * with the given map/virtual_address pair. 238 */ 239 240PMAP_INLINE unsigned * 241pmap_pte(pmap, va) 242 register pmap_t pmap; 243 vm_offset_t va; 244{ 245 unsigned *pdeaddr; 246 247 if (pmap) { 248 pdeaddr = (unsigned *) pmap_pde(pmap, va); 249 if (*pdeaddr & PG_PS) 250 return pdeaddr; 251 if (*pdeaddr) { 252 return get_ptbase(pmap) + i386_btop(va); 253 } 254 } 255 return (0); 256} 257 258/* 259 * Move the kernel virtual free pointer to the next 260 * 4MB. This is used to help improve performance 261 * by using a large (4MB) page for much of the kernel 262 * (.text, .data, .bss) 263 */ 264static vm_offset_t 265pmap_kmem_choose(vm_offset_t addr) { 266 vm_offset_t newaddr = addr; 267#ifndef DISABLE_PSE 268 if (cpu_feature & CPUID_PSE) { 269 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 270 } 271#endif 272 return newaddr; 273} 274 275/* 276 * Bootstrap the system enough to run with virtual memory. 277 * 278 * On the i386 this is called after mapping has already been enabled 279 * and just syncs the pmap module with what has already been done. 280 * [We can't call it easily with mapping off since the kernel is not 281 * mapped with PA == VA, hence we would have to relocate every address 282 * from the linked base (virtual) address "KERNBASE" to the actual 283 * (physical) address starting relative to 0] 284 */ 285void 286pmap_bootstrap(firstaddr, loadaddr) 287 vm_offset_t firstaddr; 288 vm_offset_t loadaddr; 289{ 290 vm_offset_t va; 291 pt_entry_t *pte; 292 int i, j; 293 294 avail_start = firstaddr; 295 296 /* 297 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too 298 * large. It should instead be correctly calculated in locore.s and 299 * not based on 'first' (which is a physical address, not a virtual 300 * address, for the start of unused physical memory). The kernel 301 * page tables are NOT double mapped and thus should not be included 302 * in this calculation. 303 */ 304 virtual_avail = (vm_offset_t) KERNBASE + firstaddr; 305 virtual_avail = pmap_kmem_choose(virtual_avail); 306 307 virtual_end = VM_MAX_KERNEL_ADDRESS; 308 309 /* 310 * Initialize protection array. 311 */ 312 i386_protection_init(); 313 314 /* 315 * The kernel's pmap is statically allocated so we don't have to use 316 * pmap_create, which is unlikely to work correctly at this part of 317 * the boot sequence (XXX and which no longer exists). 318 */ 319 kernel_pmap = &kernel_pmap_store; 320 321 kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); 322 323 kernel_pmap->pm_count = 1; 324 TAILQ_INIT(&kernel_pmap->pm_pvlist); 325 nkpt = NKPT; 326 327 /* 328 * Reserve some special page table entries/VA space for temporary 329 * mapping of pages. 330 */ 331#define SYSMAP(c, p, v, n) \ 332 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 333 334 va = virtual_avail; 335 pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); 336 337 /* 338 * CMAP1/CMAP2 are used for zeroing and copying pages. 339 */ 340 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 341 SYSMAP(caddr_t, CMAP2, CADDR2, 1) 342 343 /* 344 * ptvmmap is used for reading arbitrary physical pages via /dev/mem. 345 * XXX ptmmap is not used. 346 */ 347 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 348 349 /* 350 * msgbufp is used to map the system message buffer. 351 * XXX msgbufmap is not used. 352 */ 353 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 354 atop(round_page(MSGBUF_SIZE))) 355 356#if !defined(SMP) 357 /* 358 * ptemap is used for pmap_pte_quick 359 */ 360 SYSMAP(unsigned *, PMAP1, PADDR1, 1); 361#endif 362 363 virtual_avail = va; 364 365 *(int *) CMAP1 = *(int *) CMAP2 = 0; 366 *(int *) PTD = 0; 367 368 369 pgeflag = 0; 370#if !defined(SMP) 371 if (cpu_feature & CPUID_PGE) { 372 pgeflag = PG_G; 373 } 374#endif 375 376/* 377 * Initialize the 4MB page size flag 378 */ 379 pseflag = 0; 380/* 381 * The 4MB page version of the initial 382 * kernel page mapping. 383 */ 384 pdir4mb = 0; 385 386#if !defined(DISABLE_PSE) 387 if (cpu_feature & CPUID_PSE) { 388 unsigned ptditmp; 389 /* 390 * Enable the PSE mode 391 */ 392 load_cr4(rcr4() | CR4_PSE); 393 394 /* 395 * Note that we have enabled PSE mode 396 */ 397 pseflag = PG_PS; 398 ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE)); 399 ptditmp &= ~(NBPDR - 1); 400 ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; 401 pdir4mb = ptditmp; 402 /* 403 * We can do the mapping here for the single processor 404 * case. We simply ignore the old page table page from 405 * now on. 406 */ 407#if !defined(SMP) 408 PTD[KPTDI] = (pd_entry_t) ptditmp; 409 kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; 410 invltlb(); 411#endif 412 } 413#endif 414 415#ifdef SMP 416 if (cpu_apic_address == 0) 417 panic("pmap_bootstrap: no local apic!"); 418 419 /* 0 = private page */ 420 /* 1 = page table page */ 421 /* 2 = local apic */ 422 /* 16-31 = io apics */ 423 SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag | 424 (cpu_apic_address & PG_FRAME)); 425 426 for (i = 0; i < mp_napics; i++) { 427 for (j = 0; j < 16; j++) { 428 /* same page frame as a previous IO apic? */ 429 if (((vm_offset_t)SMP_prvpt[j + 16] & PG_FRAME) == 430 (io_apic_address[0] & PG_FRAME)) { 431 ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE]; 432 break; 433 } 434 /* use this slot if available */ 435 if (((vm_offset_t)SMP_prvpt[j + 16] & PG_FRAME) == 0) { 436 SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW | 437 pgeflag | (io_apic_address[i] & PG_FRAME)); 438 ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE]; 439 break; 440 } 441 } 442 if (j == 16) 443 panic("no space to map IO apic %d!", i); 444 } 445 446 /* BSP does this itself, AP's get it pre-set */ 447 prv_CMAP1 = &SMP_prvpt[3 + UPAGES]; 448 prv_CMAP2 = &SMP_prvpt[4 + UPAGES]; 449 prv_CMAP3 = &SMP_prvpt[5 + UPAGES]; 450 prv_PMAP1 = &SMP_prvpt[6 + UPAGES]; 451#endif 452 453 invltlb(); 454 455} 456 457void 458getmtrr() 459{ 460 int i; 461 462 if (cpu == CPU_686) { 463 for(i = 0; i < NPPROVMTRR; i++) { 464 PPro_vmtrr[i].base = rdmsr(PPRO_VMTRRphysBase0 + i * 2); 465 PPro_vmtrr[i].mask = rdmsr(PPRO_VMTRRphysMask0 + i * 2); 466 } 467 } 468} 469 470void 471putmtrr() 472{ 473 int i; 474 475 if (cpu == CPU_686) { 476 wbinvd(); 477 for(i = 0; i < NPPROVMTRR; i++) { 478 wrmsr(PPRO_VMTRRphysBase0 + i * 2, PPro_vmtrr[i].base); 479 wrmsr(PPRO_VMTRRphysMask0 + i * 2, PPro_vmtrr[i].mask); 480 } 481 } 482} 483 484void 485pmap_setvidram(void) 486{ 487#if 0 488 if (cpu == CPU_686) { 489 wbinvd(); 490 /* 491 * Set memory between 0-640K to be WB 492 */ 493 wrmsr(0x250, 0x0606060606060606LL); 494 wrmsr(0x258, 0x0606060606060606LL); 495 /* 496 * Set normal, PC video memory to be WC 497 */ 498 wrmsr(0x259, 0x0101010101010101LL); 499 } 500#endif 501} 502 503void 504pmap_setdevram(unsigned long long basea, vm_offset_t sizea) 505{ 506 int i, free, skip; 507 unsigned basepage, basepaget; 508 unsigned long long base; 509 unsigned long long mask; 510 511 if (cpu != CPU_686) 512 return; 513 514 free = -1; 515 skip = 0; 516 basea &= ~0xfff; 517 base = basea | 0x1; 518 mask = (long long) (0xfffffffffLL - ((long) sizea - 1)) | (long long) 0x800; 519 mask &= ~0x7ff; 520 521 basepage = (long long) (base >> 12); 522 for(i = 0; i < NPPROVMTRR; i++) { 523 PPro_vmtrr[i].base = rdmsr(PPRO_VMTRRphysBase0 + i * 2); 524 PPro_vmtrr[i].mask = rdmsr(PPRO_VMTRRphysMask0 + i * 2); 525 basepaget = (long long) (PPro_vmtrr[i].base >> 12); 526 if (basepage == basepaget) 527 skip = 1; 528 if ((PPro_vmtrr[i].mask & 0x800) == 0) { 529 if (free == -1) 530 free = i; 531 } 532 } 533 534 if (!skip && free != -1) { 535 wbinvd(); 536 PPro_vmtrr[free].base = base; 537 PPro_vmtrr[free].mask = mask; 538 wrmsr(PPRO_VMTRRphysBase0 + free * 2, base); 539 wrmsr(PPRO_VMTRRphysMask0 + free * 2, mask); 540 printf( 541 "pmap: added WC mapping at page: 0x%x %x, size: %u mask: 0x%x %x\n", 542 (u_int)(base >> 32), (u_int)base, sizea, 543 (u_int)(mask >> 32), (u_int)mask); 544 } 545} 546 547/* 548 * Set 4mb pdir for mp startup, and global flags 549 */ 550void 551pmap_set_opt(unsigned *pdir) { 552 int i; 553 554 if (pseflag && (cpu_feature & CPUID_PSE)) { 555 load_cr4(rcr4() | CR4_PSE); 556 if (pdir4mb) { 557 pdir[KPTDI] = pdir4mb; 558 } 559 } 560 561 if (pgeflag && (cpu_feature & CPUID_PGE)) { 562 load_cr4(rcr4() | CR4_PGE); 563 for(i = KPTDI; i < KPTDI + nkpt; i++) { 564 if (pdir[i]) { 565 pdir[i] |= PG_G; 566 } 567 } 568 } 569} 570 571/* 572 * Setup the PTD for the boot processor 573 */ 574void 575pmap_set_opt_bsp(void) 576{ 577 pmap_set_opt((unsigned *)kernel_pmap->pm_pdir); 578 pmap_set_opt((unsigned *)PTD); 579 invltlb(); 580} 581 582/* 583 * Initialize the pmap module. 584 * Called by vm_init, to initialize any structures that the pmap 585 * system needs to map virtual memory. 586 * pmap_init has been enhanced to support in a fairly consistant 587 * way, discontiguous physical memory. 588 */ 589void 590pmap_init(phys_start, phys_end) 591 vm_offset_t phys_start, phys_end; 592{ 593 vm_offset_t addr; 594 vm_size_t s; 595 int i; 596 int initial_pvs; 597 598 /* 599 * calculate the number of pv_entries needed 600 */ 601 vm_first_phys = phys_avail[0]; 602 for (i = 0; phys_avail[i + 1]; i += 2); 603 pv_npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE; 604 605 /* 606 * Allocate memory for random pmap data structures. Includes the 607 * pv_head_table. 608 */ 609 s = (vm_size_t) (sizeof(pv_table_t) * pv_npg); 610 s = round_page(s); 611 612 addr = (vm_offset_t) kmem_alloc(kernel_map, s); 613 pv_table = (pv_table_t *) addr; 614 for(i = 0; i < pv_npg; i++) { 615 vm_offset_t pa; 616 TAILQ_INIT(&pv_table[i].pv_list); 617 pv_table[i].pv_list_count = 0; 618 pa = vm_first_phys + i * PAGE_SIZE; 619 pv_table[i].pv_vm_page = PHYS_TO_VM_PAGE(pa); 620 } 621 622 /* 623 * init the pv free list 624 */ 625 initial_pvs = pv_npg; 626 if (initial_pvs < MINPV) 627 initial_pvs = MINPV; 628 pvzone = &pvzone_store; 629 pvinit = (struct pv_entry *) kmem_alloc(kernel_map, 630 initial_pvs * sizeof (struct pv_entry)); 631 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, pv_npg); 632 /* 633 * object for kernel page table pages 634 */ 635 kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE); 636 637 /* 638 * Now it is safe to enable pv_table recording. 639 */ 640 pmap_initialized = TRUE; 641} 642 643/* 644 * Initialize the address space (zone) for the pv_entries. Set a 645 * high water mark so that the system can recover from excessive 646 * numbers of pv entries. 647 */ 648void 649pmap_init2() { 650 pv_entry_max = PMAP_SHPGPERPROC * maxproc + pv_npg; 651 pv_entry_high_water = 9 * (pv_entry_max / 10); 652 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); 653} 654 655/* 656 * Used to map a range of physical addresses into kernel 657 * virtual address space. 658 * 659 * For now, VM is already on, we only need to map the 660 * specified memory. 661 */ 662vm_offset_t 663pmap_map(virt, start, end, prot) 664 vm_offset_t virt; 665 vm_offset_t start; 666 vm_offset_t end; 667 int prot; 668{ 669 while (start < end) { 670 pmap_enter(kernel_pmap, virt, start, prot, FALSE); 671 virt += PAGE_SIZE; 672 start += PAGE_SIZE; 673 } 674 return (virt); 675} 676 677 678/*************************************************** 679 * Low level helper routines..... 680 ***************************************************/ 681 682#if defined(PMAP_DIAGNOSTIC) 683 684/* 685 * This code checks for non-writeable/modified pages. 686 * This should be an invalid condition. 687 */ 688static int 689pmap_nw_modified(pt_entry_t ptea) { 690 int pte; 691 692 pte = (int) ptea; 693 694 if ((pte & (PG_M|PG_RW)) == PG_M) 695 return 1; 696 else 697 return 0; 698} 699#endif 700 701 702/* 703 * this routine defines the region(s) of memory that should 704 * not be tested for the modified bit. 705 */ 706static PMAP_INLINE int 707pmap_track_modified( vm_offset_t va) { 708 if ((va < clean_sva) || (va >= clean_eva)) 709 return 1; 710 else 711 return 0; 712} 713 714static PMAP_INLINE void 715invltlb_1pg( vm_offset_t va) { 716#if defined(I386_CPU) 717 if (cpu_class == CPUCLASS_386) { 718 invltlb(); 719 } else 720#endif 721 { 722 invlpg(va); 723 } 724} 725 726static PMAP_INLINE void 727invltlb_2pg( vm_offset_t va1, vm_offset_t va2) { 728#if defined(I386_CPU) 729 if (cpu_class == CPUCLASS_386) { 730 invltlb(); 731 } else 732#endif 733 { 734 invlpg(va1); 735 invlpg(va2); 736 } 737} 738 739static unsigned * 740get_ptbase(pmap) 741 pmap_t pmap; 742{ 743 unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 744 745 /* are we current address space or kernel? */ 746 if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { 747 return (unsigned *) PTmap; 748 } 749 /* otherwise, we are alternate address space */ 750 if (frame != (((unsigned) APTDpde) & PG_FRAME)) { 751 APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); 752#if defined(SMP) 753 /* The page directory is not shared between CPUs */ 754 cpu_invltlb(); 755#else 756 invltlb(); 757#endif 758 } 759 return (unsigned *) APTmap; 760} 761 762/* 763 * Super fast pmap_pte routine best used when scanning 764 * the pv lists. This eliminates many coarse-grained 765 * invltlb calls. Note that many of the pv list 766 * scans are across different pmaps. It is very wasteful 767 * to do an entire invltlb for checking a single mapping. 768 */ 769 770static unsigned * 771pmap_pte_quick(pmap, va) 772 register pmap_t pmap; 773 vm_offset_t va; 774{ 775 unsigned pde, newpf; 776 if (pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) { 777 unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 778 unsigned index = i386_btop(va); 779 /* are we current address space or kernel? */ 780 if ((pmap == kernel_pmap) || 781 (frame == (((unsigned) PTDpde) & PG_FRAME))) { 782 return (unsigned *) PTmap + index; 783 } 784 newpf = pde & PG_FRAME; 785#ifdef SMP 786 if ( ((* (unsigned *) prv_PMAP1) & PG_FRAME) != newpf) { 787 * (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V; 788 cpu_invlpg(&prv_PPAGE1); 789 } 790 return prv_PPAGE1 + ((unsigned) index & (NPTEPG - 1)); 791#else 792 if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) { 793 * (unsigned *) PMAP1 = newpf | PG_RW | PG_V; 794 invltlb_1pg((vm_offset_t) PADDR1); 795 } 796 return PADDR1 + ((unsigned) index & (NPTEPG - 1)); 797#endif 798 } 799 return (0); 800} 801 802/* 803 * Routine: pmap_extract 804 * Function: 805 * Extract the physical page address associated 806 * with the given map/virtual_address pair. 807 */ 808vm_offset_t 809pmap_extract(pmap, va) 810 register pmap_t pmap; 811 vm_offset_t va; 812{ 813 vm_offset_t rtval; 814 vm_offset_t pdirindex; 815 pdirindex = va >> PDRSHIFT; 816 if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) { 817 unsigned *pte; 818 if ((rtval & PG_PS) != 0) { 819 rtval &= ~(NBPDR - 1); 820 rtval |= va & (NBPDR - 1); 821 return rtval; 822 } 823 pte = get_ptbase(pmap) + i386_btop(va); 824 rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); 825 return rtval; 826 } 827 return 0; 828 829} 830 831/* 832 * determine if a page is managed (memory vs. device) 833 */ 834static PMAP_INLINE int 835pmap_is_managed(pa) 836 vm_offset_t pa; 837{ 838 int i; 839 840 if (!pmap_initialized) 841 return 0; 842 843 for (i = 0; phys_avail[i + 1]; i += 2) { 844 if (pa < phys_avail[i + 1] && pa >= phys_avail[i]) 845 return 1; 846 } 847 return 0; 848} 849 850 851/*************************************************** 852 * Low level mapping routines..... 853 ***************************************************/ 854 855/* 856 * Add a list of wired pages to the kva 857 * this routine is only used for temporary 858 * kernel mappings that do not need to have 859 * page modification or references recorded. 860 * Note that old mappings are simply written 861 * over. The page *must* be wired. 862 */ 863void 864pmap_qenter(va, m, count) 865 vm_offset_t va; 866 vm_page_t *m; 867 int count; 868{ 869 int i; 870 register unsigned *pte; 871 872 for (i = 0; i < count; i++) { 873 vm_offset_t tva = va + i * PAGE_SIZE; 874 unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | pgeflag; 875 unsigned opte; 876 pte = (unsigned *)vtopte(tva); 877 opte = *pte; 878 *pte = npte; 879 if (opte) 880 invltlb_1pg(tva); 881 } 882} 883 884/* 885 * this routine jerks page mappings from the 886 * kernel -- it is meant only for temporary mappings. 887 */ 888void 889pmap_qremove(va, count) 890 vm_offset_t va; 891 int count; 892{ 893 int i; 894 register unsigned *pte; 895 896 for (i = 0; i < count; i++) { 897 pte = (unsigned *)vtopte(va); 898 *pte = 0; 899 invltlb_1pg(va); 900 va += PAGE_SIZE; 901 } 902} 903 904/* 905 * add a wired page to the kva 906 * note that in order for the mapping to take effect -- you 907 * should do a invltlb after doing the pmap_kenter... 908 */ 909PMAP_INLINE void 910pmap_kenter(va, pa) 911 vm_offset_t va; 912 register vm_offset_t pa; 913{ 914 register unsigned *pte; 915 unsigned npte, opte; 916 917 npte = pa | PG_RW | PG_V | pgeflag; 918 pte = (unsigned *)vtopte(va); 919 opte = *pte; 920 *pte = npte; 921 if (opte) 922 invltlb_1pg(va); 923} 924 925/* 926 * remove a page from the kernel pagetables 927 */ 928PMAP_INLINE void 929pmap_kremove(va) 930 vm_offset_t va; 931{ 932 register unsigned *pte; 933 934 pte = (unsigned *)vtopte(va); 935 *pte = 0; 936 invltlb_1pg(va); 937} 938 939static vm_page_t 940pmap_page_lookup(object, pindex) 941 vm_object_t object; 942 vm_pindex_t pindex; 943{ 944 vm_page_t m; 945retry: 946 m = vm_page_lookup(object, pindex); 947 if (m && vm_page_sleep(m, "pplookp", NULL)) 948 goto retry; 949 return m; 950} 951 952/* 953 * Create the UPAGES for a new process. 954 * This routine directly affects the fork perf for a process. 955 */ 956void 957pmap_new_proc(p) 958 struct proc *p; 959{ 960 int i, updateneeded; 961 vm_object_t upobj; 962 vm_page_t m; 963 struct user *up; 964 unsigned *ptek, oldpte; 965 966 /* 967 * allocate object for the upages 968 */ 969 if ((upobj = p->p_upages_obj) == NULL) { 970 upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES); 971 p->p_upages_obj = upobj; 972 } 973 974 /* get a kernel virtual address for the UPAGES for this proc */ 975 if ((up = p->p_addr) == NULL) { 976 up = (struct user *) kmem_alloc_pageable(kernel_map, 977 UPAGES * PAGE_SIZE); 978#if !defined(MAX_PERF) 979 if (up == NULL) 980 panic("pmap_new_proc: u_map allocation failed"); 981#endif 982 p->p_addr = up; 983 } 984 985 ptek = (unsigned *) vtopte((vm_offset_t) up); 986 987 updateneeded = 0; 988 for(i=0;i<UPAGES;i++) { 989 /* 990 * Get a kernel stack page 991 */ 992 m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 993 994 /* 995 * Wire the page 996 */ 997 m->wire_count++; 998 cnt.v_wire_count++; 999 1000 oldpte = *(ptek + i); 1001 /* 1002 * Enter the page into the kernel address space. 1003 */ 1004 *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; 1005 if (oldpte) { 1006 if ((oldpte & PG_G) || (cpu_class > CPUCLASS_386)) { 1007 invlpg((vm_offset_t) up + i * PAGE_SIZE); 1008 } else { 1009 updateneeded = 1; 1010 } 1011 } 1012 1013 vm_page_wakeup(m); 1014 m->flags &= ~PG_ZERO; 1015 m->flags |= PG_MAPPED | PG_WRITEABLE; 1016 m->valid = VM_PAGE_BITS_ALL; 1017 } 1018 if (updateneeded) 1019 invltlb(); 1020} 1021 1022/* 1023 * Dispose the UPAGES for a process that has exited. 1024 * This routine directly impacts the exit perf of a process. 1025 */ 1026void 1027pmap_dispose_proc(p) 1028 struct proc *p; 1029{ 1030 int i; 1031 vm_object_t upobj; 1032 vm_page_t m; 1033 unsigned *ptek, oldpte; 1034 1035 upobj = p->p_upages_obj; 1036 1037 ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr); 1038 for(i=0;i<UPAGES;i++) { 1039 1040 if ((m = vm_page_lookup(upobj, i)) == NULL) 1041 panic("pmap_dispose_proc: upage already missing???"); 1042 1043 m->flags |= PG_BUSY; 1044 1045 oldpte = *(ptek + i); 1046 *(ptek + i) = 0; 1047 if ((oldpte & PG_G) || (cpu_class > CPUCLASS_386)) 1048 invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE); 1049 vm_page_unwire(m); 1050 vm_page_free(m); 1051 } 1052 1053 if (cpu_class <= CPUCLASS_386) 1054 invltlb(); 1055} 1056 1057/* 1058 * Allow the UPAGES for a process to be prejudicially paged out. 1059 */ 1060void 1061pmap_swapout_proc(p) 1062 struct proc *p; 1063{ 1064 int i; 1065 vm_object_t upobj; 1066 vm_page_t m; 1067 1068 upobj = p->p_upages_obj; 1069 /* 1070 * let the upages be paged 1071 */ 1072 for(i=0;i<UPAGES;i++) { 1073 if ((m = vm_page_lookup(upobj, i)) == NULL) 1074 panic("pmap_swapout_proc: upage already missing???"); 1075 m->dirty = VM_PAGE_BITS_ALL; 1076 vm_page_unwire(m); 1077 vm_page_deactivate(m); 1078 pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i); 1079 } 1080} 1081 1082/* 1083 * Bring the UPAGES for a specified process back in. 1084 */ 1085void 1086pmap_swapin_proc(p) 1087 struct proc *p; 1088{ 1089 int i,rv; 1090 vm_object_t upobj; 1091 vm_page_t m; 1092 1093 upobj = p->p_upages_obj; 1094 for(i=0;i<UPAGES;i++) { 1095 1096 m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 1097 1098 pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE, 1099 VM_PAGE_TO_PHYS(m)); 1100 1101 if (m->valid != VM_PAGE_BITS_ALL) { 1102 rv = vm_pager_get_pages(upobj, &m, 1, 0); 1103#if !defined(MAX_PERF) 1104 if (rv != VM_PAGER_OK) 1105 panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid); 1106#endif 1107 m = vm_page_lookup(upobj, i); 1108 m->valid = VM_PAGE_BITS_ALL; 1109 } 1110 1111 vm_page_wire(m); 1112 vm_page_wakeup(m); 1113 m->flags |= PG_MAPPED | PG_WRITEABLE; 1114 } 1115} 1116 1117/*************************************************** 1118 * Page table page management routines..... 1119 ***************************************************/ 1120 1121/* 1122 * This routine unholds page table pages, and if the hold count 1123 * drops to zero, then it decrements the wire count. 1124 */ 1125static int 1126_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { 1127 int s; 1128 1129 while (vm_page_sleep(m, "pmuwpt", NULL)); 1130 1131 if (m->hold_count == 0) { 1132 vm_offset_t pteva; 1133 /* 1134 * unmap the page table page 1135 */ 1136 pmap->pm_pdir[m->pindex] = 0; 1137 --pmap->pm_stats.resident_count; 1138 if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == 1139 (((unsigned) PTDpde) & PG_FRAME)) { 1140 /* 1141 * Do a invltlb to make the invalidated mapping 1142 * take effect immediately. 1143 */ 1144 pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); 1145 invltlb_1pg(pteva); 1146 } 1147 1148 if (pmap->pm_ptphint == m) 1149 pmap->pm_ptphint = NULL; 1150 1151 /* 1152 * If the page is finally unwired, simply free it. 1153 */ 1154 --m->wire_count; 1155 if (m->wire_count == 0) { 1156 1157 if (m->flags & PG_WANTED) { 1158 m->flags &= ~PG_WANTED; 1159 wakeup(m); 1160 } 1161 1162 m->flags |= PG_BUSY; 1163 vm_page_free_zero(m); 1164 --cnt.v_wire_count; 1165 } 1166 return 1; 1167 } 1168 return 0; 1169} 1170 1171static PMAP_INLINE int 1172pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { 1173 vm_page_unhold(m); 1174 if (m->hold_count == 0) 1175 return _pmap_unwire_pte_hold(pmap, m); 1176 else 1177 return 0; 1178} 1179 1180/* 1181 * After removing a page table entry, this routine is used to 1182 * conditionally free the page, and manage the hold/wire counts. 1183 */ 1184static int 1185pmap_unuse_pt(pmap, va, mpte) 1186 pmap_t pmap; 1187 vm_offset_t va; 1188 vm_page_t mpte; 1189{ 1190 unsigned ptepindex; 1191 if (va >= UPT_MIN_ADDRESS) 1192 return 0; 1193 1194 if (mpte == NULL) { 1195 ptepindex = (va >> PDRSHIFT); 1196 if (pmap->pm_ptphint && 1197 (pmap->pm_ptphint->pindex == ptepindex)) { 1198 mpte = pmap->pm_ptphint; 1199 } else { 1200 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 1201 pmap->pm_ptphint = mpte; 1202 } 1203 } 1204 1205 return pmap_unwire_pte_hold(pmap, mpte); 1206} 1207 1208#if !defined(SMP) 1209void 1210pmap_pinit0(pmap) 1211 struct pmap *pmap; 1212{ 1213 pmap->pm_pdir = 1214 (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); 1215 pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD); 1216 pmap->pm_flags = 0; 1217 pmap->pm_count = 1; 1218 pmap->pm_ptphint = NULL; 1219 TAILQ_INIT(&pmap->pm_pvlist); 1220 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1221} 1222#else 1223void 1224pmap_pinit0(pmap) 1225 struct pmap *pmap; 1226{ 1227 pmap_pinit(pmap); 1228} 1229#endif 1230 1231/* 1232 * Initialize a preallocated and zeroed pmap structure, 1233 * such as one in a vmspace structure. 1234 */ 1235void 1236pmap_pinit(pmap) 1237 register struct pmap *pmap; 1238{ 1239 vm_page_t ptdpg; 1240 1241 /* 1242 * No need to allocate page table space yet but we do need a valid 1243 * page directory table. 1244 */ 1245 if (pmap->pm_pdir == NULL) 1246 pmap->pm_pdir = 1247 (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); 1248 1249 /* 1250 * allocate object for the ptes 1251 */ 1252 if (pmap->pm_pteobj == NULL) 1253 pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1); 1254 1255 /* 1256 * allocate the page directory page 1257 */ 1258retry: 1259 ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI, 1260 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 1261 1262 ptdpg->wire_count = 1; 1263 ++cnt.v_wire_count; 1264 1265 ptdpg->flags &= ~(PG_MAPPED | PG_BUSY); /* not mapped normally */ 1266 ptdpg->valid = VM_PAGE_BITS_ALL; 1267 1268 pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); 1269 if ((ptdpg->flags & PG_ZERO) == 0) 1270 bzero(pmap->pm_pdir, PAGE_SIZE); 1271 1272 /* wire in kernel global address entries */ 1273 /* XXX copies current process, does not fill in MPPTDI */ 1274 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); 1275 1276 /* install self-referential address mapping entry */ 1277 *(unsigned *) (pmap->pm_pdir + PTDPTDI) = 1278 VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M; 1279 1280 pmap->pm_flags = 0; 1281 pmap->pm_count = 1; 1282 pmap->pm_ptphint = NULL; 1283 TAILQ_INIT(&pmap->pm_pvlist); 1284 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1285} 1286 1287static int 1288pmap_release_free_page(pmap, p) 1289 struct pmap *pmap; 1290 vm_page_t p; 1291{ 1292 int s; 1293 unsigned *pde = (unsigned *) pmap->pm_pdir; 1294 /* 1295 * This code optimizes the case of freeing non-busy 1296 * page-table pages. Those pages are zero now, and 1297 * might as well be placed directly into the zero queue. 1298 */ 1299 if (vm_page_sleep(p, "pmaprl", NULL)) 1300 return 0; 1301 1302 p->flags |= PG_BUSY; 1303 1304 /* 1305 * Remove the page table page from the processes address space. 1306 */ 1307 pde[p->pindex] = 0; 1308 pmap->pm_stats.resident_count--; 1309 1310#if !defined(MAX_PERF) 1311 if (p->hold_count) { 1312 panic("pmap_release: freeing held page table page"); 1313 } 1314#endif 1315 /* 1316 * Page directory pages need to have the kernel 1317 * stuff cleared, so they can go into the zero queue also. 1318 */ 1319 if (p->pindex == PTDPTDI) { 1320 bzero(pde + KPTDI, nkpt * PTESIZE); 1321#ifdef SMP 1322 pde[MPPTDI] = 0; 1323#endif 1324 pde[APTDPTDI] = 0; 1325 pmap_kremove((vm_offset_t) pmap->pm_pdir); 1326 } 1327 1328 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) 1329 pmap->pm_ptphint = NULL; 1330 1331 vm_page_free_zero(p); 1332 return 1; 1333} 1334 1335/* 1336 * this routine is called if the page table page is not 1337 * mapped correctly. 1338 */ 1339static vm_page_t 1340_pmap_allocpte(pmap, ptepindex) 1341 pmap_t pmap; 1342 unsigned ptepindex; 1343{ 1344 vm_offset_t pteva, ptepa; 1345 vm_page_t m; 1346 1347 /* 1348 * Find or fabricate a new pagetable page 1349 */ 1350 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1351 VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1352 1353 if (m->queue != PQ_NONE) { 1354 int s = splvm(); 1355 vm_page_unqueue(m); 1356 splx(s); 1357 } 1358 1359 if (m->wire_count == 0) 1360 cnt.v_wire_count++; 1361 m->wire_count++; 1362 1363 /* 1364 * Increment the hold count for the page table page 1365 * (denoting a new mapping.) 1366 */ 1367 m->hold_count++; 1368 1369 /* 1370 * Map the pagetable page into the process address space, if 1371 * it isn't already there. 1372 */ 1373 1374 pmap->pm_stats.resident_count++; 1375 1376 ptepa = VM_PAGE_TO_PHYS(m); 1377 pmap->pm_pdir[ptepindex] = 1378 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); 1379 1380 /* 1381 * Set the page table hint 1382 */ 1383 pmap->pm_ptphint = m; 1384 1385 /* 1386 * Try to use the new mapping, but if we cannot, then 1387 * do it with the routine that maps the page explicitly. 1388 */ 1389 if ((m->flags & PG_ZERO) == 0) { 1390 if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == 1391 (((unsigned) PTDpde) & PG_FRAME)) { 1392 pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex); 1393 bzero((caddr_t) pteva, PAGE_SIZE); 1394 } else { 1395 pmap_zero_page(ptepa); 1396 } 1397 } 1398 1399 m->valid = VM_PAGE_BITS_ALL; 1400 m->flags &= ~(PG_ZERO | PG_BUSY); 1401 m->flags |= PG_MAPPED; 1402 1403 return m; 1404} 1405 1406static vm_page_t 1407pmap_allocpte(pmap, va) 1408 pmap_t pmap; 1409 vm_offset_t va; 1410{ 1411 unsigned ptepindex; 1412 vm_offset_t ptepa; 1413 vm_page_t m; 1414 1415 /* 1416 * Calculate pagetable page index 1417 */ 1418 ptepindex = va >> PDRSHIFT; 1419 1420 /* 1421 * Get the page directory entry 1422 */ 1423 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; 1424 1425 /* 1426 * This supports switching from a 4MB page to a 1427 * normal 4K page. 1428 */ 1429 if (ptepa & PG_PS) { 1430 pmap->pm_pdir[ptepindex] = 0; 1431 ptepa = 0; 1432 invltlb(); 1433 } 1434 1435 /* 1436 * If the page table page is mapped, we just increment the 1437 * hold count, and activate it. 1438 */ 1439 if (ptepa) { 1440 /* 1441 * In order to get the page table page, try the 1442 * hint first. 1443 */ 1444 if (pmap->pm_ptphint && 1445 (pmap->pm_ptphint->pindex == ptepindex)) { 1446 m = pmap->pm_ptphint; 1447 } else { 1448 m = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 1449 pmap->pm_ptphint = m; 1450 } 1451 m->hold_count++; 1452 return m; 1453 } 1454 /* 1455 * Here if the pte page isn't mapped, or if it has been deallocated. 1456 */ 1457 return _pmap_allocpte(pmap, ptepindex); 1458} 1459 1460 1461/*************************************************** 1462* Pmap allocation/deallocation routines. 1463 ***************************************************/ 1464 1465/* 1466 * Release any resources held by the given physical map. 1467 * Called when a pmap initialized by pmap_pinit is being released. 1468 * Should only be called if the map contains no valid mappings. 1469 */ 1470void 1471pmap_release(pmap) 1472 register struct pmap *pmap; 1473{ 1474 vm_page_t p,n,ptdpg; 1475 vm_object_t object = pmap->pm_pteobj; 1476 int curgeneration; 1477 1478#if defined(DIAGNOSTIC) 1479 if (object->ref_count != 1) 1480 panic("pmap_release: pteobj reference count != 1"); 1481#endif 1482 1483 ptdpg = NULL; 1484retry: 1485 curgeneration = object->generation; 1486 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { 1487 n = TAILQ_NEXT(p, listq); 1488 if (p->pindex == PTDPTDI) { 1489 ptdpg = p; 1490 continue; 1491 } 1492 while (1) { 1493 if (!pmap_release_free_page(pmap, p) && 1494 (object->generation != curgeneration)) 1495 goto retry; 1496 } 1497 } 1498 1499 if (ptdpg && !pmap_release_free_page(pmap, ptdpg)) 1500 goto retry; 1501} 1502 1503/* 1504 * grow the number of kernel page table entries, if needed 1505 */ 1506void 1507pmap_growkernel(vm_offset_t addr) 1508{ 1509 struct proc *p; 1510 struct pmap *pmap; 1511 int s; 1512 vm_offset_t ptppaddr; 1513 vm_page_t nkpg; 1514#ifdef SMP 1515 int i; 1516#endif 1517 pd_entry_t newpdir; 1518 1519 s = splhigh(); 1520 if (kernel_vm_end == 0) { 1521 kernel_vm_end = KERNBASE; 1522 nkpt = 0; 1523 while (pdir_pde(PTD, kernel_vm_end)) { 1524 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1525 nkpt++; 1526 } 1527 } 1528 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1529 while (kernel_vm_end < addr) { 1530 if (pdir_pde(PTD, kernel_vm_end)) { 1531 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1532 continue; 1533 } 1534 1535 /* 1536 * This index is bogus, but out of the way 1537 */ 1538 nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM); 1539#if !defined(MAX_PERF) 1540 if (!nkpg) 1541 panic("pmap_growkernel: no memory to grow kernel"); 1542#endif 1543 1544 nkpt++; 1545 1546 vm_page_wire(nkpg); 1547 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1548 pmap_zero_page(ptppaddr); 1549 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); 1550 pdir_pde(PTD, kernel_vm_end) = newpdir; 1551 1552#ifdef SMP 1553 for (i = 0; i < mp_ncpus; i++) { 1554 if (IdlePTDS[i]) 1555 pdir_pde(IdlePTDS[i], kernel_vm_end) = newpdir; 1556 } 1557#endif 1558 1559 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { 1560 if (p->p_vmspace) { 1561 pmap = &p->p_vmspace->vm_pmap; 1562 *pmap_pde(pmap, kernel_vm_end) = newpdir; 1563 } 1564 } 1565 *pmap_pde(kernel_pmap, kernel_vm_end) = newpdir; 1566 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1567 } 1568 splx(s); 1569} 1570 1571/* 1572 * Retire the given physical map from service. 1573 * Should only be called if the map contains 1574 * no valid mappings. 1575 */ 1576void 1577pmap_destroy(pmap) 1578 register pmap_t pmap; 1579{ 1580 int count; 1581 1582 if (pmap == NULL) 1583 return; 1584 1585 count = --pmap->pm_count; 1586 if (count == 0) { 1587 pmap_release(pmap); 1588#if !defined(MAX_PERF) 1589 panic("destroying a pmap is not yet implemented"); 1590#endif 1591 } 1592} 1593 1594/* 1595 * Add a reference to the specified pmap. 1596 */ 1597void 1598pmap_reference(pmap) 1599 pmap_t pmap; 1600{ 1601 if (pmap != NULL) { 1602 pmap->pm_count++; 1603 } 1604} 1605 1606/*************************************************** 1607* page management routines. 1608 ***************************************************/ 1609 1610/* 1611 * free the pv_entry back to the free list 1612 */ 1613static PMAP_INLINE void 1614free_pv_entry(pv) 1615 pv_entry_t pv; 1616{ 1617 pv_entry_count--; 1618 zfreei(pvzone, pv); 1619} 1620 1621/* 1622 * get a new pv_entry, allocating a block from the system 1623 * when needed. 1624 * the memory allocation is performed bypassing the malloc code 1625 * because of the possibility of allocations at interrupt time. 1626 */ 1627static pv_entry_t 1628get_pv_entry(void) 1629{ 1630 pv_entry_count++; 1631 if (pv_entry_high_water && 1632 (pv_entry_count > pv_entry_high_water) && 1633 (pmap_pagedaemon_waken == 0)) { 1634 pmap_pagedaemon_waken = 1; 1635 wakeup (&vm_pages_needed); 1636 } 1637 return zalloci(pvzone); 1638} 1639 1640/* 1641 * This routine is very drastic, but can save the system 1642 * in a pinch. 1643 */ 1644void 1645pmap_collect() { 1646 pv_table_t *ppv; 1647 int i; 1648 vm_offset_t pa; 1649 vm_page_t m; 1650 static int warningdone=0; 1651 1652 if (pmap_pagedaemon_waken == 0) 1653 return; 1654 1655 if (warningdone < 5) { 1656 printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); 1657 warningdone++; 1658 } 1659 1660 for(i = 0; i < pv_npg; i++) { 1661 if ((ppv = &pv_table[i]) == 0) 1662 continue; 1663 m = ppv->pv_vm_page; 1664 if ((pa = VM_PAGE_TO_PHYS(m)) == 0) 1665 continue; 1666 if (m->wire_count || m->hold_count || m->busy || 1667 (m->flags & PG_BUSY)) 1668 continue; 1669 pmap_remove_all(pa); 1670 } 1671 pmap_pagedaemon_waken = 0; 1672} 1673 1674 1675/* 1676 * If it is the first entry on the list, it is actually 1677 * in the header and we must copy the following entry up 1678 * to the header. Otherwise we must search the list for 1679 * the entry. In either case we free the now unused entry. 1680 */ 1681 1682static int 1683pmap_remove_entry(pmap, ppv, va) 1684 struct pmap *pmap; 1685 pv_table_t *ppv; 1686 vm_offset_t va; 1687{ 1688 pv_entry_t pv; 1689 int rtval; 1690 int s; 1691 1692 s = splvm(); 1693 if (ppv->pv_list_count < pmap->pm_stats.resident_count) { 1694 for (pv = TAILQ_FIRST(&ppv->pv_list); 1695 pv; 1696 pv = TAILQ_NEXT(pv, pv_list)) { 1697 if (pmap == pv->pv_pmap && va == pv->pv_va) 1698 break; 1699 } 1700 } else { 1701 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); 1702 pv; 1703 pv = TAILQ_NEXT(pv, pv_plist)) { 1704 if (va == pv->pv_va) 1705 break; 1706 } 1707 } 1708 1709 rtval = 0; 1710 if (pv) { 1711 1712 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1713 TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); 1714 ppv->pv_list_count--; 1715 if (TAILQ_FIRST(&ppv->pv_list) == NULL) 1716 ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE); 1717 1718 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1719 free_pv_entry(pv); 1720 } 1721 1722 splx(s); 1723 return rtval; 1724} 1725 1726/* 1727 * Create a pv entry for page at pa for 1728 * (pmap, va). 1729 */ 1730static void 1731pmap_insert_entry(pmap, va, mpte, pa) 1732 pmap_t pmap; 1733 vm_offset_t va; 1734 vm_page_t mpte; 1735 vm_offset_t pa; 1736{ 1737 1738 int s; 1739 pv_entry_t pv; 1740 pv_table_t *ppv; 1741 1742 s = splvm(); 1743 pv = get_pv_entry(); 1744 pv->pv_va = va; 1745 pv->pv_pmap = pmap; 1746 pv->pv_ptem = mpte; 1747 1748 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1749 1750 ppv = pa_to_pvh(pa); 1751 TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list); 1752 ppv->pv_list_count++; 1753 1754 splx(s); 1755} 1756 1757/* 1758 * pmap_remove_pte: do the things to unmap a page in a process 1759 */ 1760static int 1761pmap_remove_pte(pmap, ptq, va) 1762 struct pmap *pmap; 1763 unsigned *ptq; 1764 vm_offset_t va; 1765{ 1766 unsigned oldpte; 1767 pv_table_t *ppv; 1768 1769 oldpte = *ptq; 1770 *ptq = 0; 1771 if (oldpte & PG_W) 1772 pmap->pm_stats.wired_count -= 1; 1773 /* 1774 * Machines that don't support invlpg, also don't support 1775 * PG_G. 1776 */ 1777 if (oldpte & PG_G) 1778 invlpg(va); 1779 pmap->pm_stats.resident_count -= 1; 1780 if (oldpte & PG_MANAGED) { 1781 ppv = pa_to_pvh(oldpte); 1782 if (oldpte & PG_M) { 1783#if defined(PMAP_DIAGNOSTIC) 1784 if (pmap_nw_modified((pt_entry_t) oldpte)) { 1785 printf( 1786 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", 1787 va, oldpte); 1788 } 1789#endif 1790 if (pmap_track_modified(va)) 1791 ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL; 1792 } 1793 if (oldpte & PG_A) 1794 ppv->pv_vm_page->flags |= PG_REFERENCED; 1795 return pmap_remove_entry(pmap, ppv, va); 1796 } else { 1797 return pmap_unuse_pt(pmap, va, NULL); 1798 } 1799 1800 return 0; 1801} 1802 1803/* 1804 * Remove a single page from a process address space 1805 */ 1806static void 1807pmap_remove_page(pmap, va) 1808 struct pmap *pmap; 1809 register vm_offset_t va; 1810{ 1811 register unsigned *ptq; 1812 1813 /* 1814 * if there is no pte for this address, just skip it!!! 1815 */ 1816 if (*pmap_pde(pmap, va) == 0) { 1817 return; 1818 } 1819 1820 /* 1821 * get a local va for mappings for this pmap. 1822 */ 1823 ptq = get_ptbase(pmap) + i386_btop(va); 1824 if (*ptq) { 1825 (void) pmap_remove_pte(pmap, ptq, va); 1826 invltlb_1pg(va); 1827 } 1828 return; 1829} 1830 1831/* 1832 * Remove the given range of addresses from the specified map. 1833 * 1834 * It is assumed that the start and end are properly 1835 * rounded to the page size. 1836 */ 1837void 1838pmap_remove(pmap, sva, eva) 1839 struct pmap *pmap; 1840 register vm_offset_t sva; 1841 register vm_offset_t eva; 1842{ 1843 register unsigned *ptbase; 1844 vm_offset_t pdnxt; 1845 vm_offset_t ptpaddr; 1846 vm_offset_t sindex, eindex; 1847 int anyvalid; 1848 1849 if (pmap == NULL) 1850 return; 1851 1852 if (pmap->pm_stats.resident_count == 0) 1853 return; 1854 1855 /* 1856 * special handling of removing one page. a very 1857 * common operation and easy to short circuit some 1858 * code. 1859 */ 1860 if (((sva + PAGE_SIZE) == eva) && 1861 (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 1862 pmap_remove_page(pmap, sva); 1863 return; 1864 } 1865 1866 anyvalid = 0; 1867 1868 /* 1869 * Get a local virtual address for the mappings that are being 1870 * worked with. 1871 */ 1872 ptbase = get_ptbase(pmap); 1873 1874 sindex = i386_btop(sva); 1875 eindex = i386_btop(eva); 1876 1877 for (; sindex < eindex; sindex = pdnxt) { 1878 unsigned pdirindex; 1879 1880 /* 1881 * Calculate index for next page table. 1882 */ 1883 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); 1884 if (pmap->pm_stats.resident_count == 0) 1885 break; 1886 1887 pdirindex = sindex / NPDEPG; 1888 if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { 1889 pmap->pm_pdir[pdirindex] = 0; 1890 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1891 anyvalid++; 1892 continue; 1893 } 1894 1895 /* 1896 * Weed out invalid mappings. Note: we assume that the page 1897 * directory table is always allocated, and in kernel virtual. 1898 */ 1899 if (ptpaddr == 0) 1900 continue; 1901 1902 /* 1903 * Limit our scan to either the end of the va represented 1904 * by the current page table page, or to the end of the 1905 * range being removed. 1906 */ 1907 if (pdnxt > eindex) { 1908 pdnxt = eindex; 1909 } 1910 1911 for ( ;sindex != pdnxt; sindex++) { 1912 vm_offset_t va; 1913 if (ptbase[sindex] == 0) { 1914 continue; 1915 } 1916 va = i386_ptob(sindex); 1917 1918 anyvalid++; 1919 if (pmap_remove_pte(pmap, 1920 ptbase + sindex, va)) 1921 break; 1922 } 1923 } 1924 1925 if (anyvalid) { 1926 invltlb(); 1927 } 1928} 1929 1930/* 1931 * Routine: pmap_remove_all 1932 * Function: 1933 * Removes this physical page from 1934 * all physical maps in which it resides. 1935 * Reflects back modify bits to the pager. 1936 * 1937 * Notes: 1938 * Original versions of this routine were very 1939 * inefficient because they iteratively called 1940 * pmap_remove (slow...) 1941 */ 1942 1943static void 1944pmap_remove_all(pa) 1945 vm_offset_t pa; 1946{ 1947 register pv_entry_t pv; 1948 pv_table_t *ppv; 1949 register unsigned *pte, tpte; 1950 int nmodify; 1951 int update_needed; 1952 int s; 1953 1954 nmodify = 0; 1955 update_needed = 0; 1956#if defined(PMAP_DIAGNOSTIC) 1957 /* 1958 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 1959 * pages! 1960 */ 1961 if (!pmap_is_managed(pa)) { 1962 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", pa); 1963 } 1964#endif 1965 1966 s = splvm(); 1967 ppv = pa_to_pvh(pa); 1968 while ((pv = TAILQ_FIRST(&ppv->pv_list)) != NULL) { 1969 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 1970 1971 pv->pv_pmap->pm_stats.resident_count--; 1972 1973 tpte = *pte; 1974 *pte = 0; 1975 if (tpte & PG_W) 1976 pv->pv_pmap->pm_stats.wired_count--; 1977 1978 if (tpte & PG_A) 1979 ppv->pv_vm_page->flags |= PG_REFERENCED; 1980 1981 /* 1982 * Update the vm_page_t clean and reference bits. 1983 */ 1984 if (tpte & PG_M) { 1985#if defined(PMAP_DIAGNOSTIC) 1986 if (pmap_nw_modified((pt_entry_t) tpte)) { 1987 printf( 1988 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", 1989 pv->pv_va, tpte); 1990 } 1991#endif 1992 if (pmap_track_modified(pv->pv_va)) 1993 ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL; 1994 } 1995 if (!update_needed && 1996 ((!curproc || (&curproc->p_vmspace->vm_pmap == pv->pv_pmap)) || 1997 (pv->pv_pmap == kernel_pmap))) { 1998 update_needed = 1; 1999 } 2000 2001 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2002 TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); 2003 ppv->pv_list_count--; 2004 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2005 free_pv_entry(pv); 2006 } 2007 2008 ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE); 2009 2010 if (update_needed) 2011 invltlb(); 2012 2013 splx(s); 2014 return; 2015} 2016 2017/* 2018 * Set the physical protection on the 2019 * specified range of this map as requested. 2020 */ 2021void 2022pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2023{ 2024 register unsigned *ptbase; 2025 vm_offset_t pdnxt, ptpaddr; 2026 vm_pindex_t sindex, eindex; 2027 int anychanged; 2028 2029 2030 if (pmap == NULL) 2031 return; 2032 2033 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2034 pmap_remove(pmap, sva, eva); 2035 return; 2036 } 2037 2038 if (prot & VM_PROT_WRITE) 2039 return; 2040 2041 anychanged = 0; 2042 2043 ptbase = get_ptbase(pmap); 2044 2045 sindex = i386_btop(sva); 2046 eindex = i386_btop(eva); 2047 2048 for (; sindex < eindex; sindex = pdnxt) { 2049 2050 unsigned pdirindex; 2051 2052 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); 2053 2054 pdirindex = sindex / NPDEPG; 2055 if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { 2056 (unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); 2057 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2058 anychanged++; 2059 continue; 2060 } 2061 2062 /* 2063 * Weed out invalid mappings. Note: we assume that the page 2064 * directory table is always allocated, and in kernel virtual. 2065 */ 2066 if (ptpaddr == 0) 2067 continue; 2068 2069 if (pdnxt > eindex) { 2070 pdnxt = eindex; 2071 } 2072 2073 for (; sindex != pdnxt; sindex++) { 2074 2075 unsigned pbits; 2076 pv_table_t *ppv; 2077 2078 pbits = ptbase[sindex]; 2079 2080 if (pbits & PG_MANAGED) { 2081 ppv = NULL; 2082 if (pbits & PG_A) { 2083 ppv = pa_to_pvh(pbits); 2084 ppv->pv_vm_page->flags |= PG_REFERENCED; 2085 pbits &= ~PG_A; 2086 } 2087 if (pbits & PG_M) { 2088 if (pmap_track_modified(i386_ptob(sindex))) { 2089 if (ppv == NULL) 2090 ppv = pa_to_pvh(pbits); 2091 ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL; 2092 pbits &= ~PG_M; 2093 } 2094 } 2095 } 2096 2097 pbits &= ~PG_RW; 2098 2099 if (pbits != ptbase[sindex]) { 2100 ptbase[sindex] = pbits; 2101 anychanged = 1; 2102 } 2103 } 2104 } 2105 if (anychanged) 2106 invltlb(); 2107} 2108 2109/* 2110 * Insert the given physical page (p) at 2111 * the specified virtual address (v) in the 2112 * target physical map with the protection requested. 2113 * 2114 * If specified, the page will be wired down, meaning 2115 * that the related pte can not be reclaimed. 2116 * 2117 * NB: This is the only routine which MAY NOT lazy-evaluate 2118 * or lose information. That is, this routine must actually 2119 * insert this page into the given map NOW. 2120 */ 2121void 2122pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot, 2123 boolean_t wired) 2124{ 2125 register unsigned *pte; 2126 vm_offset_t opa; 2127 vm_offset_t origpte, newpte; 2128 vm_page_t mpte; 2129 2130 if (pmap == NULL) 2131 return; 2132 2133 va &= PG_FRAME; 2134#ifdef PMAP_DIAGNOSTIC 2135 if (va > VM_MAX_KERNEL_ADDRESS) 2136 panic("pmap_enter: toobig"); 2137 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) 2138 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); 2139#endif 2140 2141 mpte = NULL; 2142 /* 2143 * In the case that a page table page is not 2144 * resident, we are creating it here. 2145 */ 2146 if (va < UPT_MIN_ADDRESS) { 2147 mpte = pmap_allocpte(pmap, va); 2148 } 2149#if 0 && defined(PMAP_DIAGNOSTIC) 2150 else { 2151 vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va); 2152 if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) { 2153 panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n", 2154 pmap->pm_pdir[PTDPTDI], origpte, va); 2155 } 2156 if (smp_active) { 2157 pdeaddr = (vm_offset_t *) IdlePTDS[cpuid]; 2158 if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) { 2159 if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr)) 2160 printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr); 2161 printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr); 2162 panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n", 2163 pmap->pm_pdir[PTDPTDI], newpte, origpte, va); 2164 } 2165 } 2166 } 2167#endif 2168 2169 pte = pmap_pte(pmap, va); 2170 2171#if !defined(MAX_PERF) 2172 /* 2173 * Page Directory table entry not valid, we need a new PT page 2174 */ 2175 if (pte == NULL) { 2176 panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n", 2177 (void *)pmap->pm_pdir[PTDPTDI], va); 2178 } 2179#endif 2180 2181 origpte = *(vm_offset_t *)pte; 2182 pa &= PG_FRAME; 2183 opa = origpte & PG_FRAME; 2184 2185#if !defined(MAX_PERF) 2186 if (origpte & PG_PS) 2187 panic("pmap_enter: attempted pmap_enter on 4MB page"); 2188#endif 2189 2190 /* 2191 * Mapping has not changed, must be protection or wiring change. 2192 */ 2193 if (origpte && (opa == pa)) { 2194 /* 2195 * Wiring change, just update stats. We don't worry about 2196 * wiring PT pages as they remain resident as long as there 2197 * are valid mappings in them. Hence, if a user page is wired, 2198 * the PT page will be also. 2199 */ 2200 if (wired && ((origpte & PG_W) == 0)) 2201 pmap->pm_stats.wired_count++; 2202 else if (!wired && (origpte & PG_W)) 2203 pmap->pm_stats.wired_count--; 2204 2205#if defined(PMAP_DIAGNOSTIC) 2206 if (pmap_nw_modified((pt_entry_t) origpte)) { 2207 printf( 2208 "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", 2209 va, origpte); 2210 } 2211#endif 2212 2213 /* 2214 * Remove extra pte reference 2215 */ 2216 if (mpte) 2217 mpte->hold_count--; 2218 2219 if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { 2220 if ((origpte & PG_RW) == 0) { 2221 *pte |= PG_RW; 2222 invltlb_1pg(va); 2223 } 2224 return; 2225 } 2226 2227 /* 2228 * We might be turning off write access to the page, 2229 * so we go ahead and sense modify status. 2230 */ 2231 if (origpte & PG_MANAGED) { 2232 if ((origpte & PG_M) && pmap_track_modified(va)) { 2233 pv_table_t *ppv; 2234 ppv = pa_to_pvh(opa); 2235 ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL; 2236 } 2237 pa |= PG_MANAGED; 2238 } 2239 goto validate; 2240 } 2241 /* 2242 * Mapping has changed, invalidate old range and fall through to 2243 * handle validating new mapping. 2244 */ 2245 if (opa) { 2246 int err; 2247 err = pmap_remove_pte(pmap, pte, va); 2248#if !defined(MAX_PERF) 2249 if (err) 2250 panic("pmap_enter: pte vanished, va: 0x%x", va); 2251#endif 2252 } 2253 2254 /* 2255 * Enter on the PV list if part of our managed memory Note that we 2256 * raise IPL while manipulating pv_table since pmap_enter can be 2257 * called at interrupt time. 2258 */ 2259 if (pmap_is_managed(pa)) { 2260 pmap_insert_entry(pmap, va, mpte, pa); 2261 pa |= PG_MANAGED; 2262 } 2263 2264 /* 2265 * Increment counters 2266 */ 2267 pmap->pm_stats.resident_count++; 2268 if (wired) 2269 pmap->pm_stats.wired_count++; 2270 2271validate: 2272 /* 2273 * Now validate mapping with desired protection/wiring. 2274 */ 2275 newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); 2276 2277 if (wired) 2278 newpte |= PG_W; 2279 if (va < UPT_MIN_ADDRESS) 2280 newpte |= PG_U; 2281 if (pmap == kernel_pmap) 2282 newpte |= pgeflag; 2283 2284 /* 2285 * if the mapping or permission bits are different, we need 2286 * to update the pte. 2287 */ 2288 if ((origpte & ~(PG_M|PG_A)) != newpte) { 2289 *pte = newpte | PG_A; 2290 if (origpte) 2291 invltlb_1pg(va); 2292 } 2293} 2294 2295/* 2296 * this code makes some *MAJOR* assumptions: 2297 * 1. Current pmap & pmap exists. 2298 * 2. Not wired. 2299 * 3. Read access. 2300 * 4. No page table pages. 2301 * 5. Tlbflush is deferred to calling procedure. 2302 * 6. Page IS managed. 2303 * but is *MUCH* faster than pmap_enter... 2304 */ 2305 2306static vm_page_t 2307pmap_enter_quick(pmap, va, pa, mpte) 2308 register pmap_t pmap; 2309 vm_offset_t va; 2310 register vm_offset_t pa; 2311 vm_page_t mpte; 2312{ 2313 register unsigned *pte; 2314 2315 /* 2316 * In the case that a page table page is not 2317 * resident, we are creating it here. 2318 */ 2319 if (va < UPT_MIN_ADDRESS) { 2320 unsigned ptepindex; 2321 vm_offset_t ptepa; 2322 2323 /* 2324 * Calculate pagetable page index 2325 */ 2326 ptepindex = va >> PDRSHIFT; 2327 if (mpte && (mpte->pindex == ptepindex)) { 2328 mpte->hold_count++; 2329 } else { 2330retry: 2331 /* 2332 * Get the page directory entry 2333 */ 2334 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; 2335 2336 /* 2337 * If the page table page is mapped, we just increment 2338 * the hold count, and activate it. 2339 */ 2340 if (ptepa) { 2341#if !defined(MAX_PERF) 2342 if (ptepa & PG_PS) 2343 panic("pmap_enter_quick: unexpected mapping into 4MB page"); 2344#endif 2345 if (pmap->pm_ptphint && 2346 (pmap->pm_ptphint->pindex == ptepindex)) { 2347 mpte = pmap->pm_ptphint; 2348 } else { 2349 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 2350 pmap->pm_ptphint = mpte; 2351 } 2352 if (mpte == NULL) 2353 goto retry; 2354 mpte->hold_count++; 2355 } else { 2356 mpte = _pmap_allocpte(pmap, ptepindex); 2357 } 2358 } 2359 } else { 2360 mpte = NULL; 2361 } 2362 2363 /* 2364 * This call to vtopte makes the assumption that we are 2365 * entering the page into the current pmap. In order to support 2366 * quick entry into any pmap, one would likely use pmap_pte_quick. 2367 * But that isn't as quick as vtopte. 2368 */ 2369 pte = (unsigned *)vtopte(va); 2370 if (*pte) { 2371 if (mpte) 2372 pmap_unwire_pte_hold(pmap, mpte); 2373 return 0; 2374 } 2375 2376 /* 2377 * Enter on the PV list if part of our managed memory Note that we 2378 * raise IPL while manipulating pv_table since pmap_enter can be 2379 * called at interrupt time. 2380 */ 2381 pmap_insert_entry(pmap, va, mpte, pa); 2382 2383 /* 2384 * Increment counters 2385 */ 2386 pmap->pm_stats.resident_count++; 2387 2388 /* 2389 * Now validate mapping with RO protection 2390 */ 2391 *pte = pa | PG_V | PG_U | PG_MANAGED; 2392 2393 return mpte; 2394} 2395 2396#define MAX_INIT_PT (96) 2397/* 2398 * pmap_object_init_pt preloads the ptes for a given object 2399 * into the specified pmap. This eliminates the blast of soft 2400 * faults on process startup and immediately after an mmap. 2401 */ 2402void 2403pmap_object_init_pt(pmap, addr, object, pindex, size, limit) 2404 pmap_t pmap; 2405 vm_offset_t addr; 2406 vm_object_t object; 2407 vm_pindex_t pindex; 2408 vm_size_t size; 2409 int limit; 2410{ 2411 vm_offset_t tmpidx; 2412 int psize; 2413 vm_page_t p, mpte; 2414 int objpgs; 2415 2416 if (!pmap) 2417 return; 2418 2419 /* 2420 * This code maps large physical mmap regions into the 2421 * processor address space. Note that some shortcuts 2422 * are taken, but the code works. 2423 */ 2424 if (pseflag && 2425 (object->type == OBJT_DEVICE) && 2426 ((addr & (NBPDR - 1)) == 0) && 2427 ((size & (NBPDR - 1)) == 0) ) { 2428 int i; 2429 int s; 2430 vm_page_t m[1]; 2431 unsigned int ptepindex; 2432 int npdes; 2433 vm_offset_t ptepa; 2434 2435 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) 2436 return; 2437 2438retry: 2439 p = vm_page_lookup(object, pindex); 2440 if (p && vm_page_sleep(p, "init4p", NULL)) 2441 goto retry; 2442 2443 if (p == NULL) { 2444 p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); 2445 if (p == NULL) 2446 return; 2447 m[0] = p; 2448 2449 if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { 2450 vm_page_free(p); 2451 return; 2452 } 2453 2454 p = vm_page_lookup(object, pindex); 2455 vm_page_wakeup(p); 2456 } 2457 2458 ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p); 2459 if (ptepa & (NBPDR - 1)) { 2460 return; 2461 } 2462 2463 p->valid = VM_PAGE_BITS_ALL; 2464 2465 pmap->pm_stats.resident_count += size >> PAGE_SHIFT; 2466 npdes = size >> PDRSHIFT; 2467 for(i=0;i<npdes;i++) { 2468 pmap->pm_pdir[ptepindex] = 2469 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS); 2470 ptepa += NBPDR; 2471 ptepindex += 1; 2472 } 2473 p->flags |= PG_MAPPED; 2474 invltlb(); 2475 return; 2476 } 2477 2478 psize = i386_btop(size); 2479 2480 if ((object->type != OBJT_VNODE) || 2481 (limit && (psize > MAX_INIT_PT) && 2482 (object->resident_page_count > MAX_INIT_PT))) { 2483 return; 2484 } 2485 2486 if (psize + pindex > object->size) 2487 psize = object->size - pindex; 2488 2489 mpte = NULL; 2490 /* 2491 * if we are processing a major portion of the object, then scan the 2492 * entire thing. 2493 */ 2494 if (psize > (object->size >> 2)) { 2495 objpgs = psize; 2496 2497 for (p = TAILQ_FIRST(&object->memq); 2498 ((objpgs > 0) && (p != NULL)); 2499 p = TAILQ_NEXT(p, listq)) { 2500 2501 tmpidx = p->pindex; 2502 if (tmpidx < pindex) { 2503 continue; 2504 } 2505 tmpidx -= pindex; 2506 if (tmpidx >= psize) { 2507 continue; 2508 } 2509 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2510 (p->busy == 0) && 2511 (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2512 if ((p->queue - p->pc) == PQ_CACHE) 2513 vm_page_deactivate(p); 2514 p->flags |= PG_BUSY; 2515 mpte = pmap_enter_quick(pmap, 2516 addr + i386_ptob(tmpidx), 2517 VM_PAGE_TO_PHYS(p), mpte); 2518 p->flags |= PG_MAPPED; 2519 vm_page_wakeup(p); 2520 } 2521 objpgs -= 1; 2522 } 2523 } else { 2524 /* 2525 * else lookup the pages one-by-one. 2526 */ 2527 for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { 2528 p = vm_page_lookup(object, tmpidx + pindex); 2529 if (p && 2530 ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2531 (p->busy == 0) && 2532 (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2533 if ((p->queue - p->pc) == PQ_CACHE) 2534 vm_page_deactivate(p); 2535 p->flags |= PG_BUSY; 2536 mpte = pmap_enter_quick(pmap, 2537 addr + i386_ptob(tmpidx), 2538 VM_PAGE_TO_PHYS(p), mpte); 2539 p->flags |= PG_MAPPED; 2540 vm_page_wakeup(p); 2541 } 2542 } 2543 } 2544 return; 2545} 2546 2547/* 2548 * pmap_prefault provides a quick way of clustering 2549 * pagefaults into a processes address space. It is a "cousin" 2550 * of pmap_object_init_pt, except it runs at page fault time instead 2551 * of mmap time. 2552 */ 2553#define PFBAK 4 2554#define PFFOR 4 2555#define PAGEORDER_SIZE (PFBAK+PFFOR) 2556 2557static int pmap_prefault_pageorder[] = { 2558 -PAGE_SIZE, PAGE_SIZE, 2559 -2 * PAGE_SIZE, 2 * PAGE_SIZE, 2560 -3 * PAGE_SIZE, 3 * PAGE_SIZE 2561 -4 * PAGE_SIZE, 4 * PAGE_SIZE 2562}; 2563 2564void 2565pmap_prefault(pmap, addra, entry) 2566 pmap_t pmap; 2567 vm_offset_t addra; 2568 vm_map_entry_t entry; 2569{ 2570 int i; 2571 vm_offset_t starta; 2572 vm_offset_t addr; 2573 vm_pindex_t pindex; 2574 vm_page_t m, mpte; 2575 vm_object_t object; 2576 2577 if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) 2578 return; 2579 2580 object = entry->object.vm_object; 2581 2582 starta = addra - PFBAK * PAGE_SIZE; 2583 if (starta < entry->start) { 2584 starta = entry->start; 2585 } else if (starta > addra) { 2586 starta = 0; 2587 } 2588 2589 mpte = NULL; 2590 for (i = 0; i < PAGEORDER_SIZE; i++) { 2591 vm_object_t lobject; 2592 unsigned *pte; 2593 2594 addr = addra + pmap_prefault_pageorder[i]; 2595 if (addr > addra + (PFFOR * PAGE_SIZE)) 2596 addr = 0; 2597 2598 if (addr < starta || addr >= entry->end) 2599 continue; 2600 2601 if ((*pmap_pde(pmap, addr)) == NULL) 2602 continue; 2603 2604 pte = (unsigned *) vtopte(addr); 2605 if (*pte) 2606 continue; 2607 2608 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; 2609 lobject = object; 2610 for (m = vm_page_lookup(lobject, pindex); 2611 (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); 2612 lobject = lobject->backing_object) { 2613 if (lobject->backing_object_offset & PAGE_MASK) 2614 break; 2615 pindex += (lobject->backing_object_offset >> PAGE_SHIFT); 2616 m = vm_page_lookup(lobject->backing_object, pindex); 2617 } 2618 2619 /* 2620 * give-up when a page is not in memory 2621 */ 2622 if (m == NULL) 2623 break; 2624 2625 if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2626 (m->busy == 0) && 2627 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2628 2629 if ((m->queue - m->pc) == PQ_CACHE) { 2630 vm_page_deactivate(m); 2631 } 2632 m->flags |= PG_BUSY; 2633 mpte = pmap_enter_quick(pmap, addr, 2634 VM_PAGE_TO_PHYS(m), mpte); 2635 m->flags |= PG_MAPPED; 2636 vm_page_wakeup(m); 2637 } 2638 } 2639} 2640 2641/* 2642 * Routine: pmap_change_wiring 2643 * Function: Change the wiring attribute for a map/virtual-address 2644 * pair. 2645 * In/out conditions: 2646 * The mapping must already exist in the pmap. 2647 */ 2648void 2649pmap_change_wiring(pmap, va, wired) 2650 register pmap_t pmap; 2651 vm_offset_t va; 2652 boolean_t wired; 2653{ 2654 register unsigned *pte; 2655 2656 if (pmap == NULL) 2657 return; 2658 2659 pte = pmap_pte(pmap, va); 2660 2661 if (wired && !pmap_pte_w(pte)) 2662 pmap->pm_stats.wired_count++; 2663 else if (!wired && pmap_pte_w(pte)) 2664 pmap->pm_stats.wired_count--; 2665 2666 /* 2667 * Wiring is not a hardware characteristic so there is no need to 2668 * invalidate TLB. 2669 */ 2670 pmap_pte_set_w(pte, wired); 2671} 2672 2673 2674 2675/* 2676 * Copy the range specified by src_addr/len 2677 * from the source map to the range dst_addr/len 2678 * in the destination map. 2679 * 2680 * This routine is only advisory and need not do anything. 2681 */ 2682 2683void 2684pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 2685 pmap_t dst_pmap, src_pmap; 2686 vm_offset_t dst_addr; 2687 vm_size_t len; 2688 vm_offset_t src_addr; 2689{ 2690 vm_offset_t addr; 2691 vm_offset_t end_addr = src_addr + len; 2692 vm_offset_t pdnxt; 2693 unsigned src_frame, dst_frame; 2694 2695 if (dst_addr != src_addr) 2696 return; 2697 2698 src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; 2699 if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) { 2700 return; 2701 } 2702 2703 dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; 2704 if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { 2705 APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); 2706 invltlb(); 2707 } 2708 2709 for(addr = src_addr; addr < end_addr; addr = pdnxt) { 2710 unsigned *src_pte, *dst_pte; 2711 vm_page_t dstmpte, srcmpte; 2712 vm_offset_t srcptepaddr; 2713 unsigned ptepindex; 2714 2715#if !defined(MAX_PERF) 2716 if (addr >= UPT_MIN_ADDRESS) 2717 panic("pmap_copy: invalid to pmap_copy page tables\n"); 2718#endif 2719 2720 pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); 2721 ptepindex = addr >> PDRSHIFT; 2722 2723 srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; 2724 if (srcptepaddr == 0) 2725 continue; 2726 2727 if (srcptepaddr & PG_PS) { 2728 if (dst_pmap->pm_pdir[ptepindex] == 0) { 2729 dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr; 2730 dst_pmap->pm_stats.resident_count += NBPDR; 2731 } 2732 continue; 2733 } 2734 2735 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); 2736 if ((srcmpte == NULL) || 2737 (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) 2738 continue; 2739 2740 if (pdnxt > end_addr) 2741 pdnxt = end_addr; 2742 2743 src_pte = (unsigned *) vtopte(addr); 2744 dst_pte = (unsigned *) avtopte(addr); 2745 while (addr < pdnxt) { 2746 unsigned ptetemp; 2747 ptetemp = *src_pte; 2748 /* 2749 * we only virtual copy managed pages 2750 */ 2751 if ((ptetemp & PG_MANAGED) != 0) { 2752 /* 2753 * We have to check after allocpte for the 2754 * pte still being around... allocpte can 2755 * block. 2756 */ 2757 dstmpte = pmap_allocpte(dst_pmap, addr); 2758 if ((*dst_pte == 0) && (ptetemp = *src_pte)) { 2759 /* 2760 * Clear the modified and 2761 * accessed (referenced) bits 2762 * during the copy. 2763 */ 2764 *dst_pte = ptetemp & ~(PG_M | PG_A); 2765 dst_pmap->pm_stats.resident_count++; 2766 pmap_insert_entry(dst_pmap, addr, 2767 dstmpte, 2768 (ptetemp & PG_FRAME)); 2769 } else { 2770 pmap_unwire_pte_hold(dst_pmap, dstmpte); 2771 } 2772 if (dstmpte->hold_count >= srcmpte->hold_count) 2773 break; 2774 } 2775 addr += PAGE_SIZE; 2776 src_pte++; 2777 dst_pte++; 2778 } 2779 } 2780} 2781 2782/* 2783 * Routine: pmap_kernel 2784 * Function: 2785 * Returns the physical map handle for the kernel. 2786 */ 2787pmap_t 2788pmap_kernel() 2789{ 2790 return (kernel_pmap); 2791} 2792 2793/* 2794 * pmap_zero_page zeros the specified (machine independent) 2795 * page by mapping the page into virtual memory and using 2796 * bzero to clear its contents, one machine dependent page 2797 * at a time. 2798 */ 2799void 2800pmap_zero_page(phys) 2801 vm_offset_t phys; 2802{ 2803#ifdef SMP 2804#if !defined(MAX_PERF) 2805 if (*(int *) prv_CMAP3) 2806 panic("pmap_zero_page: prv_CMAP3 busy"); 2807#endif 2808 2809 *(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; 2810 cpu_invlpg(&prv_CPAGE3); 2811 2812#if defined(I686_CPU) 2813 if (cpu == CPU_686) 2814 i686_pagezero(&prv_CPAGE3); 2815 else 2816#endif 2817 bzero(&prv_CPAGE3, PAGE_SIZE); 2818 2819 *(int *) prv_CMAP3 = 0; 2820#else 2821#if !defined(MAX_PERF) 2822 if (*(int *) CMAP2) 2823 panic("pmap_zero_page: CMAP2 busy"); 2824#endif 2825 2826 *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; 2827 if (cpu_class == CPUCLASS_386) { 2828 invltlb(); 2829 } else { 2830 invlpg((u_int)CADDR2); 2831 } 2832 2833#if defined(I686_CPU) 2834 if (cpu == CPU_686) 2835 i686_pagezero(CADDR2); 2836 else 2837#endif 2838 bzero(CADDR2, PAGE_SIZE); 2839 *(int *) CMAP2 = 0; 2840#endif 2841} 2842 2843/* 2844 * pmap_copy_page copies the specified (machine independent) 2845 * page by mapping the page into virtual memory and using 2846 * bcopy to copy the page, one machine dependent page at a 2847 * time. 2848 */ 2849void 2850pmap_copy_page(src, dst) 2851 vm_offset_t src; 2852 vm_offset_t dst; 2853{ 2854#ifdef SMP 2855#if !defined(MAX_PERF) 2856 if (*(int *) prv_CMAP1) 2857 panic("pmap_copy_page: prv_CMAP1 busy"); 2858 if (*(int *) prv_CMAP2) 2859 panic("pmap_copy_page: prv_CMAP2 busy"); 2860#endif 2861 2862 *(int *) prv_CMAP1 = PG_V | (src & PG_FRAME) | PG_A; 2863 *(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; 2864 2865 cpu_invlpg(&prv_CPAGE1); 2866 cpu_invlpg(&prv_CPAGE2); 2867 2868 bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE); 2869 2870 *(int *) prv_CMAP1 = 0; 2871 *(int *) prv_CMAP2 = 0; 2872#else 2873#if !defined(MAX_PERF) 2874 if (*(int *) CMAP1 || *(int *) CMAP2) 2875 panic("pmap_copy_page: CMAP busy"); 2876#endif 2877 2878 *(int *) CMAP1 = PG_V | (src & PG_FRAME) | PG_A; 2879 *(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; 2880 if (cpu_class == CPUCLASS_386) { 2881 invltlb(); 2882 } else { 2883 invlpg((u_int)CADDR1); 2884 invlpg((u_int)CADDR2); 2885 } 2886 2887 bcopy(CADDR1, CADDR2, PAGE_SIZE); 2888 2889 *(int *) CMAP1 = 0; 2890 *(int *) CMAP2 = 0; 2891#endif 2892} 2893 2894 2895/* 2896 * Routine: pmap_pageable 2897 * Function: 2898 * Make the specified pages (by pmap, offset) 2899 * pageable (or not) as requested. 2900 * 2901 * A page which is not pageable may not take 2902 * a fault; therefore, its page table entry 2903 * must remain valid for the duration. 2904 * 2905 * This routine is merely advisory; pmap_enter 2906 * will specify that these pages are to be wired 2907 * down (or not) as appropriate. 2908 */ 2909void 2910pmap_pageable(pmap, sva, eva, pageable) 2911 pmap_t pmap; 2912 vm_offset_t sva, eva; 2913 boolean_t pageable; 2914{ 2915} 2916 2917/* 2918 * this routine returns true if a physical page resides 2919 * in the given pmap. 2920 */ 2921boolean_t 2922pmap_page_exists(pmap, pa) 2923 pmap_t pmap; 2924 vm_offset_t pa; 2925{ 2926 register pv_entry_t pv; 2927 pv_table_t *ppv; 2928 int s; 2929 2930 if (!pmap_is_managed(pa)) 2931 return FALSE; 2932 2933 s = splvm(); 2934 2935 ppv = pa_to_pvh(pa); 2936 /* 2937 * Not found, check current mappings returning immediately if found. 2938 */ 2939 for (pv = TAILQ_FIRST(&ppv->pv_list); 2940 pv; 2941 pv = TAILQ_NEXT(pv, pv_list)) { 2942 if (pv->pv_pmap == pmap) { 2943 splx(s); 2944 return TRUE; 2945 } 2946 } 2947 splx(s); 2948 return (FALSE); 2949} 2950 2951#define PMAP_REMOVE_PAGES_CURPROC_ONLY 2952/* 2953 * Remove all pages from specified address space 2954 * this aids process exit speeds. Also, this code 2955 * is special cased for current process only, but 2956 * can have the more generic (and slightly slower) 2957 * mode enabled. This is much faster than pmap_remove 2958 * in the case of running down an entire address space. 2959 */ 2960void 2961pmap_remove_pages(pmap, sva, eva) 2962 pmap_t pmap; 2963 vm_offset_t sva, eva; 2964{ 2965 unsigned *pte, tpte; 2966 pv_table_t *ppv; 2967 pv_entry_t pv, npv; 2968 int s; 2969 2970#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2971 if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) { 2972 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2973 return; 2974 } 2975#endif 2976 2977 s = splvm(); 2978 for(pv = TAILQ_FIRST(&pmap->pm_pvlist); 2979 pv; 2980 pv = npv) { 2981 2982 if (pv->pv_va >= eva || pv->pv_va < sva) { 2983 npv = TAILQ_NEXT(pv, pv_plist); 2984 continue; 2985 } 2986 2987#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2988 pte = (unsigned *)vtopte(pv->pv_va); 2989#else 2990 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 2991#endif 2992 tpte = *pte; 2993 2994/* 2995 * We cannot remove wired pages from a process' mapping at this time 2996 */ 2997 if (tpte & PG_W) { 2998 npv = TAILQ_NEXT(pv, pv_plist); 2999 continue; 3000 } 3001 *pte = 0; 3002 3003 ppv = pa_to_pvh(tpte); 3004 3005 pv->pv_pmap->pm_stats.resident_count--; 3006 3007 /* 3008 * Update the vm_page_t clean and reference bits. 3009 */ 3010 if (tpte & PG_M) { 3011 ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL; 3012 } 3013 3014 3015 npv = TAILQ_NEXT(pv, pv_plist); 3016 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 3017 3018 ppv->pv_list_count--; 3019 TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); 3020 if (TAILQ_FIRST(&ppv->pv_list) == NULL) { 3021 ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE); 3022 } 3023 3024 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 3025 free_pv_entry(pv); 3026 } 3027 splx(s); 3028 invltlb(); 3029} 3030 3031/* 3032 * pmap_testbit tests bits in pte's 3033 * note that the testbit/changebit routines are inline, 3034 * and a lot of things compile-time evaluate. 3035 */ 3036static boolean_t 3037pmap_testbit(pa, bit) 3038 register vm_offset_t pa; 3039 int bit; 3040{ 3041 register pv_entry_t pv; 3042 pv_table_t *ppv; 3043 unsigned *pte; 3044 int s; 3045 3046 if (!pmap_is_managed(pa)) 3047 return FALSE; 3048 3049 ppv = pa_to_pvh(pa); 3050 if (TAILQ_FIRST(&ppv->pv_list) == NULL) 3051 return FALSE; 3052 3053 s = splvm(); 3054 3055 for (pv = TAILQ_FIRST(&ppv->pv_list); 3056 pv; 3057 pv = TAILQ_NEXT(pv, pv_list)) { 3058 3059 /* 3060 * if the bit being tested is the modified bit, then 3061 * mark clean_map and ptes as never 3062 * modified. 3063 */ 3064 if (bit & (PG_A|PG_M)) { 3065 if (!pmap_track_modified(pv->pv_va)) 3066 continue; 3067 } 3068 3069#if defined(PMAP_DIAGNOSTIC) 3070 if (!pv->pv_pmap) { 3071 printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); 3072 continue; 3073 } 3074#endif 3075 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3076 if (*pte & bit) { 3077 splx(s); 3078 return TRUE; 3079 } 3080 } 3081 splx(s); 3082 return (FALSE); 3083} 3084 3085/* 3086 * this routine is used to modify bits in ptes 3087 */ 3088static void 3089pmap_changebit(pa, bit, setem) 3090 vm_offset_t pa; 3091 int bit; 3092 boolean_t setem; 3093{ 3094 register pv_entry_t pv; 3095 pv_table_t *ppv; 3096 register unsigned *pte; 3097 int changed; 3098 int s; 3099 3100 if (!pmap_is_managed(pa)) 3101 return; 3102 3103 s = splvm(); 3104 changed = 0; 3105 ppv = pa_to_pvh(pa); 3106 3107 /* 3108 * Loop over all current mappings setting/clearing as appropos If 3109 * setting RO do we need to clear the VAC? 3110 */ 3111 for (pv = TAILQ_FIRST(&ppv->pv_list); 3112 pv; 3113 pv = TAILQ_NEXT(pv, pv_list)) { 3114 3115 /* 3116 * don't write protect pager mappings 3117 */ 3118 if (!setem && (bit == PG_RW)) { 3119 if (!pmap_track_modified(pv->pv_va)) 3120 continue; 3121 } 3122 3123#if defined(PMAP_DIAGNOSTIC) 3124 if (!pv->pv_pmap) { 3125 printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); 3126 continue; 3127 } 3128#endif 3129 3130 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3131 3132 if (setem) { 3133 *(int *)pte |= bit; 3134 changed = 1; 3135 } else { 3136 vm_offset_t pbits = *(vm_offset_t *)pte; 3137 if (pbits & bit) { 3138 changed = 1; 3139 if (bit == PG_RW) { 3140 if (pbits & PG_M) { 3141 ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL; 3142 } 3143 *(int *)pte = pbits & ~(PG_M|PG_RW); 3144 } else { 3145 *(int *)pte = pbits & ~bit; 3146 } 3147 } 3148 } 3149 } 3150 splx(s); 3151 if (changed) 3152 invltlb(); 3153} 3154 3155/* 3156 * pmap_page_protect: 3157 * 3158 * Lower the permission for all mappings to a given page. 3159 */ 3160void 3161pmap_page_protect(vm_offset_t phys, vm_prot_t prot) 3162{ 3163 if ((prot & VM_PROT_WRITE) == 0) { 3164 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3165 pmap_changebit(phys, PG_RW, FALSE); 3166 } else { 3167 pmap_remove_all(phys); 3168 } 3169 } 3170} 3171 3172vm_offset_t 3173pmap_phys_address(ppn) 3174 int ppn; 3175{ 3176 return (i386_ptob(ppn)); 3177} 3178 3179/* 3180 * pmap_ts_referenced: 3181 * 3182 * Return the count of reference bits for a page, clearing all of them. 3183 * 3184 */ 3185int 3186pmap_ts_referenced(vm_offset_t pa) 3187{ 3188 register pv_entry_t pv; 3189 pv_table_t *ppv; 3190 unsigned *pte; 3191 int s; 3192 int rtval = 0; 3193 3194 if (!pmap_is_managed(pa)) 3195 return FALSE; 3196 3197 s = splvm(); 3198 3199 ppv = pa_to_pvh(pa); 3200 3201 if (TAILQ_FIRST(&ppv->pv_list) == NULL) { 3202 splx(s); 3203 return 0; 3204 } 3205 3206 /* 3207 * Not found, check current mappings returning immediately if found. 3208 */ 3209 for (pv = TAILQ_FIRST(&ppv->pv_list); 3210 pv; 3211 pv = TAILQ_NEXT(pv, pv_list)) { 3212 3213 TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); 3214 /* 3215 * if the bit being tested is the modified bit, then 3216 * mark clean_map and ptes as never 3217 * modified. 3218 */ 3219 if (!pmap_track_modified(pv->pv_va)) { 3220 TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list); 3221 continue; 3222 } 3223 3224 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3225 if (pte == NULL) { 3226 TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list); 3227 continue; 3228 } 3229 3230 if (*pte & PG_A) { 3231 rtval++; 3232 *pte &= ~PG_A; 3233 if (rtval > 4) { 3234 TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list); 3235 break; 3236 } 3237 } 3238 TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list); 3239 } 3240 3241 splx(s); 3242 if (rtval) { 3243 invltlb(); 3244 } 3245 return (rtval); 3246} 3247 3248/* 3249 * pmap_is_modified: 3250 * 3251 * Return whether or not the specified physical page was modified 3252 * in any physical maps. 3253 */ 3254boolean_t 3255pmap_is_modified(vm_offset_t pa) 3256{ 3257 return pmap_testbit((pa), PG_M); 3258} 3259 3260/* 3261 * Clear the modify bits on the specified physical page. 3262 */ 3263void 3264pmap_clear_modify(vm_offset_t pa) 3265{ 3266 pmap_changebit((pa), PG_M, FALSE); 3267} 3268 3269/* 3270 * pmap_clear_reference: 3271 * 3272 * Clear the reference bit on the specified physical page. 3273 */ 3274void 3275pmap_clear_reference(vm_offset_t pa) 3276{ 3277 pmap_changebit((pa), PG_A, FALSE); 3278} 3279 3280/* 3281 * Miscellaneous support routines follow 3282 */ 3283 3284static void 3285i386_protection_init() 3286{ 3287 register int *kp, prot; 3288 3289 kp = protection_codes; 3290 for (prot = 0; prot < 8; prot++) { 3291 switch (prot) { 3292 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: 3293 /* 3294 * Read access is also 0. There isn't any execute bit, 3295 * so just make it readable. 3296 */ 3297 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: 3298 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: 3299 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: 3300 *kp++ = 0; 3301 break; 3302 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: 3303 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: 3304 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: 3305 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: 3306 *kp++ = PG_RW; 3307 break; 3308 } 3309 } 3310} 3311 3312/* 3313 * Map a set of physical memory pages into the kernel virtual 3314 * address space. Return a pointer to where it is mapped. This 3315 * routine is intended to be used for mapping device memory, 3316 * NOT real memory. 3317 */ 3318void * 3319pmap_mapdev(pa, size) 3320 vm_offset_t pa; 3321 vm_size_t size; 3322{ 3323 vm_offset_t va, tmpva; 3324 unsigned *pte; 3325 3326 size = roundup(size, PAGE_SIZE); 3327 3328 va = kmem_alloc_pageable(kernel_map, size); 3329#if !defined(MAX_PERF) 3330 if (!va) 3331 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 3332#endif 3333 3334 pa = pa & PG_FRAME; 3335 for (tmpva = va; size > 0;) { 3336 pte = (unsigned *)vtopte(tmpva); 3337 *pte = pa | PG_RW | PG_V | pgeflag; 3338 size -= PAGE_SIZE; 3339 tmpva += PAGE_SIZE; 3340 pa += PAGE_SIZE; 3341 } 3342 invltlb(); 3343 3344 return ((void *) va); 3345} 3346 3347/* 3348 * perform the pmap work for mincore 3349 */ 3350int 3351pmap_mincore(pmap, addr) 3352 pmap_t pmap; 3353 vm_offset_t addr; 3354{ 3355 3356 unsigned *ptep, pte; 3357 vm_page_t m; 3358 int val = 0; 3359 3360 ptep = pmap_pte(pmap, addr); 3361 if (ptep == 0) { 3362 return 0; 3363 } 3364 3365 if (pte = *ptep) { 3366 pv_table_t *ppv; 3367 vm_offset_t pa; 3368 3369 val = MINCORE_INCORE; 3370 if ((pte & PG_MANAGED) == 0) 3371 return val; 3372 3373 pa = pte & PG_FRAME; 3374 3375 ppv = pa_to_pvh((pa & PG_FRAME)); 3376 m = ppv->pv_vm_page; 3377 3378 /* 3379 * Modified by us 3380 */ 3381 if (pte & PG_M) 3382 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3383 /* 3384 * Modified by someone 3385 */ 3386 else if (m->dirty || pmap_is_modified(pa)) 3387 val |= MINCORE_MODIFIED_OTHER; 3388 /* 3389 * Referenced by us 3390 */ 3391 if (pte & PG_A) 3392 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3393 3394 /* 3395 * Referenced by someone 3396 */ 3397 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(pa)) { 3398 val |= MINCORE_REFERENCED_OTHER; 3399 m->flags |= PG_REFERENCED; 3400 } 3401 } 3402 return val; 3403} 3404 3405void 3406pmap_activate(struct proc *p) 3407{ 3408#if defined(SWTCH_OPTIM_STATS) 3409 tlb_flush_count++; 3410#endif 3411 load_cr3(p->p_addr->u_pcb.pcb_cr3 = 3412 vtophys(p->p_vmspace->vm_pmap.pm_pdir)); 3413} 3414 3415vm_offset_t 3416pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { 3417 3418 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3419 return addr; 3420 } 3421 3422 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 3423 return addr; 3424} 3425 3426 3427#if defined(PMAP_DEBUG) 3428pmap_pid_dump(int pid) { 3429 pmap_t pmap; 3430 struct proc *p; 3431 int npte = 0; 3432 int index; 3433 for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) { 3434 if (p->p_pid != pid) 3435 continue; 3436 3437 if (p->p_vmspace) { 3438 int i,j; 3439 index = 0; 3440 pmap = &p->p_vmspace->vm_pmap; 3441 for(i=0;i<1024;i++) { 3442 pd_entry_t *pde; 3443 unsigned *pte; 3444 unsigned base = i << PDRSHIFT; 3445 3446 pde = &pmap->pm_pdir[i]; 3447 if (pde && pmap_pde_v(pde)) { 3448 for(j=0;j<1024;j++) { 3449 unsigned va = base + (j << PAGE_SHIFT); 3450 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 3451 if (index) { 3452 index = 0; 3453 printf("\n"); 3454 } 3455 return npte; 3456 } 3457 pte = pmap_pte_quick( pmap, va); 3458 if (pte && pmap_pte_v(pte)) { 3459 vm_offset_t pa; 3460 vm_page_t m; 3461 pa = *(int *)pte; 3462 m = PHYS_TO_VM_PAGE((pa & PG_FRAME)); 3463 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 3464 va, pa, m->hold_count, m->wire_count, m->flags); 3465 npte++; 3466 index++; 3467 if (index >= 2) { 3468 index = 0; 3469 printf("\n"); 3470 } else { 3471 printf(" "); 3472 } 3473 } 3474 } 3475 } 3476 } 3477 } 3478 } 3479 return npte; 3480} 3481#endif 3482 3483#if defined(DEBUG) 3484 3485static void pads __P((pmap_t pm)); 3486static void pmap_pvdump __P((vm_offset_t pa)); 3487 3488/* print address space of pmap*/ 3489static void 3490pads(pm) 3491 pmap_t pm; 3492{ 3493 unsigned va, i, j; 3494 unsigned *ptep; 3495 3496 if (pm == kernel_pmap) 3497 return; 3498 for (i = 0; i < 1024; i++) 3499 if (pm->pm_pdir[i]) 3500 for (j = 0; j < 1024; j++) { 3501 va = (i << PDRSHIFT) + (j << PAGE_SHIFT); 3502 if (pm == kernel_pmap && va < KERNBASE) 3503 continue; 3504 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 3505 continue; 3506 ptep = pmap_pte_quick(pm, va); 3507 if (pmap_pte_v(ptep)) 3508 printf("%x:%x ", va, *(int *) ptep); 3509 }; 3510 3511} 3512 3513static void 3514pmap_pvdump(pa) 3515 vm_offset_t pa; 3516{ 3517 pv_table_t *ppv; 3518 register pv_entry_t pv; 3519 3520 printf("pa %x", pa); 3521 ppv = pa_to_pvh(pa); 3522 for (pv = TAILQ_FIRST(&ppv->pv_list); 3523 pv; 3524 pv = TAILQ_NEXT(pv, pv_list)) { 3525#ifdef used_to_be 3526 printf(" -> pmap %p, va %x, flags %x", 3527 (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags); 3528#endif 3529 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3530 pads(pv->pv_pmap); 3531 } 3532 printf(" "); 3533} 3534#endif 3535