/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 * $FreeBSD: head/sys/i386/i386/pmap.c 91341 2002-02-27 01:48:17Z dillon $
 */

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduce protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
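 *
 * For orientation: on the i386 a linear address is translated in two
 * levels.  Bits 31-22 select one of 1024 page directory entries,
 * bits 21-12 select a page table entry within the 4KB page table the
 * PDE points at, and bits 11-0 are the offset within the page.  The
 * pmap_pde() macro below encodes the first step directly (PDRSHIFT
 * is 22 on the i386):
 *
 *	pde = &pmap->pm_pdir[va >> PDRSHIFT];
 *
 * while the recursive mapping trick (see get_ptbase()) exposes the
 * second step as a simple array lookup, PTmap[va >> PAGE_SHIFT].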
 */

#include "opt_disable_pse.h"
#include "opt_pmap.h"
#include "opt_msgbuf.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
#if defined(SMP)
#include <sys/smp.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_zone.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#if defined(SMP) || defined(APIC_IO)
#include <machine/apic.h>
#include <machine/segments.h>
#include <machine/tss.h>
#endif /* SMP || APIC_IO */

#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define	pdir_pde(m, v)	(m[(vm_offset_t)(v) >> PDRSHIFT])

#define	pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define	pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define	pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define	pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
#define	pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)

#define	pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
#define	pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))

/*
 * Given a map and a machine independent protection code,
 * convert to an i386 protection code.
 */
#define	pte_prot(m, p)	(protection_codes[p])
static int protection_codes[8];

static struct pmap kernel_pmap_store;
pmap_t kernel_pmap;
LIST_HEAD(pmaplist, pmap);
struct pmaplist allpmaps;

vm_offset_t avail_start;	/* PA of first available physical page */
vm_offset_t avail_end;		/* PA of last available physical page */
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
static int pgeflag;		/* PG_G or-in */
static int pseflag;		/* PG_PS or-in */

static vm_object_t kptobj;

static int nkpt;
vm_offset_t kernel_vm_end;

/*
 * Data for the pv entry allocation mechanism
 */
static vm_zone_t pvzone;
static struct vm_zone pvzone_store;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static int pmap_pagedaemon_waken = 0;
static struct pv_entry *pvinit;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = 0;
static pt_entry_t *CMAP2, *ptmmap;
caddr_t CADDR1 = 0, ptvmmap = 0;
static caddr_t CADDR2;
static pt_entry_t *msgbufmap;
struct msgbuf *msgbufp = 0;

/*
 * Crashdump maps.
 */
static pt_entry_t *pt_crashdumpmap;
static caddr_t crashdumpmap;

#ifdef SMP
extern pt_entry_t *SMPpt;
#endif
static pt_entry_t *PMAP1 = 0;
static pt_entry_t *PADDR1 = 0;

static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
static pt_entry_t *get_ptbase __P((pmap_t pmap));
static pv_entry_t get_pv_entry __P((void));
static void	i386_protection_init __P((void));
static __inline void	pmap_changebit __P((vm_page_t m, int bit, boolean_t setem));

static void	pmap_remove_all __P((vm_page_t m));
static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
				      vm_page_t m, vm_page_t mpte));
static int pmap_remove_pte __P((pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva));
static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m,
					vm_offset_t va));
static boolean_t pmap_testbit __P((vm_page_t m, int bit));
static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
		vm_page_t mpte, vm_page_t m));

static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));

static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
static pt_entry_t *pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);

static pd_entry_t pdir4mb;

/*
 *	Routine:	pmap_pte
 *	Function:
 *		Extract the page table entry associated
 *		with the given map/virtual_address pair.
 */

PMAP_INLINE pt_entry_t *
pmap_pte(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	pd_entry_t *pdeaddr;

	if (pmap) {
		pdeaddr = pmap_pde(pmap, va);
		if (*pdeaddr & PG_PS)
			return pdeaddr;
		if (*pdeaddr) {
			return get_ptbase(pmap) + i386_btop(va);
		}
	}
	return (0);
}

/*
 * Move the kernel virtual free pointer to the next
 * 4MB.  This is used to help improve performance
 * by using a large (4MB) page for much of the kernel
 * (.text, .data, .bss)
 */
static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
	vm_offset_t newaddr = addr;

#ifndef DISABLE_PSE
	if (cpu_feature & CPUID_PSE)
		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
#endif
	return newaddr;
}

/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	On the i386 this is called after mapping has already been enabled
 *	and just syncs the pmap module with what has already been done.
 *	[We can't call it easily with mapping off since the kernel is not
 *	mapped with PA == VA, hence we would have to relocate every address
 *	from the linked base (virtual) address "KERNBASE" to the actual
 *	(physical) address starting relative to 0]
 */
void
pmap_bootstrap(firstaddr, loadaddr)
	vm_offset_t firstaddr;
	vm_offset_t loadaddr;
{
	vm_offset_t va;
	pt_entry_t *pte;
	int i;

	avail_start = firstaddr;

	/*
	 * XXX The calculation of virtual_avail is wrong.  It's NKPT*PAGE_SIZE
	 * too large.  It should instead be correctly calculated in locore.s
	 * and not based on 'first' (which is a physical address, not a
	 * virtual address, for the start of unused physical memory).
	 * The kernel page tables are NOT double mapped and thus should not
	 * be included in this calculation.
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
	virtual_avail = pmap_kmem_choose(virtual_avail);

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence (XXX and which no longer exists).
	 */
	kernel_pmap = &kernel_pmap_store;

	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
	kernel_pmap->pm_count = 1;
	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	LIST_INIT(&allpmaps);
	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
	nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

	va = virtual_avail;
	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 */
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
	SYSMAP(caddr_t, CMAP2, CADDR2, 1)

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);

	/*
	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
	 * XXX ptmmap is not used.
	 */
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 * XXX msgbufmap is not used.
	 */
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
	    atop(round_page(MSGBUF_SIZE)))

	/*
	 * ptemap is used for pmap_pte_quick
	 */
	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);

	virtual_avail = va;

	*CMAP1 = *CMAP2 = 0;
	for (i = 0; i < NKPT; i++)
		PTD[i] = 0;

	pgeflag = 0;
#if !defined(SMP) || defined(ENABLE_PG_G)
	if (cpu_feature & CPUID_PGE)
		pgeflag = PG_G;
#endif

	/*
	 * Initialize the 4MB page size flag
	 */
	pseflag = 0;
	/*
	 * The 4MB page version of the initial
	 * kernel page mapping.
	 */
	pdir4mb = 0;

#ifndef DISABLE_PSE
	if (cpu_feature & CPUID_PSE) {
		pd_entry_t ptditmp;
		/*
		 * Note that we have enabled PSE mode
		 */
		pseflag = PG_PS;
		ptditmp = *(PTmap + i386_btop(KERNBASE));
		ptditmp &= ~(NBPDR - 1);
		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
		pdir4mb = ptditmp;
	}
#endif
#ifndef SMP
	/*
	 * Turn on PGE/PSE.  SMP does this later on since the
	 * 4K page tables are required for AP boot (for now).
	 * XXX fixme.
	 */
	pmap_set_opt();
#endif
#ifdef SMP
	if (cpu_apic_address == 0)
		panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
	/* local apic is mapped on last page */
	SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
	    (cpu_apic_address & PG_FRAME));
#endif
	cpu_invltlb();
}

/*
 * Enable 4MB page mode for MP startup.  Turn on PG_G support.
 * The BSP will run this after all the APs have started up.
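 *
 * For reference: the CR4_PSE bit allows a page directory entry with
 * PG_PS set to map a 4MB page directly, with no second-level page
 * table, and CR4_PGE makes PG_G mappings global, i.e. they survive a
 * CR3 reload.  That is why kernel TLB entries marked PG_G need not
 * be flushed on every context switch.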
 */
void
pmap_set_opt(void)
{
	pt_entry_t *pte;
	vm_offset_t va;

	if (pgeflag && (cpu_feature & CPUID_PGE))
		load_cr4(rcr4() | CR4_PGE);
#ifndef DISABLE_PSE
	if (pseflag && (cpu_feature & CPUID_PSE))
		load_cr4(rcr4() | CR4_PSE);
#endif
	if (PCPU_GET(cpuid) == 0) {
#ifndef DISABLE_PSE
		if (pdir4mb)
			kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
#endif
		if (pgeflag) {
			/* XXX see earlier comments about virtual_avail */
			for (va = KERNBASE; va < virtual_avail; va += PAGE_SIZE) {
				pte = vtopte(va);
				if (*pte)
					*pte |= pgeflag;
			}
		}
		/*
		 * for SMP, this will cause all cpus to reload again, which
		 * is actually what we want since they now have CR4_PGE on.
		 */
		invltlb();
	} else
		cpu_invltlb();
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 *	pmap_init has been enhanced to support, in a fairly consistent
 *	way, discontiguous physical memory.
 */
void
pmap_init(phys_start, phys_end)
	vm_offset_t phys_start, phys_end;
{
	int i;
	int initial_pvs;

	/*
	 * object for kernel page table pages
	 */
	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */

	for(i = 0; i < vm_page_array_size; i++) {
		vm_page_t m;

		m = &vm_page_array[i];
		TAILQ_INIT(&m->md.pv_list);
		m->md.pv_list_count = 0;
	}

	/*
	 * init the pv free list
	 */
	initial_pvs = vm_page_array_size;
	if (initial_pvs < MINPV)
		initial_pvs = MINPV;
	pvzone = &pvzone_store;
	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
	    initial_pvs * sizeof (struct pv_entry));
	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
	    vm_page_array_size);

	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}

/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2()
{
	int shpgperproc = PMAP_SHPGPERPROC;

	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);
	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
}


/***************************************************
 * Low level helper routines.....
 ***************************************************/

#if defined(PMAP_DIAGNOSTIC)

/*
 * This code checks for non-writeable/modified pages.
 * This should be an invalid condition.
 */
static int
pmap_nw_modified(pt_entry_t ptea)
{
	int pte;

	pte = (int) ptea;

	if ((pte & (PG_M|PG_RW)) == PG_M)
		return 1;
	else
		return 0;
}
#endif


/*
 * Decide whether the modified (PG_M) bit of a mapping at the given
 * virtual address should be tracked; mappings inside the kernel
 * "clean" submap are excluded.
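 * The clean submap (kmi.clean_sva .. kmi.clean_eva) holds transient
 * buffer cache mappings whose dirty state the buffer layer accounts
 * for itself, so reflecting PG_M into vm_page_dirty() for those
 * addresses would be redundant; the callers below therefore check
 * pmap_track_modified() before dirtying a page.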
 */
static PMAP_INLINE int
pmap_track_modified(vm_offset_t va)
{
	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
		return 1;
	else
		return 0;
}

static __inline void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
#if defined(SMP)
	u_int cpumask;
	u_int other_cpus;
	struct thread *td;

	td = curthread;
	critical_enter();
	/*
	 * We need to disable interrupt preemption but MUST NOT have
	 * interrupts disabled here.
	 * XXX we may need to hold schedlock to get a coherent pm_active
	 */
	if (td->td_critnest == 1)
		cpu_critical_exit(td->td_savecrit);
	if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
		invlpg(va);	/* global */
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			cpu_invlpg(va);
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
	}
	critical_exit();
#else
	if (pmap->pm_active)
		cpu_invlpg(va);
#endif
}

static __inline void
pmap_invalidate_all(pmap_t pmap)
{
#if defined(SMP)
	u_int cpumask;
	u_int other_cpus;
	struct thread *td;

	td = curthread;
	critical_enter();
	/*
	 * We need to disable interrupt preemption but MUST NOT have
	 * interrupts disabled here.
	 * XXX we may need to hold schedlock to get a coherent pm_active
	 */
	if (td->td_critnest == 1)
		cpu_critical_exit(td->td_savecrit);
	if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
		invltlb();	/* global */
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			cpu_invltlb();
		if (pmap->pm_active & other_cpus)
			smp_masked_invltlb(pmap->pm_active & other_cpus);
	}
	critical_exit();
#else
	if (pmap->pm_active)
		invltlb();
#endif
}

/*
 * Return an address which is the base of the Virtual mapping of
 * all the PTEs for the given pmap.  Note this doesn't say that
 * all the PTEs will be present or that the pages there are valid.
 * The PTEs are made available by the recursive mapping trick.
 * It will map in the alternate PTE space if needed.
 */
static pt_entry_t *
get_ptbase(pmap)
	pmap_t pmap;
{
	pd_entry_t frame = pmap->pm_pdir[PTDPTDI] & PG_FRAME;

	/* are we current address space or kernel? */
	if (pmap == kernel_pmap || frame == (PTDpde & PG_FRAME))
		return PTmap;
	/* otherwise, we are alternate address space */
	if (frame != (APTDpde & PG_FRAME)) {
		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
		invltlb();
	}
	return APTmap;
}

/*
 * Super fast pmap_pte routine best used when scanning
 * the pv lists.  This eliminates many coarse-grained
 * invltlb calls.  Note that many of the pv list
 * scans are across different pmaps.  It is very wasteful
 * to do an entire invltlb for checking a single mapping.
 */

static pt_entry_t *
pmap_pte_quick(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	pd_entry_t pde, newpf;
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		pd_entry_t frame = pmap->pm_pdir[PTDPTDI] & PG_FRAME;
		unsigned index = i386_btop(va);
		/* are we current address space or kernel? */
		if (pmap == kernel_pmap || frame == (PTDpde & PG_FRAME))
			return PTmap + index;
		newpf = pde & PG_FRAME;
		if (((*PMAP1) & PG_FRAME) != newpf) {
			*PMAP1 = newpf | PG_RW | PG_V;
			pmap_invalidate_page(pmap, (vm_offset_t) PADDR1);
		}
		return PADDR1 + (index & (NPTEPG - 1));
	}
	return (0);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	vm_offset_t rtval;	/* XXX FIXME */
	vm_offset_t pdirindex;

	if (pmap == 0)
		return 0;
	pdirindex = va >> PDRSHIFT;
	rtval = pmap->pm_pdir[pdirindex];
	if (rtval != 0) {
		pt_entry_t *pte;
		if ((rtval & PG_PS) != 0) {
			rtval &= ~(NBPDR - 1);
			rtval |= va & (NBPDR - 1);
			return rtval;
		}
		pte = get_ptbase(pmap) + i386_btop(va);
		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
		return rtval;
	}
	return 0;

}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * add a wired page to the kva
 */
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
	pt_entry_t *pte;
	pt_entry_t npte;

	npte = pa | PG_RW | PG_V | pgeflag;
	pte = vtopte(va);
	*pte = npte;
	invlpg(va);
}

/*
 * remove a page from the kernel pagetables
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	register pt_entry_t *pte;

	pte = vtopte(va);
	*pte = 0;
	invlpg(va);
}

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	The value passed in '*virt' is a suggested virtual address for
 *	the mapping.  Architectures which can support a direct-mapped
 *	physical to virtual region can return the appropriate address
 *	within that region, leaving '*virt' unchanged.  Other
 *	architectures should map the pages starting at '*virt' and
 *	update '*virt' with the first usable address after the mapped
 *	region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
	vm_offset_t va, sva;
	pt_entry_t *pte;

	va = sva = *virt;
	while (start < end) {
		pte = vtopte(va);
		*pte = start | PG_RW | PG_V | pgeflag;
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	invlpg_range(sva, end);
	*virt = va;
	return (sva);
}


/*
 * Add a list of wired pages to the kva.
 * This routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
	vm_offset_t va, end_va;
	pt_entry_t *pte;

	va = sva;
	end_va = va + count * PAGE_SIZE;

	while (va < end_va) {
		pte = vtopte(va);
		*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
		va += PAGE_SIZE;
		m++;
	}
	invlpg_range(sva, end_va);
}

/*
 * This routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
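 *
 * A typical (hypothetical) caller pairs it with pmap_qenter() above:
 *
 *	pmap_qenter(va, pages, npages);		map wired pages at va
 *	... use the mapping ...
 *	pmap_qremove(va, npages);		tear it down again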
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	pt_entry_t *pte;
	vm_offset_t va, end_va;

	va = sva;
	end_va = va + count * PAGE_SIZE;

	while (va < end_va) {
		pte = vtopte(va);
		*pte = 0;
		va += PAGE_SIZE;
	}
	invlpg_range(sva, end_va);
}

static vm_page_t
pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;
retry:
	m = vm_page_lookup(object, pindex);
	if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
		goto retry;
	return m;
}

/*
 * Create the U-area for a new process.
 * This routine directly affects the fork perf for a process.
 */
void
pmap_new_proc(struct proc *p)
{
	int i;
	vm_object_t upobj;
	vm_offset_t up;
	vm_page_t m;
	pt_entry_t *ptek, oldpte;

	/*
	 * allocate object for the upages
	 */
	upobj = p->p_upages_obj;
	if (upobj == NULL) {
		upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES);
		p->p_upages_obj = upobj;
	}

	/* get a kernel virtual address for the U area for this process */
	up = (vm_offset_t)p->p_uarea;
	if (up == 0) {
		up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE);
		if (up == 0)
			panic("pmap_new_proc: upage allocation failed");
		p->p_uarea = (struct user *)up;
	}

	ptek = vtopte(up);

	for (i = 0; i < UAREA_PAGES; i++) {
		/*
		 * Get a U-area page
		 */
		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);

		/*
		 * Wire the page
		 */
		m->wire_count++;
		cnt.v_wire_count++;

		oldpte = *(ptek + i);
		/*
		 * Enter the page into the kernel address space.
		 */
		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
		if (oldpte)
			invlpg(up + i * PAGE_SIZE);

		vm_page_wakeup(m);
		vm_page_flag_clear(m, PG_ZERO);
		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
		m->valid = VM_PAGE_BITS_ALL;
	}
}

/*
 * Dispose of the U-area for a process that has exited.
 * This routine directly impacts the exit perf of a process.
 */
void
pmap_dispose_proc(p)
	struct proc *p;
{
	int i;
	vm_object_t upobj;
	vm_offset_t up;
	vm_page_t m;
	pt_entry_t *ptek;

	upobj = p->p_upages_obj;
	up = (vm_offset_t)p->p_uarea;
	ptek = vtopte(up);
	for (i = 0; i < UAREA_PAGES; i++) {
		m = vm_page_lookup(upobj, i);
		if (m == NULL)
			panic("pmap_dispose_proc: upage already missing?");
		vm_page_busy(m);
		*(ptek + i) = 0;
		invlpg(up + i * PAGE_SIZE);
		vm_page_unwire(m, 0);
		vm_page_free(m);
	}
}

/*
 * Allow the U-area for a process to be prejudicially paged out.
 */
void
pmap_swapout_proc(p)
	struct proc *p;
{
	int i;
	vm_object_t upobj;
	vm_offset_t up;
	vm_page_t m;

	upobj = p->p_upages_obj;
	up = (vm_offset_t)p->p_uarea;
	for (i = 0; i < UAREA_PAGES; i++) {
		m = vm_page_lookup(upobj, i);
		if (m == NULL)
			panic("pmap_swapout_proc: upage already missing?");
		vm_page_dirty(m);
		vm_page_unwire(m, 0);
		pmap_kremove(up + i * PAGE_SIZE);
	}
}

/*
 * Bring the U-area for a specified process back in.
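 *
 * This undoes pmap_swapout_proc() above: the U-area pages were
 * dirtied and unwired there so the pagedaemon could reclaim them,
 * so any page that is no longer fully valid has to be paged back in
 * through vm_pager_get_pages() before being rewired.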
 */
void
pmap_swapin_proc(p)
	struct proc *p;
{
	int i, rv;
	vm_object_t upobj;
	vm_offset_t up;
	vm_page_t m;

	upobj = p->p_upages_obj;
	up = (vm_offset_t)p->p_uarea;
	for (i = 0; i < UAREA_PAGES; i++) {
		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
		pmap_kenter(up + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m));
		if (m->valid != VM_PAGE_BITS_ALL) {
			rv = vm_pager_get_pages(upobj, &m, 1, 0);
			if (rv != VM_PAGER_OK)
				panic("pmap_swapin_proc: cannot get upage for proc: %d\n", p->p_pid);
			m = vm_page_lookup(upobj, i);
			m->valid = VM_PAGE_BITS_ALL;
		}
		vm_page_wire(m);
		vm_page_wakeup(m);
		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
	}
}

/*
 * Create the kernel stack (including pcb for i386) for a new thread.
 * This routine directly affects the fork perf for a process and the
 * create performance for a thread.
 */
void
pmap_new_thread(struct thread *td)
{
	int i;
	vm_object_t ksobj;
	vm_page_t m;
	vm_offset_t ks;
	pt_entry_t *ptek, oldpte;

	/*
	 * allocate object for the kstack
	 */
	ksobj = td->td_kstack_obj;
	if (ksobj == NULL) {
		ksobj = vm_object_allocate(OBJT_DEFAULT, KSTACK_PAGES);
		td->td_kstack_obj = ksobj;
	}

#ifdef KSTACK_GUARD
	/* get a kernel virtual address for the kstack for this thread */
	ks = td->td_kstack;
	if (ks == 0) {
		ks = kmem_alloc_nofault(kernel_map,
		    (KSTACK_PAGES + 1) * PAGE_SIZE);
		if (ks == 0)
			panic("pmap_new_thread: kstack allocation failed");
		ks += PAGE_SIZE;
		td->td_kstack = ks;
	}

	ptek = vtopte(ks - PAGE_SIZE);
	oldpte = *ptek;
	*ptek = 0;
	if (oldpte)
		invlpg(ks - PAGE_SIZE);
	ptek++;
#else
	/* get a kernel virtual address for the kstack for this thread */
	ks = td->td_kstack;
	if (ks == 0) {
		ks = kmem_alloc_nofault(kernel_map, KSTACK_PAGES * PAGE_SIZE);
		if (ks == 0)
			panic("pmap_new_thread: kstack allocation failed");
		td->td_kstack = ks;
	}
	ptek = vtopte(ks);
#endif
	for (i = 0; i < KSTACK_PAGES; i++) {
		/*
		 * Get a kernel stack page
		 */
		m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);

		/*
		 * Wire the page
		 */
		m->wire_count++;
		cnt.v_wire_count++;

		oldpte = *(ptek + i);
		/*
		 * Enter the page into the kernel address space.
		 */
		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
		if (oldpte)
			invlpg(ks + i * PAGE_SIZE);

		vm_page_wakeup(m);
		vm_page_flag_clear(m, PG_ZERO);
		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
		m->valid = VM_PAGE_BITS_ALL;
	}
}

/*
 * Dispose of the kernel stack for a thread that has exited.
 * This routine directly impacts the exit perf of a process and thread.
 */
void
pmap_dispose_thread(td)
	struct thread *td;
{
	int i;
	vm_object_t ksobj;
	vm_offset_t ks;
	vm_page_t m;
	pt_entry_t *ptek;

	ksobj = td->td_kstack_obj;
	ks = td->td_kstack;
	ptek = vtopte(ks);
	for (i = 0; i < KSTACK_PAGES; i++) {
		m = vm_page_lookup(ksobj, i);
		if (m == NULL)
			panic("pmap_dispose_thread: kstack already missing?");
		vm_page_busy(m);
		*(ptek + i) = 0;
		invlpg(ks + i * PAGE_SIZE);
		vm_page_unwire(m, 0);
		vm_page_free(m);
	}
}

/*
 * Allow the kernel stack for a thread to be prejudicially paged out.
 */
void
pmap_swapout_thread(td)
	struct thread *td;
{
	int i;
	vm_object_t ksobj;
	vm_offset_t ks;
	vm_page_t m;

	ksobj = td->td_kstack_obj;
	ks = td->td_kstack;
	for (i = 0; i < KSTACK_PAGES; i++) {
		m = vm_page_lookup(ksobj, i);
		if (m == NULL)
			panic("pmap_swapout_thread: kstack already missing?");
		vm_page_dirty(m);
		vm_page_unwire(m, 0);
		pmap_kremove(ks + i * PAGE_SIZE);
	}
}

/*
 * Bring the kernel stack for a specified thread back in.
 */
void
pmap_swapin_thread(td)
	struct thread *td;
{
	int i, rv;
	vm_object_t ksobj;
	vm_offset_t ks;
	vm_page_t m;

	ksobj = td->td_kstack_obj;
	ks = td->td_kstack;
	for (i = 0; i < KSTACK_PAGES; i++) {
		m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
		pmap_kenter(ks + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m));
		if (m->valid != VM_PAGE_BITS_ALL) {
			rv = vm_pager_get_pages(ksobj, &m, 1, 0);
			if (rv != VM_PAGER_OK)
				panic("pmap_swapin_thread: cannot get kstack for proc: %d\n", td->td_proc->p_pid);
			m = vm_page_lookup(ksobj, i);
			m->valid = VM_PAGE_BITS_ALL;
		}
		vm_page_wire(m);
		vm_page_wakeup(m);
		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
	}
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
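 *
 * The convention here: a page table page's hold_count tracks the
 * PTEs installed in it (pmap_allocpte() takes a hold for each new
 * mapping and pmap_unuse_pt() releases it), while the wire count
 * simply keeps the page itself from being paged out.  When the last
 * hold goes away the page table page is unmapped from the page
 * directory and freed.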
 */
static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
{

	while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
		;

	if (m->hold_count == 0) {
		vm_offset_t pteva;
		/*
		 * unmap the page table page
		 */
		pmap->pm_pdir[m->pindex] = 0;
		--pmap->pm_stats.resident_count;
		if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) ==
		    (PTDpde & PG_FRAME)) {
			/*
			 * Do an invltlb to make the invalidated mapping
			 * take effect immediately.
			 */
			pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
			pmap_invalidate_page(pmap, pteva);
		}

		if (pmap->pm_ptphint == m)
			pmap->pm_ptphint = NULL;

		/*
		 * If the page is finally unwired, simply free it.
		 */
		--m->wire_count;
		if (m->wire_count == 0) {

			vm_page_flash(m);
			vm_page_busy(m);
			vm_page_free_zero(m);
			--cnt.v_wire_count;
		}
		return 1;
	}
	return 0;
}

static PMAP_INLINE int
pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
{
	vm_page_unhold(m);
	if (m->hold_count == 0)
		return _pmap_unwire_pte_hold(pmap, m);
	else
		return 0;
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{
	unsigned ptepindex;
	if (va >= VM_MAXUSER_ADDRESS)
		return 0;

	if (mpte == NULL) {
		ptepindex = (va >> PDRSHIFT);
		if (pmap->pm_ptphint &&
		    (pmap->pm_ptphint->pindex == ptepindex)) {
			mpte = pmap->pm_ptphint;
		} else {
			mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
			pmap->pm_ptphint = mpte;
		}
	}

	return pmap_unwire_pte_hold(pmap, mpte);
}

void
pmap_pinit0(pmap)
	struct pmap *pmap;
{
	pmap->pm_pdir =
	    (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
	pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
	pmap->pm_count = 1;
	pmap->pm_ptphint = NULL;
	pmap->pm_active = 0;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(pmap)
	register struct pmap *pmap;
{
	vm_page_t ptdpg;

	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	if (pmap->pm_pdir == NULL)
		pmap->pm_pdir =
		    (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);

	/*
	 * allocate object for the ptes
	 */
	if (pmap->pm_pteobj == NULL)
		pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1);

	/*
	 * allocate the page directory page
	 */
	ptdpg = vm_page_grab(pmap->pm_pteobj, PTDPTDI,
	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);

	ptdpg->wire_count = 1;
	++cnt.v_wire_count;


	vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
	ptdpg->valid = VM_PAGE_BITS_ALL;

	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
	if ((ptdpg->flags & PG_ZERO) == 0)
		bzero(pmap->pm_pdir, PAGE_SIZE);

	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	/* Wire in kernel global address entries. */
	/* XXX copies current process, does not fill in MPPTDI */
	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
#ifdef SMP
	pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
#endif

	/* install self-referential address mapping entry */
	pmap->pm_pdir[PTDPTDI] =
	    VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;

	pmap->pm_count = 1;
	pmap->pm_active = 0;
	pmap->pm_ptphint = NULL;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Wire in kernel global address entries.  To avoid a race condition
 * between pmap initialization and pmap_growkernel, this procedure
 * should be called after the vmspace is attached to the process
 * but before this pmap is activated.
 */
void
pmap_pinit2(pmap)
	struct pmap *pmap;
{
	/* XXX: Remove this stub when no longer called */
}

static int
pmap_release_free_page(pmap_t pmap, vm_page_t p)
{
	pd_entry_t *pde = pmap->pm_pdir;
	/*
	 * This code optimizes the case of freeing non-busy
	 * page-table pages.  Those pages are zero now, and
	 * might as well be placed directly into the zero queue.
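	 *
	 * (A page table page whose PTEs have all been removed is all
	 * zeroes again, so freeing it with vm_page_free_zero() lets the
	 * VM system keep it on the pre-zeroed free queue instead of
	 * zeroing it a second time at its next allocation.)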
	 */
	if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
		return 0;

	vm_page_busy(p);

	/*
	 * Remove the page table page from the process's address space.
	 */
	pde[p->pindex] = 0;
	pmap->pm_stats.resident_count--;

	if (p->hold_count) {
		panic("pmap_release: freeing held page table page");
	}
	/*
	 * Page directory pages need to have the kernel
	 * stuff cleared, so they can go into the zero queue also.
	 */
	if (p->pindex == PTDPTDI) {
		bzero(pde + KPTDI, nkpt * PTESIZE);
#ifdef SMP
		pde[MPPTDI] = 0;
#endif
		pde[APTDPTDI] = 0;
		pmap_kremove((vm_offset_t) pmap->pm_pdir);
	}

	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
		pmap->pm_ptphint = NULL;

	p->wire_count--;
	cnt.v_wire_count--;
	vm_page_free_zero(p);
	return 1;
}

/*
 * this routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap, ptepindex)
	pmap_t pmap;
	unsigned ptepindex;
{
	vm_offset_t pteva, ptepa;	/* XXXPA */
	vm_page_t m;

	/*
	 * Find or fabricate a new pagetable page
	 */
	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
	    VM_ALLOC_ZERO | VM_ALLOC_RETRY);

	KASSERT(m->queue == PQ_NONE,
	    ("_pmap_allocpte: %p->queue != PQ_NONE", m));

	if (m->wire_count == 0)
		cnt.v_wire_count++;
	m->wire_count++;

	/*
	 * Increment the hold count for the page table page
	 * (denoting a new mapping.)
	 */
	m->hold_count++;

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */

	pmap->pm_stats.resident_count++;

	ptepa = VM_PAGE_TO_PHYS(m);
	pmap->pm_pdir[ptepindex] =
	    (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);

	/*
	 * Set the page table hint
	 */
	pmap->pm_ptphint = m;

	/*
	 * Try to use the new mapping, but if we cannot, then
	 * do it with the routine that maps the page explicitly.
	 */
	if ((m->flags & PG_ZERO) == 0) {
		if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) ==
		    (PTDpde & PG_FRAME)) {
			pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex);
			bzero((caddr_t) pteva, PAGE_SIZE);
		} else {
			pmap_zero_page(ptepa);
		}
	}

	m->valid = VM_PAGE_BITS_ALL;
	vm_page_flag_clear(m, PG_ZERO);
	vm_page_flag_set(m, PG_MAPPED);
	vm_page_wakeup(m);

	return m;
}

static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va)
{
	unsigned ptepindex;
	pd_entry_t ptepa;
	vm_page_t m;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = va >> PDRSHIFT;

	/*
	 * Get the page directory entry
	 */
	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];

	/*
	 * This supports switching from a 4MB page to a
	 * normal 4K page.
	 */
	if (ptepa & PG_PS) {
		pmap->pm_pdir[ptepindex] = 0;
		ptepa = 0;
		invltlb();
	}

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (ptepa) {
		/*
		 * In order to get the page table page, try the
		 * hint first.
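		 * The hint (pm_ptphint) caches the most recently used
		 * page table page, so faults that cluster within one
		 * 4MB region skip the vm_page_lookup().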
		 */
		if (pmap->pm_ptphint &&
		    (pmap->pm_ptphint->pindex == ptepindex)) {
			m = pmap->pm_ptphint;
		} else {
			m = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
			pmap->pm_ptphint = m;
		}
		m->hold_count++;
		return m;
	}
	/*
	 * Here if the pte page isn't mapped, or if it has been deallocated.
	 */
	return _pmap_allocpte(pmap, ptepindex);
}


/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t p, n, ptdpg;
	vm_object_t object = pmap->pm_pteobj;
	int curgeneration;

#if defined(DIAGNOSTIC)
	if (object->ref_count != 1)
		panic("pmap_release: pteobj reference count != 1");
#endif

	ptdpg = NULL;
	LIST_REMOVE(pmap, pm_list);
retry:
	curgeneration = object->generation;
	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
		n = TAILQ_NEXT(p, listq);
		if (p->pindex == PTDPTDI) {
			ptdpg = p;
			continue;
		}
		while (1) {
			if (!pmap_release_free_page(pmap, p) &&
			    (object->generation != curgeneration))
				goto retry;
		}
	}

	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
		goto retry;
}

static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	struct pmap *pmap;
	int s;
	vm_offset_t ptppaddr;
	vm_page_t nkpg;
	pd_entry_t newpdir;

	s = splhigh();
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;
		while (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
			    ~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
		}
	}
	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	while (kernel_vm_end < addr) {
		if (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
			    ~(PAGE_SIZE * NPTEPG - 1);
			continue;
		}

		/*
		 * This index is bogus, but out of the way
		 */
		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");

		nkpt++;

		vm_page_wire(nkpg);
		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
		pmap_zero_page(ptppaddr);
		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
		pdir_pde(PTD, kernel_vm_end) = newpdir;

		LIST_FOREACH(pmap, &allpmaps, pm_list) {
			*pmap_pde(pmap, kernel_vm_end) = newpdir;
		}
		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
		    ~(PAGE_SIZE * NPTEPG - 1);
	}
	splx(s);
}

/*
 * Retire the given physical map from service.
 * Should only be called if the map contains
 * no valid mappings.
 */
void
pmap_destroy(pmap_t pmap)
{
	int count;

	if (pmap == NULL)
		return;

	count = --pmap->pm_count;
	if (count == 0) {
		pmap_release(pmap);
		panic("destroying a pmap is not yet implemented");
	}
}

/*
 * Add a reference to the specified pmap.
 */
void
pmap_reference(pmap_t pmap)
{
	if (pmap != NULL) {
		pmap->pm_count++;
	}
}

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{
	pv_entry_count--;
	zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 * the memory allocation is performed bypassing the malloc code
 * because of the possibility of allocations at interrupt time.
 */
static pv_entry_t
get_pv_entry(void)
{
	pv_entry_count++;
	if (pv_entry_high_water &&
	    (pv_entry_count > pv_entry_high_water) &&
	    (pmap_pagedaemon_waken == 0)) {
		pmap_pagedaemon_waken = 1;
		wakeup(&vm_pages_needed);
	}
	return zalloc(pvzone);
}

/*
 * This routine is very drastic, but can save the system
 * in a pinch.
 */
void
pmap_collect()
{
	int i;
	vm_page_t m;
	static int warningdone = 0;

	if (pmap_pagedaemon_waken == 0)
		return;

	if (warningdone < 5) {
		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
		warningdone++;
	}

	for(i = 0; i < vm_page_array_size; i++) {
		m = &vm_page_array[i];
		if (m->wire_count || m->hold_count || m->busy ||
		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
			continue;
		pmap_remove_all(m);
	}
	pmap_pagedaemon_waken = 0;
}


/*
 * Find and remove the pv entry for this (pmap, va) pair, searching
 * whichever list (the page's pv list or the pmap's pv list) is
 * likely to be shorter.  The now unused entry is freed.
 */

static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{
	pv_entry_t pv;
	int rtval;
	int s;

	s = splvm();
	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
			if (pmap == pv->pv_pmap && va == pv->pv_va)
				break;
		}
	} else {
		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
			if (va == pv->pv_va)
				break;
		}
	}

	rtval = 0;
	if (pv) {
		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);

		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
		free_pv_entry(pv);
	}

	splx(s);
	return rtval;
}

/*
 * Create a pv entry for page at pa for
 * (pmap, va).
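 *
 * pv entries form the reverse map: every managed vm_page_t carries a
 * list of the (pmap, va) pairs under which it is currently mapped,
 * and every pmap chains its own pv entries together as well.  This
 * is what lets routines such as pmap_remove_all() find and tear down
 * every mapping of a physical page without scanning page tables.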
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
{
	int s;
	pv_entry_t pv;

	s = splvm();
	pv = get_pv_entry();
	pv->pv_va = va;
	pv->pv_pmap = pmap;
	pv->pv_ptem = mpte;

	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count++;

	splx(s);
}

/*
 * pmap_remove_pte: do the things to unmap a page in a process
 */
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
{
	pt_entry_t oldpte;
	vm_page_t m;

	oldpte = atomic_readandclear_int(ptq);
	if (oldpte & PG_W)
		pmap->pm_stats.wired_count -= 1;
	/*
	 * Machines that don't support invlpg, also don't support
	 * PG_G.
	 */
	if (oldpte & PG_G)
		invlpg(va);
	pmap->pm_stats.resident_count -= 1;
	if (oldpte & PG_MANAGED) {
		m = PHYS_TO_VM_PAGE(oldpte);
		if (oldpte & PG_M) {
#if defined(PMAP_DIAGNOSTIC)
			if (pmap_nw_modified((pt_entry_t) oldpte)) {
				printf(
	"pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
				    va, oldpte);
			}
#endif
			if (pmap_track_modified(va))
				vm_page_dirty(m);
		}
		if (oldpte & PG_A)
			vm_page_flag_set(m, PG_REFERENCED);
		return pmap_remove_entry(pmap, m, va);
	} else {
		return pmap_unuse_pt(pmap, va, NULL);
	}

	return 0;
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va)
{
	register pt_entry_t *ptq;

	/*
	 * if there is no pte for this address, just skip it!!!
	 */
	if (*pmap_pde(pmap, va) == 0) {
		return;
	}

	/*
	 * get a local va for mappings for this pmap.
	 */
	ptq = get_ptbase(pmap) + i386_btop(va);
	if (*ptq) {
		(void) pmap_remove_pte(pmap, ptq, va);
		pmap_invalidate_page(pmap, va);
	}
	return;
}

/*
 *	Remove the given range of addresses from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	register pt_entry_t *ptbase;
	vm_offset_t pdnxt;
	pd_entry_t ptpaddr;
	vm_offset_t sindex, eindex;
	int anyvalid;

	if (pmap == NULL)
		return;

	if (pmap->pm_stats.resident_count == 0)
		return;

	/*
	 * special handling of removing one page.  a very
	 * common operation and easy to short circuit some
	 * code.
	 */
	if ((sva + PAGE_SIZE == eva) &&
	    ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
		pmap_remove_page(pmap, sva);
		return;
	}

	anyvalid = 0;

	/*
	 * Get a local virtual address for the mappings that are being
	 * worked with.
	 */
	ptbase = get_ptbase(pmap);

	sindex = i386_btop(sva);
	eindex = i386_btop(eva);

	for (; sindex < eindex; sindex = pdnxt) {
		unsigned pdirindex;

		/*
		 * Calculate index for next page table.
		 */
		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
		if (pmap->pm_stats.resident_count == 0)
			break;

		pdirindex = sindex / NPDEPG;
		ptpaddr = pmap->pm_pdir[pdirindex];
		if ((ptpaddr & PG_PS) != 0) {
			pmap->pm_pdir[pdirindex] = 0;
			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
			anyvalid++;
			continue;
		}

		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptpaddr == 0)
			continue;

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (pdnxt > eindex) {
			pdnxt = eindex;
		}

		for (; sindex != pdnxt; sindex++) {
			vm_offset_t va;
			if (ptbase[sindex] == 0) {
				continue;
			}
			va = i386_ptob(sindex);

			anyvalid++;
			if (pmap_remove_pte(pmap,
			    ptbase + sindex, va))
				break;
		}
	}

	if (anyvalid)
		pmap_invalidate_all(pmap);
}

/*
 *	Routine:	pmap_remove_all
 *	Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 *	Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

static void
pmap_remove_all(vm_page_t m)
{
	register pv_entry_t pv;
	pt_entry_t *pte, tpte;
	int s;

#if defined(PMAP_DIAGNOSTIC)
	/*
	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
	 * pages!
	 */
	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m));
	}
#endif

	s = splvm();
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pv->pv_pmap->pm_stats.resident_count--;

		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);

		tpte = atomic_readandclear_int(pte);
		if (tpte & PG_W)
			pv->pv_pmap->pm_stats.wired_count--;

		if (tpte & PG_A)
			vm_page_flag_set(m, PG_REFERENCED);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (tpte & PG_M) {
#if defined(PMAP_DIAGNOSTIC)
			if (pmap_nw_modified((pt_entry_t) tpte)) {
				printf(
	"pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
				    pv->pv_va, tpte);
			}
#endif
			if (pmap_track_modified(pv->pv_va))
				vm_page_dirty(m);
		}
		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);

		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
		free_pv_entry(pv);
	}

	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);

	splx(s);
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
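 *
 *	Only write-protection requires real work here: if the new
 *	protection lacks VM_PROT_READ the range is simply removed, and
 *	if it still includes VM_PROT_WRITE there is nothing to change.
 *	Otherwise PG_RW is cleared, with any pending PG_M folded into
 *	vm_page_dirty() before the bit is lost.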
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	register pt_entry_t *ptbase;
	vm_offset_t pdnxt;
	pd_entry_t ptpaddr;
	vm_pindex_t sindex, eindex;
	int anychanged;

	if (pmap == NULL)
		return;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if (prot & VM_PROT_WRITE)
		return;

	anychanged = 0;

	ptbase = get_ptbase(pmap);

	sindex = i386_btop(sva);
	eindex = i386_btop(eva);

	for (; sindex < eindex; sindex = pdnxt) {

		unsigned pdirindex;

		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));

		pdirindex = sindex / NPDEPG;
		ptpaddr = pmap->pm_pdir[pdirindex];
		if ((ptpaddr & PG_PS) != 0) {
			pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
			anychanged++;
			continue;
		}

		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptpaddr == 0)
			continue;

		if (pdnxt > eindex) {
			pdnxt = eindex;
		}

		for (; sindex != pdnxt; sindex++) {

			pt_entry_t pbits;
			vm_page_t m;

			pbits = ptbase[sindex];

			if (pbits & PG_MANAGED) {
				m = NULL;
				if (pbits & PG_A) {
					m = PHYS_TO_VM_PAGE(pbits);
					vm_page_flag_set(m, PG_REFERENCED);
					pbits &= ~PG_A;
				}
				if (pbits & PG_M) {
					if (pmap_track_modified(i386_ptob(sindex))) {
						if (m == NULL)
							m = PHYS_TO_VM_PAGE(pbits);
						vm_page_dirty(m);
						pbits &= ~PG_M;
					}
				}
			}

			pbits &= ~PG_RW;

			if (pbits != ptbase[sindex]) {
				ptbase[sindex] = pbits;
				anychanged = 1;
			}
		}
	}
	if (anychanged)
		pmap_invalidate_all(pmap);
}

/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
	   boolean_t wired)
{
	vm_offset_t pa;
	register pt_entry_t *pte;
	vm_offset_t opa;
	pt_entry_t origpte, newpte;
	vm_page_t mpte;

	if (pmap == NULL)
		return;

	va &= PG_FRAME;
#ifdef PMAP_DIAGNOSTIC
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");
	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
#endif

	mpte = NULL;
	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		mpte = pmap_allocpte(pmap, va);
	}
#if 0 && defined(PMAP_DIAGNOSTIC)
	else {
		pd_entry_t *pdeaddr = pmap_pde(pmap, va);
		origpte = *pdeaddr;
		if ((origpte & PG_V) == 0) {
			panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
				pmap->pm_pdir[PTDPTDI], origpte, va);
		}
	}
#endif

	pte = pmap_pte(pmap, va);

	/*
	 * Page Directory table entry not valid, we need a new PT page
	 */
	if (pte == NULL) {
		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n",
			(void *)pmap->pm_pdir[PTDPTDI], va);
	}

	pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
	origpte = *(vm_offset_t *)pte;
	opa = origpte & PG_FRAME;

	if (origpte & PG_PS)
		panic("pmap_enter: attempted pmap_enter on 4MB page");

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (origpte && (opa == pa)) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && ((origpte & PG_W) == 0))
			pmap->pm_stats.wired_count++;
		else if (!wired && (origpte & PG_W))
			pmap->pm_stats.wired_count--;

#if defined(PMAP_DIAGNOSTIC)
		if (pmap_nw_modified((pt_entry_t) origpte)) {
			printf(
	"pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
			    va, origpte);
		}
#endif

		/*
		 * Remove extra pte reference
		 */
		if (mpte)
			mpte->hold_count--;

		if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
			if ((origpte & PG_RW) == 0) {
				*pte |= PG_RW;
				pmap_invalidate_page(pmap, va);
			}
			return;
		}

		/*
		 * We might be turning off write access to the page,
		 * so we go ahead and sense modify status.
		 */
		if (origpte & PG_MANAGED) {
			if ((origpte & PG_M) && pmap_track_modified(va)) {
				vm_page_t om;
				om = PHYS_TO_VM_PAGE(opa);
				vm_page_dirty(om);
			}
			pa |= PG_MANAGED;
		}
		goto validate;
	}
	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		int err;
		err = pmap_remove_pte(pmap, pte, va);
		if (err)
			panic("pmap_enter: pte vanished, va: 0x%x", va);
	}

	/*
	 * Enter on the PV list if part of our managed memory. Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */
	if (pmap_initialized &&
	    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
		pmap_insert_entry(pmap, va, mpte, m);
		pa |= PG_MANAGED;
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);

	if (wired)
		newpte |= PG_W;
	if (va < VM_MAXUSER_ADDRESS)
		newpte |= PG_U;
	if (pmap == kernel_pmap)
		newpte |= pgeflag;

	/*
	 * If the mapping or permission bits are different, we need
	 * to update the pte.
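	 * PG_M and PG_A are masked out of the comparison because the
	 * hardware sets them asynchronously; a mapping differing only
	 * in those bits is still the same mapping and needs neither a
	 * rewrite nor a TLB flush.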
2253 */ 2254 if ((origpte & ~(PG_M|PG_A)) != newpte) { 2255 *pte = newpte | PG_A; 2256 /*if (origpte)*/ { 2257 pmap_invalidate_page(pmap, va); 2258 } 2259 } 2260} 2261 2262/* 2263 * this code makes some *MAJOR* assumptions: 2264 * 1. Current pmap & pmap exists. 2265 * 2. Not wired. 2266 * 3. Read access. 2267 * 4. No page table pages. 2268 * 5. Tlbflush is deferred to calling procedure. 2269 * 6. Page IS managed. 2270 * but is *MUCH* faster than pmap_enter... 2271 */ 2272 2273static vm_page_t 2274pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) 2275{ 2276 pt_entry_t *pte; 2277 vm_offset_t pa; 2278 2279 /* 2280 * In the case that a page table page is not 2281 * resident, we are creating it here. 2282 */ 2283 if (va < VM_MAXUSER_ADDRESS) { 2284 unsigned ptepindex; 2285 pd_entry_t ptepa; 2286 2287 /* 2288 * Calculate pagetable page index 2289 */ 2290 ptepindex = va >> PDRSHIFT; 2291 if (mpte && (mpte->pindex == ptepindex)) { 2292 mpte->hold_count++; 2293 } else { 2294retry: 2295 /* 2296 * Get the page directory entry 2297 */ 2298 ptepa = pmap->pm_pdir[ptepindex]; 2299 2300 /* 2301 * If the page table page is mapped, we just increment 2302 * the hold count, and activate it. 2303 */ 2304 if (ptepa) { 2305 if (ptepa & PG_PS) 2306 panic("pmap_enter_quick: unexpected mapping into 4MB page"); 2307 if (pmap->pm_ptphint && 2308 (pmap->pm_ptphint->pindex == ptepindex)) { 2309 mpte = pmap->pm_ptphint; 2310 } else { 2311 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 2312 pmap->pm_ptphint = mpte; 2313 } 2314 if (mpte == NULL) 2315 goto retry; 2316 mpte->hold_count++; 2317 } else { 2318 mpte = _pmap_allocpte(pmap, ptepindex); 2319 } 2320 } 2321 } else { 2322 mpte = NULL; 2323 } 2324 2325 /* 2326 * This call to vtopte makes the assumption that we are 2327 * entering the page into the current pmap. In order to support 2328 * quick entry into any pmap, one would likely use pmap_pte_quick. 2329 * But that isn't as quick as vtopte. 2330 */ 2331 pte = vtopte(va); 2332 if (*pte) { 2333 if (mpte) 2334 pmap_unwire_pte_hold(pmap, mpte); 2335 return 0; 2336 } 2337 2338 /* 2339 * Enter on the PV list if part of our managed memory. Note that we 2340 * raise IPL while manipulating pv_table since pmap_enter can be 2341 * called at interrupt time. 2342 */ 2343 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) 2344 pmap_insert_entry(pmap, va, mpte, m); 2345 2346 /* 2347 * Increment counters 2348 */ 2349 pmap->pm_stats.resident_count++; 2350 2351 pa = VM_PAGE_TO_PHYS(m); 2352 2353 /* 2354 * Now validate mapping with RO protection 2355 */ 2356 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2357 *pte = pa | PG_V | PG_U; 2358 else 2359 *pte = pa | PG_V | PG_U | PG_MANAGED; 2360 2361 return mpte; 2362} 2363 2364/* 2365 * Make a temporary mapping for a physical address. This is only intended 2366 * to be used for panic dumps. 2367 */ 2368void * 2369pmap_kenter_temporary(vm_offset_t pa, int i) 2370{ 2371 pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); 2372 return ((void *)crashdumpmap); 2373} 2374 2375#define MAX_INIT_PT (96) 2376/* 2377 * pmap_object_init_pt preloads the ptes for a given object 2378 * into the specified pmap. This eliminates the blast of soft 2379 * faults on process startup and immediately after an mmap. 
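 *
 * Hypothetical usage, for illustration only (the caller shown here,
 * and the names p, foff, addr and size, are assumed rather than
 * taken from this file):
 *
 *	pmap_object_init_pt(vmspace_pmap(p->p_vmspace), addr,
 *	    object, OFF_TO_IDX(foff), size, MAP_PREFAULT_PARTIAL);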
2380 */ 2381void 2382pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2383 vm_object_t object, vm_pindex_t pindex, 2384 vm_size_t size, int limit) 2385{ 2386 vm_offset_t tmpidx; 2387 int psize; 2388 vm_page_t p, mpte; 2389 int objpgs; 2390 2391 if (pmap == NULL || object == NULL) 2392 return; 2393 2394 /* 2395 * This code maps large physical mmap regions into the 2396 * processor address space. Note that some shortcuts 2397 * are taken, but the code works. 2398 */ 2399 if (pseflag && (object->type == OBJT_DEVICE) && 2400 ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { 2401 int i; 2402 vm_page_t m[1]; 2403 unsigned int ptepindex; 2404 int npdes; 2405 pd_entry_t ptepa; 2406 2407 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) 2408 return; 2409 2410retry: 2411 p = vm_page_lookup(object, pindex); 2412 if (p && vm_page_sleep_busy(p, FALSE, "init4p")) 2413 goto retry; 2414 2415 if (p == NULL) { 2416 p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); 2417 if (p == NULL) 2418 return; 2419 m[0] = p; 2420 2421 if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { 2422 vm_page_free(p); 2423 return; 2424 } 2425 2426 p = vm_page_lookup(object, pindex); 2427 vm_page_wakeup(p); 2428 } 2429 2430 ptepa = VM_PAGE_TO_PHYS(p); 2431 if (ptepa & (NBPDR - 1)) { 2432 return; 2433 } 2434 2435 p->valid = VM_PAGE_BITS_ALL; 2436 2437 pmap->pm_stats.resident_count += size >> PAGE_SHIFT; 2438 npdes = size >> PDRSHIFT; 2439 for(i = 0; i < npdes; i++) { 2440 pmap->pm_pdir[ptepindex] = 2441 ptepa | PG_U | PG_RW | PG_V | PG_PS; 2442 ptepa += NBPDR; 2443 ptepindex += 1; 2444 } 2445 vm_page_flag_set(p, PG_MAPPED); 2446 invltlb(); 2447 return; 2448 } 2449 2450 psize = i386_btop(size); 2451 2452 if ((object->type != OBJT_VNODE) || 2453 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2454 (object->resident_page_count > MAX_INIT_PT))) { 2455 return; 2456 } 2457 2458 if (psize + pindex > object->size) { 2459 if (object->size < pindex) 2460 return; 2461 psize = object->size - pindex; 2462 } 2463 2464 mpte = NULL; 2465 /* 2466 * if we are processing a major portion of the object, then scan the 2467 * entire thing. 2468 */ 2469 if (psize > (object->resident_page_count >> 2)) { 2470 objpgs = psize; 2471 2472 for (p = TAILQ_FIRST(&object->memq); 2473 ((objpgs > 0) && (p != NULL)); 2474 p = TAILQ_NEXT(p, listq)) { 2475 2476 tmpidx = p->pindex; 2477 if (tmpidx < pindex) { 2478 continue; 2479 } 2480 tmpidx -= pindex; 2481 if (tmpidx >= psize) { 2482 continue; 2483 } 2484 /* 2485 * don't allow an madvise to blow away our really 2486 * free pages allocating pv entries. 2487 */ 2488 if ((limit & MAP_PREFAULT_MADVISE) && 2489 cnt.v_free_count < cnt.v_free_reserved) { 2490 break; 2491 } 2492 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2493 (p->busy == 0) && 2494 (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2495 if ((p->queue - p->pc) == PQ_CACHE) 2496 vm_page_deactivate(p); 2497 vm_page_busy(p); 2498 mpte = pmap_enter_quick(pmap, 2499 addr + i386_ptob(tmpidx), p, mpte); 2500 vm_page_flag_set(p, PG_MAPPED); 2501 vm_page_wakeup(p); 2502 } 2503 objpgs -= 1; 2504 } 2505 } else { 2506 /* 2507 * else lookup the pages one-by-one. 2508 */ 2509 for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { 2510 /* 2511 * don't allow an madvise to blow away our really 2512 * free pages allocating pv entries. 
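			 *
			 * (Added note, an assumption about intent rather
			 * than original text: each page entered here may
			 * also allocate a pv entry, so the scan stops
			 * instead of dipping below the v_free_reserved
			 * floor.)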
2513		 */
2514			if ((limit & MAP_PREFAULT_MADVISE) &&
2515			    cnt.v_free_count < cnt.v_free_reserved) {
2516				break;
2517			}
2518			p = vm_page_lookup(object, tmpidx + pindex);
2519			if (p &&
2520			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2521			    (p->busy == 0) &&
2522			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2523				if ((p->queue - p->pc) == PQ_CACHE)
2524					vm_page_deactivate(p);
2525				vm_page_busy(p);
2526				mpte = pmap_enter_quick(pmap,
2527					addr + i386_ptob(tmpidx), p, mpte);
2528				vm_page_flag_set(p, PG_MAPPED);
2529				vm_page_wakeup(p);
2530			}
2531		}
2532	}
2533	return;
2534}
2535
2536/*
2537 * pmap_prefault provides a quick way of clustering
2538 * pagefaults into a process's address space. It is a "cousin"
2539 * of pmap_object_init_pt, except it runs at page fault time instead
2540 * of mmap time.
2541 */
2542#define PFBAK 4
2543#define PFFOR 4
2544#define PAGEORDER_SIZE (PFBAK+PFFOR)
2545
2546static int pmap_prefault_pageorder[] = {
2547	-PAGE_SIZE, PAGE_SIZE,
2548	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
2549	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
2550	-4 * PAGE_SIZE, 4 * PAGE_SIZE
2551};
2552
2553void
2554pmap_prefault(pmap, addra, entry)
2555	pmap_t pmap;
2556	vm_offset_t addra;
2557	vm_map_entry_t entry;
2558{
2559	int i;
2560	vm_offset_t starta;
2561	vm_offset_t addr;
2562	vm_pindex_t pindex;
2563	vm_page_t m, mpte;
2564	vm_object_t object;
2565
2566	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
2567		return;
2568
2569	object = entry->object.vm_object;
2570
2571	starta = addra - PFBAK * PAGE_SIZE;
2572	if (starta < entry->start) {
2573		starta = entry->start;
2574	} else if (starta > addra) {
2575		starta = 0;
2576	}
2577
2578	mpte = NULL;
2579	for (i = 0; i < PAGEORDER_SIZE; i++) {
2580		vm_object_t lobject;
2581		pt_entry_t *pte;
2582
2583		addr = addra + pmap_prefault_pageorder[i];
2584		if (addr > addra + (PFFOR * PAGE_SIZE))
2585			addr = 0;
2586
2587		if (addr < starta || addr >= entry->end)
2588			continue;
2589
2590		if ((*pmap_pde(pmap, addr)) == 0)
2591			continue;
2592
2593		pte = vtopte(addr);
2594		if (*pte)
2595			continue;
2596
2597		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2598		lobject = object;
2599		for (m = vm_page_lookup(lobject, pindex);
2600		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2601		    lobject = lobject->backing_object) {
2602			if (lobject->backing_object_offset & PAGE_MASK)
2603				break;
2604			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2605			m = vm_page_lookup(lobject->backing_object, pindex);
2606		}
2607
2608		/*
2609		 * give up when a page is not in memory
2610		 */
2611		if (m == NULL)
2612			break;
2613
2614		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2615		    (m->busy == 0) &&
2616		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2617
2618			if ((m->queue - m->pc) == PQ_CACHE) {
2619				vm_page_deactivate(m);
2620			}
2621			vm_page_busy(m);
2622			mpte = pmap_enter_quick(pmap, addr, m, mpte);
2623			vm_page_flag_set(m, PG_MAPPED);
2624			vm_page_wakeup(m);
2625		}
2626	}
2627}
2628
2629/*
2630 * Routine:	pmap_change_wiring
2631 * Function:	Change the wiring attribute for a map/virtual-address
2632 *			pair.
2633 * In/out conditions:
2634 *		The mapping must already exist in the pmap.
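 *
 *		Hypothetical usage, for illustration only:
 *
 *			pmap_change_wiring(map->pmap, va, FALSE);
 *
 *		unwires a single mapping; only the PG_W bit and
 *		pm_stats.wired_count change, so no TLB invalidation
 *		is required.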
2635 */ 2636void 2637pmap_change_wiring(pmap, va, wired) 2638 register pmap_t pmap; 2639 vm_offset_t va; 2640 boolean_t wired; 2641{ 2642 register pt_entry_t *pte; 2643 2644 if (pmap == NULL) 2645 return; 2646 2647 pte = pmap_pte(pmap, va); 2648 2649 if (wired && !pmap_pte_w(pte)) 2650 pmap->pm_stats.wired_count++; 2651 else if (!wired && pmap_pte_w(pte)) 2652 pmap->pm_stats.wired_count--; 2653 2654 /* 2655 * Wiring is not a hardware characteristic so there is no need to 2656 * invalidate TLB. 2657 */ 2658 pmap_pte_set_w(pte, wired); 2659} 2660 2661 2662 2663/* 2664 * Copy the range specified by src_addr/len 2665 * from the source map to the range dst_addr/len 2666 * in the destination map. 2667 * 2668 * This routine is only advisory and need not do anything. 2669 */ 2670 2671void 2672pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2673 vm_offset_t src_addr) 2674{ 2675 vm_offset_t addr; 2676 vm_offset_t end_addr = src_addr + len; 2677 vm_offset_t pdnxt; 2678 pd_entry_t src_frame, dst_frame; 2679 vm_page_t m; 2680 2681 if (dst_addr != src_addr) 2682 return; 2683 2684 src_frame = src_pmap->pm_pdir[PTDPTDI] & PG_FRAME; 2685 if (src_frame != (PTDpde & PG_FRAME)) 2686 return; 2687 2688 dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; 2689 for (addr = src_addr; addr < end_addr; addr = pdnxt) { 2690 pt_entry_t *src_pte, *dst_pte; 2691 vm_page_t dstmpte, srcmpte; 2692 pd_entry_t srcptepaddr; 2693 unsigned ptepindex; 2694 2695 if (addr >= UPT_MIN_ADDRESS) 2696 panic("pmap_copy: invalid to pmap_copy page tables\n"); 2697 2698 /* 2699 * Don't let optional prefaulting of pages make us go 2700 * way below the low water mark of free pages or way 2701 * above high water mark of used pv entries. 2702 */ 2703 if (cnt.v_free_count < cnt.v_free_reserved || 2704 pv_entry_count > pv_entry_high_water) 2705 break; 2706 2707 pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); 2708 ptepindex = addr >> PDRSHIFT; 2709 2710 srcptepaddr = src_pmap->pm_pdir[ptepindex]; 2711 if (srcptepaddr == 0) 2712 continue; 2713 2714 if (srcptepaddr & PG_PS) { 2715 if (dst_pmap->pm_pdir[ptepindex] == 0) { 2716 dst_pmap->pm_pdir[ptepindex] = srcptepaddr; 2717 dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; 2718 } 2719 continue; 2720 } 2721 2722 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); 2723 if ((srcmpte == NULL) || 2724 (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) 2725 continue; 2726 2727 if (pdnxt > end_addr) 2728 pdnxt = end_addr; 2729 2730 /* 2731 * Have to recheck this before every avtopte() call below 2732 * in case we have blocked and something else used APTDpde. 2733 */ 2734 if (dst_frame != (APTDpde & PG_FRAME)) { 2735 APTDpde = dst_frame | PG_RW | PG_V; 2736 invltlb(); 2737 } 2738 src_pte = vtopte(addr); 2739 dst_pte = avtopte(addr); 2740 while (addr < pdnxt) { 2741 pt_entry_t ptetemp; 2742 ptetemp = *src_pte; 2743 /* 2744 * we only virtual copy managed pages 2745 */ 2746 if ((ptetemp & PG_MANAGED) != 0) { 2747 /* 2748 * We have to check after allocpte for the 2749 * pte still being around... allocpte can 2750 * block. 2751 */ 2752 dstmpte = pmap_allocpte(dst_pmap, addr); 2753 if ((*dst_pte == 0) && (ptetemp = *src_pte)) { 2754 /* 2755 * Clear the modified and 2756 * accessed (referenced) bits 2757 * during the copy. 
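				 *
				 * (Added sketch: a source pte of, say,
				 * (pa | PG_V | PG_U | PG_MANAGED | PG_A | PG_M)
				 * is entered as (pa | PG_V | PG_U | PG_MANAGED);
				 * the MMU sets fresh A/M bits in the
				 * destination as it is touched.)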
2758				 */
2759					m = PHYS_TO_VM_PAGE(ptetemp);
2760					*dst_pte = ptetemp & ~(PG_M | PG_A);
2761					dst_pmap->pm_stats.resident_count++;
2762					pmap_insert_entry(dst_pmap, addr,
2763						dstmpte, m);
2764				} else {
2765					pmap_unwire_pte_hold(dst_pmap, dstmpte);
2766				}
2767				if (dstmpte->hold_count >= srcmpte->hold_count)
2768					break;
2769			}
2770			addr += PAGE_SIZE;
2771			src_pte++;
2772			dst_pte++;
2773		}
2774	}
2775}
2776
2777/*
2778 * Routine:	pmap_kernel
2779 * Function:
2780 *	Returns the physical map handle for the kernel.
2781 */
2782pmap_t
2783pmap_kernel()
2784{
2785	return (kernel_pmap);
2786}
2787
2788/*
2789 * pmap_zero_page zeros the specified hardware page by mapping
2790 * the page into KVM and using bzero to clear its contents.
2791 */
2792void
2793pmap_zero_page(vm_offset_t phys)
2794{
2795
2796#ifdef SMP
2797	/* XXX overkill, we only want to disable migration here */
2798	/* XXX or maybe not. down the track we have reentrancy issues */
2799	critical_enter();
2800#endif
2801	if (*CMAP2)
2802		panic("pmap_zero_page: CMAP2 busy");
2803	*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2804	cpu_invlpg((vm_offset_t)CADDR2);	/* SMP: local cpu only */
2805#if defined(I686_CPU)
2806	if (cpu_class == CPUCLASS_686)
2807		i686_pagezero(CADDR2);
2808	else
2809#endif
2810		bzero(CADDR2, PAGE_SIZE);
2811	*CMAP2 = 0;
2812#ifdef SMP
2813	critical_exit();
2814#endif
2815}
2816
2817/*
2818 * pmap_zero_page_area zeros the specified hardware page by mapping
2819 * the page into KVM and using bzero to clear its contents.
2820 *
2821 * off and size may not cover an area beyond a single hardware page.
2822 */
2823void
2824pmap_zero_page_area(vm_offset_t phys, int off, int size)
2825{
2826
2827#ifdef SMP
2828	/* XXX overkill, we only want to disable migration here */
2829	/* XXX or maybe not. down the track we have reentrancy issues */
2830	critical_enter();
2831#endif
2832	if (*CMAP2)
2833		panic("pmap_zero_page_area: CMAP2 busy");
2834	*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2835	cpu_invlpg((vm_offset_t)CADDR2);	/* SMP: local cpu only */
2836#if defined(I686_CPU)
2837	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
2838		i686_pagezero(CADDR2);
2839	else
2840#endif
2841		bzero((char *)CADDR2 + off, size);
2842	*CMAP2 = 0;
2843#ifdef SMP
2844	critical_exit();
2845#endif
2846}
2847
2848/*
2849 * pmap_copy_page copies the specified (machine independent)
2850 * page by mapping the page into virtual memory and using
2851 * bcopy to copy the page, one machine dependent page at a
2852 * time.
2853 */
2854void
2855pmap_copy_page(vm_offset_t src, vm_offset_t dst)
2856{
2857
2858#ifdef SMP
2859	/* XXX overkill, we only want to disable migration here */
2860	/* XXX or maybe not. down the track we have reentrancy issues */
2861	critical_enter();
2862#endif
2863	if (*CMAP1)
2864		panic("pmap_copy_page: CMAP1 busy");
2865	if (*CMAP2)
2866		panic("pmap_copy_page: CMAP2 busy");
2867
2868	*CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2869	*CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2870	cpu_invlpg((u_int)CADDR1);	/* SMP: local only */
2871	cpu_invlpg((u_int)CADDR2);	/* SMP: local only */
2872	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2873	*CMAP1 = 0;
2874	*CMAP2 = 0;
2875#ifdef SMP
2876	critical_exit();
2877#endif
2878}
2879
2880
2881/*
2882 * Routine:	pmap_pageable
2883 * Function:
2884 *	Make the specified pages (by pmap, offset)
2885 *	pageable (or not) as requested.
2886 * 2887 * A page which is not pageable may not take 2888 * a fault; therefore, its page table entry 2889 * must remain valid for the duration. 2890 * 2891 * This routine is merely advisory; pmap_enter 2892 * will specify that these pages are to be wired 2893 * down (or not) as appropriate. 2894 */ 2895void 2896pmap_pageable(pmap, sva, eva, pageable) 2897 pmap_t pmap; 2898 vm_offset_t sva, eva; 2899 boolean_t pageable; 2900{ 2901} 2902 2903/* 2904 * this routine returns true if a physical page resides 2905 * in the given pmap. 2906 */ 2907boolean_t 2908pmap_page_exists(pmap, m) 2909 pmap_t pmap; 2910 vm_page_t m; 2911{ 2912 register pv_entry_t pv; 2913 int s; 2914 2915 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2916 return FALSE; 2917 2918 s = splvm(); 2919 2920 /* 2921 * Not found, check current mappings returning immediately if found. 2922 */ 2923 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2924 if (pv->pv_pmap == pmap) { 2925 splx(s); 2926 return TRUE; 2927 } 2928 } 2929 splx(s); 2930 return (FALSE); 2931} 2932 2933#define PMAP_REMOVE_PAGES_CURPROC_ONLY 2934/* 2935 * Remove all pages from specified address space 2936 * this aids process exit speeds. Also, this code 2937 * is special cased for current process only, but 2938 * can have the more generic (and slightly slower) 2939 * mode enabled. This is much faster than pmap_remove 2940 * in the case of running down an entire address space. 2941 */ 2942void 2943pmap_remove_pages(pmap, sva, eva) 2944 pmap_t pmap; 2945 vm_offset_t sva, eva; 2946{ 2947 pt_entry_t *pte, tpte; 2948 vm_page_t m; 2949 pv_entry_t pv, npv; 2950 int s; 2951 2952#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2953 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { 2954 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2955 return; 2956 } 2957#endif 2958 2959 s = splvm(); 2960 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2961 2962 if (pv->pv_va >= eva || pv->pv_va < sva) { 2963 npv = TAILQ_NEXT(pv, pv_plist); 2964 continue; 2965 } 2966 2967#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2968 pte = vtopte(pv->pv_va); 2969#else 2970 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 2971#endif 2972 tpte = *pte; 2973 2974 if (tpte == 0) { 2975 printf("TPTE at %p IS ZERO @ VA %08x\n", 2976 pte, pv->pv_va); 2977 panic("bad pte"); 2978 } 2979 2980/* 2981 * We cannot remove wired pages from a process' mapping at this time 2982 */ 2983 if (tpte & PG_W) { 2984 npv = TAILQ_NEXT(pv, pv_plist); 2985 continue; 2986 } 2987 2988 m = PHYS_TO_VM_PAGE(tpte); 2989 KASSERT(m->phys_addr == (tpte & PG_FRAME), 2990 ("vm_page_t %p phys_addr mismatch %08x %08x", 2991 m, m->phys_addr, tpte)); 2992 2993 KASSERT(m < &vm_page_array[vm_page_array_size], 2994 ("pmap_remove_pages: bad tpte %x", tpte)); 2995 2996 pv->pv_pmap->pm_stats.resident_count--; 2997 2998 *pte = 0; 2999 3000 /* 3001 * Update the vm_page_t clean and reference bits. 
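		 *
		 * (Added note: only the modify bit needs to be pushed
		 * back here; a tpte with PG_M set becomes a
		 * vm_page_dirty() call, so the dirty state outlives
		 * the pte that is zeroed above.)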
3002		 */
3003		if (tpte & PG_M) {
3004			vm_page_dirty(m);
3005		}
3006
3007		npv = TAILQ_NEXT(pv, pv_plist);
3008		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
3009
3010		m->md.pv_list_count--;
3011		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
3012		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
3013			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
3014		}
3015
3016		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
3017		free_pv_entry(pv);
3018	}
3019	splx(s);
3020	pmap_invalidate_all(pmap);
3021}
3022
3023/*
3024 * pmap_testbit tests bits in pte's
3025 * note that the testbit/changebit routines are inline,
3026 * and a lot of things compile-time evaluate.
3027 */
3028static boolean_t
3029pmap_testbit(m, bit)
3030	vm_page_t m;
3031	int bit;
3032{
3033	pv_entry_t pv;
3034	pt_entry_t *pte;
3035	int s;
3036
3037	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
3038		return FALSE;
3039
3040	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
3041		return FALSE;
3042
3043	s = splvm();
3044
3045	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3046		/*
3047		 * if the bit being tested is the modified or referenced
3048		 * bit, only check mappings whose modified state is
3049		 * actually tracked; skip the rest.
3050		 */
3051		if (bit & (PG_A|PG_M)) {
3052			if (!pmap_track_modified(pv->pv_va))
3053				continue;
3054		}
3055
3056#if defined(PMAP_DIAGNOSTIC)
3057		if (!pv->pv_pmap) {
3058			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
3059			continue;
3060		}
3061#endif
3062		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3063		if (*pte & bit) {
3064			splx(s);
3065			return TRUE;
3066		}
3067	}
3068	splx(s);
3069	return (FALSE);
3070}
3071
3072/*
3073 * this routine is used to modify bits in ptes
3074 */
3075static __inline void
3076pmap_changebit(vm_page_t m, int bit, boolean_t setem)
3077{
3078	register pv_entry_t pv;
3079	register pt_entry_t *pte;
3080	int s;
3081
3082	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
3083		return;
3084
3085	s = splvm();
3086
3087	/*
3088	 * Loop over all current mappings, setting/clearing as appropriate.
3089	 * (If setting RO, do we need to clear the VAC?)
3090	 */
3091	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3092		/*
3093		 * don't write protect pager mappings
3094		 */
3095		if (!setem && (bit == PG_RW)) {
3096			if (!pmap_track_modified(pv->pv_va))
3097				continue;
3098		}
3099
3100#if defined(PMAP_DIAGNOSTIC)
3101		if (!pv->pv_pmap) {
3102			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
3103			continue;
3104		}
3105#endif
3106
3107		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3108
3109		if (setem) {
3110			*pte |= bit;
3111			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
3112		} else {
3113			pt_entry_t pbits = *pte;
3114			if (pbits & bit) {
3115				if (bit == PG_RW) {
3116					if (pbits & PG_M) {
3117						vm_page_dirty(m);
3118					}
3119					*pte = pbits & ~(PG_M|PG_RW);
3120				} else {
3121					*pte = pbits & ~bit;
3122				}
3123				pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
3124			}
3125		}
3126	}
3127	splx(s);
3128}
3129
3130/*
3131 * pmap_page_protect:
3132 *
3133 *	Lower the permission for all mappings to a given page.
3134 */
3135void
3136pmap_page_protect(vm_page_t m, vm_prot_t prot)
3137{
3138	if ((prot & VM_PROT_WRITE) == 0) {
3139		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
3140			pmap_changebit(m, PG_RW, FALSE);
3141		} else {
3142			pmap_remove_all(m);
3143		}
3144	}
3145}
3146
3147vm_offset_t
3148pmap_phys_address(ppn)
3149	int ppn;
3150{
3151	return (i386_ptob(ppn));
3152}
3153
3154/*
3155 * pmap_ts_referenced:
3156 *
3157 *	Return the count of reference bits for a page, clearing all of them.
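 *
 *	A usage sketch, for illustration only; the caller shown is
 *	assumed, not quoted from the pageout code:
 *
 *		actcount = pmap_ts_referenced(m);
 *		if (actcount)
 *			m->act_count += ACT_ADVANCE + actcount;
 *
 *	The early break once the count exceeds 4 keeps the pv walk
 *	cheap for heavily shared pages.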
3158 */ 3159int 3160pmap_ts_referenced(vm_page_t m) 3161{ 3162 register pv_entry_t pv, pvf, pvn; 3163 pt_entry_t *pte; 3164 int s; 3165 int rtval = 0; 3166 3167 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3168 return (rtval); 3169 3170 s = splvm(); 3171 3172 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3173 3174 pvf = pv; 3175 3176 do { 3177 pvn = TAILQ_NEXT(pv, pv_list); 3178 3179 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3180 3181 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3182 3183 if (!pmap_track_modified(pv->pv_va)) 3184 continue; 3185 3186 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3187 3188 if (pte && (*pte & PG_A)) { 3189 *pte &= ~PG_A; 3190 3191 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3192 3193 rtval++; 3194 if (rtval > 4) { 3195 break; 3196 } 3197 } 3198 } while ((pv = pvn) != NULL && pv != pvf); 3199 } 3200 splx(s); 3201 3202 return (rtval); 3203} 3204 3205/* 3206 * pmap_is_modified: 3207 * 3208 * Return whether or not the specified physical page was modified 3209 * in any physical maps. 3210 */ 3211boolean_t 3212pmap_is_modified(vm_page_t m) 3213{ 3214 return pmap_testbit(m, PG_M); 3215} 3216 3217/* 3218 * Clear the modify bits on the specified physical page. 3219 */ 3220void 3221pmap_clear_modify(vm_page_t m) 3222{ 3223 pmap_changebit(m, PG_M, FALSE); 3224} 3225 3226/* 3227 * pmap_clear_reference: 3228 * 3229 * Clear the reference bit on the specified physical page. 3230 */ 3231void 3232pmap_clear_reference(vm_page_t m) 3233{ 3234 pmap_changebit(m, PG_A, FALSE); 3235} 3236 3237/* 3238 * Miscellaneous support routines follow 3239 */ 3240 3241static void 3242i386_protection_init() 3243{ 3244 register int *kp, prot; 3245 3246 kp = protection_codes; 3247 for (prot = 0; prot < 8; prot++) { 3248 switch (prot) { 3249 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: 3250 /* 3251 * Read access is also 0. There isn't any execute bit, 3252 * so just make it readable. 3253 */ 3254 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: 3255 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: 3256 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: 3257 *kp++ = 0; 3258 break; 3259 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: 3260 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: 3261 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: 3262 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: 3263 *kp++ = PG_RW; 3264 break; 3265 } 3266 } 3267} 3268 3269/* 3270 * Map a set of physical memory pages into the kernel virtual 3271 * address space. Return a pointer to where it is mapped. This 3272 * routine is intended to be used for mapping device memory, 3273 * NOT real memory. 
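 *
 * Hypothetical example, for illustration only (devpa and regs are
 * assumed names): mapping a one-page device register window
 *
 *	regs = pmap_mapdev(devpa, PAGE_SIZE);
 *	(use the registers)
 *	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);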
3274 */
3275void *
3276pmap_mapdev(pa, size)
3277	vm_offset_t pa;
3278	vm_size_t size;
3279{
3280	vm_offset_t va, tmpva, offset;
3281	pt_entry_t *pte;
3282
3283	offset = pa & PAGE_MASK;
3284	size = roundup(offset + size, PAGE_SIZE);
3285
3286	GIANT_REQUIRED;
3287
3288	va = kmem_alloc_pageable(kernel_map, size);
3289	if (!va)
3290		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3291
3292	pa = pa & PG_FRAME;
3293	for (tmpva = va; size > 0; ) {
3294		pte = vtopte(tmpva);
3295		*pte = pa | PG_RW | PG_V | pgeflag;
3296		size -= PAGE_SIZE;
3297		tmpva += PAGE_SIZE;
		pa += PAGE_SIZE;	/* advance to the next physical page */
3298	}
3299	invlpg_range(va, tmpva);
3300
3301	return ((void *)(va + offset));
3302}
3303
3304void
3305pmap_unmapdev(va, size)
3306	vm_offset_t va;
3307	vm_size_t size;
3308{
3309	vm_offset_t base, offset, tmpva;
3310	pt_entry_t *pte;
3311
3312	base = va & PG_FRAME;
3313	offset = va & PAGE_MASK;
3314	size = roundup(offset + size, PAGE_SIZE);
3315
3316	for (tmpva = base; size > 0; ) {
3317		pte = vtopte(tmpva);
3318		*pte = 0;
3319		size -= PAGE_SIZE;
3320		tmpva += PAGE_SIZE;
3321	}
3322	invlpg_range(va, tmpva);
3323	kmem_free(kernel_map, base, tmpva - base);	/* size is 0 here */
3324}
3325
3326/*
3327 * perform the pmap work for mincore
3328 */
3329int
3330pmap_mincore(pmap, addr)
3331	pmap_t pmap;
3332	vm_offset_t addr;
3333{
3334	pt_entry_t *ptep, pte;
3335	vm_page_t m;
3336	int val = 0;
3337
3338	ptep = pmap_pte(pmap, addr);
3339	if (ptep == 0) {
3340		return 0;
3341	}
3342
3343	if ((pte = *ptep) != 0) {
3344		vm_offset_t pa;
3345
3346		val = MINCORE_INCORE;
3347		if ((pte & PG_MANAGED) == 0)
3348			return val;
3349
3350		pa = pte & PG_FRAME;
3351
3352		m = PHYS_TO_VM_PAGE(pa);
3353
3354		/*
3355		 * Modified by us
3356		 */
3357		if (pte & PG_M)
3358			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3359		/*
3360		 * Modified by someone
3361		 */
3362		else if (m->dirty || pmap_is_modified(m))
3363			val |= MINCORE_MODIFIED_OTHER;
3364		/*
3365		 * Referenced by us
3366		 */
3367		if (pte & PG_A)
3368			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3369
3370		/*
3371		 * Referenced by someone
3372		 */
3373		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
3374			val |= MINCORE_REFERENCED_OTHER;
3375			vm_page_flag_set(m, PG_REFERENCED);
3376		}
3377	}
3378	return val;
3379}
3380
3381void
3382pmap_activate(struct thread *td)
3383{
3384	struct proc *p = td->td_proc;
3385	pmap_t pmap;
3386	u_int32_t cr3;
3387
3388	pmap = vmspace_pmap(td->td_proc->p_vmspace);
3389#if defined(SMP)
3390	pmap->pm_active |= PCPU_GET(cpumask);
3391#else
3392	pmap->pm_active |= 1;
3393#endif
3394#if defined(SWTCH_OPTIM_STATS)
3395	tlb_flush_count++;
3396#endif
3397	cr3 = vtophys(pmap->pm_pdir);
3398	/* XXXKSE this is wrong.
3399	 * pmap_activate is for the current thread on the current cpu
3400	 */
3401	if (p->p_flag & P_KSES) {
3402		/* Make sure all other cr3 entries are updated. */
3403		/* what if they are running?
XXXKSE (maybe abort them) */ 3404 FOREACH_THREAD_IN_PROC(p, td) { 3405 td->td_pcb->pcb_cr3 = cr3; 3406 } 3407 } else { 3408 td->td_pcb->pcb_cr3 = cr3; 3409 } 3410 load_cr3(cr3); 3411} 3412 3413vm_offset_t 3414pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3415{ 3416 3417 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3418 return addr; 3419 } 3420 3421 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 3422 return addr; 3423} 3424 3425 3426#if defined(PMAP_DEBUG) 3427pmap_pid_dump(int pid) 3428{ 3429 pmap_t pmap; 3430 struct proc *p; 3431 int npte = 0; 3432 int index; 3433 3434 sx_slock(&allproc_lock); 3435 LIST_FOREACH(p, &allproc, p_list) { 3436 if (p->p_pid != pid) 3437 continue; 3438 3439 if (p->p_vmspace) { 3440 int i,j; 3441 index = 0; 3442 pmap = vmspace_pmap(p->p_vmspace); 3443 for (i = 0; i < NPDEPG; i++) { 3444 pd_entry_t *pde; 3445 pt_entry_t *pte; 3446 vm_offset_t base = i << PDRSHIFT; 3447 3448 pde = &pmap->pm_pdir[i]; 3449 if (pde && pmap_pde_v(pde)) { 3450 for (j = 0; j < NPTEPG; j++) { 3451 vm_offset_t va = base + (j << PAGE_SHIFT); 3452 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 3453 if (index) { 3454 index = 0; 3455 printf("\n"); 3456 } 3457 sx_sunlock(&allproc_lock); 3458 return npte; 3459 } 3460 pte = pmap_pte_quick(pmap, va); 3461 if (pte && pmap_pte_v(pte)) { 3462 pt_entry_t pa; 3463 vm_page_t m; 3464 pa = *pte; 3465 m = PHYS_TO_VM_PAGE(pa); 3466 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 3467 va, pa, m->hold_count, m->wire_count, m->flags); 3468 npte++; 3469 index++; 3470 if (index >= 2) { 3471 index = 0; 3472 printf("\n"); 3473 } else { 3474 printf(" "); 3475 } 3476 } 3477 } 3478 } 3479 } 3480 } 3481 } 3482 sx_sunlock(&allproc_lock); 3483 return npte; 3484} 3485#endif 3486 3487#if defined(DEBUG) 3488 3489static void pads __P((pmap_t pm)); 3490void pmap_pvdump __P((vm_offset_t pa)); 3491 3492/* print address space of pmap*/ 3493static void 3494pads(pm) 3495 pmap_t pm; 3496{ 3497 int i, j; 3498 vm_offset_t va; 3499 pt_entry_t *ptep; 3500 3501 if (pm == kernel_pmap) 3502 return; 3503 for (i = 0; i < NPDEPG; i++) 3504 if (pm->pm_pdir[i]) 3505 for (j = 0; j < NPTEPG; j++) { 3506 va = (i << PDRSHIFT) + (j << PAGE_SHIFT); 3507 if (pm == kernel_pmap && va < KERNBASE) 3508 continue; 3509 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 3510 continue; 3511 ptep = pmap_pte_quick(pm, va); 3512 if (pmap_pte_v(ptep)) 3513 printf("%x:%x ", va, *ptep); 3514 }; 3515 3516} 3517 3518void 3519pmap_pvdump(pa) 3520 vm_offset_t pa; 3521{ 3522 pv_entry_t pv; 3523 vm_page_t m; 3524 3525 printf("pa %x", pa); 3526 m = PHYS_TO_VM_PAGE(pa); 3527 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3528 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3529 pads(pv->pv_pmap); 3530 } 3531 printf(" "); 3532} 3533#endif 3534
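/*
 * Illustration only, not part of the original source: with PMAP_DEBUG
 * compiled in, the valid ptes of a process can be dumped from debugging
 * context with, e.g.,
 *
 *	pmap_pid_dump(1);
 *
 * which prints va/pte pairs for pid 1 and returns the number of valid
 * ptes it found.
 */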