/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	$Id: pmap.c,v 1.76 1996/02/25 03:02:44 dyson Exp $
 */

/*
 * Derived from hp300 version by Mike Hibler, this version by William
 * Jolitz uses a recursive map [a pde points to the page directory] to
 * map the page tables using the pagetables themselves. This is done to
 * reduce the impact on kernel virtual memory for lots of sparse address
 * space, and to reduce the cost of memory to each process.
 *
 *	Derived from: hp300/@(#)pmap.c	7.1 (Berkeley) 12/5/90
 */
/*
 * Major modifications by John S. Dyson primarily to support
 * pageable page tables, eliminating pmap_attributes,
 * discontiguous memory pages, and using more efficient string
 * instructions. Jan 13, 1994.  Further modifications on Mar 2, 1994,
 * general clean-up and efficiency mods.
 */

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */
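/*
 * Note on the recursive map mentioned above: the page directory is
 * installed as its own page table at index PTDPTDI, so the PTE for a
 * virtual address va can be looked up as vtopte(va) inside the PTmap
 * window.  The same trick, applied through APTDpde, maps an alternate
 * (non-current) address space at APTmap, which is what avtopte() and
 * get_pt_entry() below rely on.
 */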
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/msgbuf.h>
#include <sys/queue.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <machine/pcb.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>

#include <i386/isa/isa.h>

#define PMAP_KEEP_PDIRS

static void	init_pv_entries __P((int));

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023]))
#define	pdir_pde(m, v)	(m[((vm_offset_t)(v) >> PD_SHIFT)&1023])

#define	pmap_pte_pa(pte)	(*(int *)(pte) & PG_FRAME)

#define	pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define	pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define	pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define	pmap_pte_u(pte)		((*(int *)pte & PG_U) != 0)
#define	pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)

#define	pmap_pte_set_w(pte, v)	((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
#define	pmap_pte_set_prot(pte, v)	((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))

/*
 * Given a map and a machine independent protection code,
 * convert to a vax protection code.
 */
#define	pte_prot(m, p)	(protection_codes[p])
static int protection_codes[8];

static struct pmap kernel_pmap_store;
pmap_t kernel_pmap;

vm_offset_t avail_start;	/* PA of first available physical page */
vm_offset_t avail_end;		/* PA of last available physical page */
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
static vm_offset_t vm_first_phys;

static int nkpt;

extern vm_offset_t clean_sva, clean_eva;
extern int cpu_class;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1;
static pt_entry_t *CMAP2, *ptmmap;
static pv_entry_t pv_table;
caddr_t CADDR1, ptvmmap;
static caddr_t CADDR2;
static pt_entry_t *msgbufmap;
struct msgbuf *msgbufp;

static void	free_pv_entry __P((pv_entry_t pv));
static pt_entry_t *
		get_pt_entry __P((pmap_t pmap));
static pv_entry_t
		get_pv_entry __P((void));
static void	i386_protection_init __P((void));
static void	pmap_alloc_pv_entry __P((void));
static void	pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem));
static void	pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
			vm_offset_t pa));
static int	pmap_is_managed __P((vm_offset_t pa));
static void	pmap_remove_all __P((vm_offset_t pa));
static void	pmap_remove_entry __P((struct pmap *pmap, pv_entry_t pv,
			vm_offset_t va));
static vm_page_t
		pmap_pte_vm_page __P((pmap_t pmap, vm_offset_t pt));
static boolean_t
		pmap_testbit __P((vm_offset_t pa, int bit));
static void *	pmap_getpdir __P((void));
void	pmap_prefault __P((pmap_t pmap, vm_offset_t addra,
		vm_map_entry_t entry, vm_object_t object));

/*
 * The below are finer grained pmap_update routines.  These eliminate
 * the gratuitous tlb flushes on non-i386 architectures.
 */
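/*
 * The ".byte 0xf,0x1,0x38" sequences below hand-assemble the i486+
 * "invlpg (%eax)" instruction (opcode 0f 01 /7 with an (%eax) operand),
 * presumably because assemblers of the day did not know the mnemonic.
 * A genuine 386 has no invlpg, so those CPUs fall back to a full TLB
 * flush via pmap_update().
 */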
static __inline void
pmap_update_1pg( vm_offset_t va) {
#if defined(I386_CPU)
	if (cpu_class == CPUCLASS_386)
		pmap_update();
	else
#endif
		__asm __volatile(".byte 0xf,0x1,0x38": :"a" (va));
}

static __inline void
pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) {
#if defined(I386_CPU)
	if (cpu_class == CPUCLASS_386) {
		pmap_update();
	} else
#endif
	{
		__asm __volatile(".byte 0xf,0x1,0x38": :"a" (va1));
		__asm __volatile(".byte 0xf,0x1,0x38": :"a" (va2));
	}
}

/*
 *	Routine:	pmap_pte
 *	Function:
 *		Extract the page table entry associated
 *		with the given map/virtual_address pair.
 *	[ what about induced faults -wfj]
 */

__inline pt_entry_t * __pure
pmap_pte(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{

	if (pmap && *pmap_pde(pmap, va)) {
		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;

		/* are we current address space or kernel? */
		if ((pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME)))
			return ((pt_entry_t *) vtopte(va));
		/* otherwise, we are alternate address space */
		else {
			if (frame != ((int) APTDpde & PG_FRAME)) {
				APTDpde = pmap->pm_pdir[PTDPTDI];
				pmap_update();
			}
			return ((pt_entry_t *) avtopte(va));
		}
	}
	return (0);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */

vm_offset_t
pmap_extract(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	vm_offset_t pa;

	if (pmap && *pmap_pde(pmap, va)) {
		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;

		/* are we current address space or kernel? */
		if ((pmap == kernel_pmap)
		    || (frame == ((int) PTDpde & PG_FRAME))) {
			pa = *(int *) vtopte(va);
			/* otherwise, we are alternate address space */
		} else {
			if (frame != ((int) APTDpde & PG_FRAME)) {
				APTDpde = pmap->pm_pdir[PTDPTDI];
				pmap_update();
			}
			pa = *(int *) avtopte(va);
		}
		return ((pa & PG_FRAME) | (va & ~PG_FRAME));
	}
	return 0;

}

/*
 * determine if a page is managed (memory vs. device)
 */
static __inline int
pmap_is_managed(pa)
	vm_offset_t pa;
{
	int i;

	if (!pmap_initialized)
		return 0;

	for (i = 0; phys_avail[i + 1]; i += 2) {
		if (pa >= phys_avail[i] && pa < phys_avail[i + 1])
			return 1;
	}
	return 0;
}

/*
 * find the vm_page_t of a pte (only) given va of pte and pmap
 */
static __inline vm_page_t
pmap_pte_vm_page(pmap, pt)
	pmap_t pmap;
	vm_offset_t pt;
{
	vm_page_t m;

	pt = trunc_page(pt);
	pt = (pt - UPT_MIN_ADDRESS) / PAGE_SIZE;
	pt = ((vm_offset_t) pmap->pm_pdir[pt]) & PG_FRAME;
	m = PHYS_TO_VM_PAGE(pt);
	return m;
}

/*
 * Wire a page table page
 */
__inline vm_page_t
pmap_use_pt(pmap, va)
	pmap_t pmap;
	vm_offset_t va;
{
	vm_offset_t pt;
	vm_page_t m;

	if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized)
		return NULL;

	pt = (vm_offset_t) vtopte(va);
	m = pmap_pte_vm_page(pmap, pt);
	vm_page_hold(m);
	return m;
}

/*
 * Unwire a page table page
 */
__inline void
pmap_unuse_pt(pmap, va, mpte)
	pmap_t pmap;
	vm_offset_t va;
	vm_page_t mpte;
{

	if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized)
		return;

	if (mpte == NULL) {
		vm_offset_t pt;
		pt = (vm_offset_t) vtopte(va);
		mpte = pmap_pte_vm_page(pmap, pt);
	}

	vm_page_unhold(mpte);

	if (pmap != kernel_pmap &&
	    (mpte->hold_count == 0) &&
	    (mpte->wire_count == 0) &&
	    (va < KPT_MIN_ADDRESS)) {
/*
 * We don't free page-table-pages anymore because it can have a negative
 * impact on perf at times.  Now we just deactivate, and it'll get cleaned
 * up if needed...  Also, if the page ends up getting used, it will fault
 * back into the process address space and be reactivated.
 */
#ifdef PMAP_FREE_OLD_PTES
		pmap_page_protect(VM_PAGE_TO_PHYS(mpte), VM_PROT_NONE);
		vm_page_free(mpte);
#else
		mpte->dirty = 0;
		vm_page_deactivate(mpte);
#endif
	}
}

/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * On the i386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */
void
pmap_bootstrap(firstaddr, loadaddr)
	vm_offset_t firstaddr;
	vm_offset_t loadaddr;
{
	vm_offset_t va;
	pt_entry_t *pte;

	avail_start = firstaddr;

	/*
	 * XXX The calculation of virtual_avail is wrong.  It's NKPT*PAGE_SIZE
	 * too large.  It should instead be correctly calculated in locore.s
	 * and not based on 'first' (which is a physical address, not a
	 * virtual address, for the start of unused physical memory).  The
	 * kernel page tables are NOT double mapped and thus should not be
	 * included in this calculation.
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence (XXX and which no longer exists).
	 */
	kernel_pmap = &kernel_pmap_store;

	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD);

	kernel_pmap->pm_count = 1;
	nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

	va = virtual_avail;
	pte = pmap_pte(kernel_pmap, va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 */
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
	SYSMAP(caddr_t, CMAP2, CADDR2, 1)

	/*
	 * ptmmap is used for reading arbitrary physical pages via /dev/mem.
	 */
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)

	/*
	 * msgbufmap is used to map the system message buffer.
	 */
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 1)

	virtual_avail = va;

	*(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0;
	pmap_update();
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 *	pmap_init has been enhanced to support in a fairly consistent
 *	way, discontiguous physical memory.
 */
void
pmap_init(phys_start, phys_end)
	vm_offset_t phys_start, phys_end;
{
	vm_offset_t addr;
	vm_size_t npg, s;
	int i;

	/*
	 * calculate the number of pv_entries needed
	 */
	vm_first_phys = phys_avail[0];
	for (i = 0; phys_avail[i + 1]; i += 2);
	npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE;

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */
	s = (vm_size_t) (sizeof(struct pv_entry) * npg);
	s = round_page(s);
	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
	pv_table = (pv_entry_t) addr;

	/*
	 * init the pv free list
	 */
	init_pv_entries(npg);
	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}
/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(virt, start, end, prot)
	vm_offset_t virt;
	vm_offset_t start;
	vm_offset_t end;
	int prot;
{
	while (start < end) {
		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
		virt += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	return (virt);
}

#ifdef PMAP_KEEP_PDIRS
int nfreepdir;
caddr_t *pdirlist;
#define NFREEPDIR 3

static void *
pmap_getpdir() {
	caddr_t *pdir;
	if (pdirlist) {
		--nfreepdir;
		pdir = pdirlist;
		pdirlist = (caddr_t *) *pdir;
		bzero( (caddr_t) pdir, PAGE_SIZE);
	} else {
		pdir = (caddr_t *) kmem_alloc(kernel_map, PAGE_SIZE);
	}

	return (void *) pdir;
}

static void
pmap_freepdir(void *pdir) {
	if (nfreepdir > NFREEPDIR) {
		kmem_free(kernel_map, (vm_offset_t) pdir, PAGE_SIZE);
	} else {
		* (caddr_t *) pdir = (caddr_t) pdirlist;
		pdirlist = (caddr_t *) pdir;
		++nfreepdir;
	}
}
#endif

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(pmap)
	register struct pmap *pmap;
{
	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */

#ifdef PMAP_KEEP_PDIRS
	pmap->pm_pdir = pmap_getpdir();
#else
	pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, PAGE_SIZE);
#endif

	/* wire in kernel global address entries */
	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);

	/* install self-referential address mapping entry */
	*(int *) (pmap->pm_pdir + PTDPTDI) =
	    ((int) pmap_kextract((vm_offset_t) pmap->pm_pdir)) | PG_V | PG_KW;

	pmap->pm_count = 1;
}

/*
 * grow the number of kernel page table entries, if needed
 */

static vm_page_t nkpg;
vm_offset_t kernel_vm_end;

void
pmap_growkernel(vm_offset_t addr)
{
	struct proc *p;
	struct pmap *pmap;
	int s;

	s = splhigh();
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;
		while (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			++nkpt;
		}
	}
	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	while (kernel_vm_end < addr) {
		if (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			continue;
		}
		++nkpt;
		if (!nkpg) {
			nkpg = vm_page_alloc(kernel_object, 0, VM_ALLOC_SYSTEM);
			if (!nkpg)
				panic("pmap_growkernel: no memory to grow kernel");
			vm_page_wire(nkpg);
			vm_page_remove(nkpg);
			pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
		}
		pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_KW);
		nkpg = NULL;

		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
			if (p->p_vmspace) {
				pmap = &p->p_vmspace->vm_pmap;
				*pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
			}
		}
		*pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	}
	splx(s);
}
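/*
 * Note: pmap_pinit() copies only the nkpt kernel PDEs that exist when a
 * pmap is created, so when pmap_growkernel() adds a new kernel page table
 * it must walk allproc (above) and install the new PDE in every existing
 * process page directory as well as in the kernel pmap.
 */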
/*
 * Retire the given physical map from service.
 * Should only be called if the map contains
 * no valid mappings.
 */
void
pmap_destroy(pmap)
	register pmap_t pmap;
{
	int count;

	if (pmap == NULL)
		return;

	count = --pmap->pm_count;
	if (count == 0) {
		pmap_release(pmap);
		free((caddr_t) pmap, M_VMPMAP);
	}
}

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap)
	register struct pmap *pmap;
{
#ifdef PMAP_KEEP_PDIRS
	pmap_freepdir( (void *)pmap->pm_pdir);
#else
	kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE);
#endif
}

/*
 * Add a reference to the specified pmap.
 */
void
pmap_reference(pmap)
	pmap_t pmap;
{
	if (pmap != NULL) {
		pmap->pm_count++;
	}
}

#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2)

/*
 * Data for the pv entry allocation mechanism
 */
static int pv_freelistcnt;
static pv_entry_t pv_freelist;
static vm_offset_t pvva;
static int npvvapg;

/*
 * free the pv_entry back to the free list
 */
static __inline void
free_pv_entry(pv)
	pv_entry_t pv;
{
	if (!pv)
		return;
	++pv_freelistcnt;
	pv->pv_next = pv_freelist;
	pv_freelist = pv;
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 * the memory allocation is performed bypassing the malloc code
 * because of the possibility of allocations at interrupt time.
 */
static __inline pv_entry_t
get_pv_entry()
{
	pv_entry_t tmp;

	/*
	 * get more pv_entry pages if needed
	 */
	if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) {
		pmap_alloc_pv_entry();
	}
	/*
	 * get a pv_entry off of the free list
	 */
	--pv_freelistcnt;
	tmp = pv_freelist;
	pv_freelist = tmp->pv_next;
	return tmp;
}

/*
 * this *strange* allocation routine *statistically* eliminates the
 * *possibility* of a malloc failure (*FATAL*) for a pv_entry_t data structure.
 * also -- this code is MUCH MUCH faster than the malloc equiv...
 */
static void
pmap_alloc_pv_entry()
{
	/*
	 * do we have any pre-allocated map-pages left?
	 */
	if (npvvapg) {
		vm_page_t m;

		/*
		 * we do this to keep recursion away
		 */
		pv_freelistcnt += PV_FREELIST_MIN;
		/*
		 * allocate a physical page out of the vm system
		 */
		m = vm_page_alloc(kernel_object,
		    OFF_TO_IDX(pvva - vm_map_min(kernel_map)),
		    VM_ALLOC_INTERRUPT);
		if (m) {
			int newentries;
			int i;
			pv_entry_t entry;

			newentries = (PAGE_SIZE / sizeof(struct pv_entry));
			/*
			 * wire the page
			 */
			vm_page_wire(m);
			m->flags &= ~PG_BUSY;
			/*
			 * let the kernel see it
			 */
			pmap_kenter(pvva, VM_PAGE_TO_PHYS(m));

			entry = (pv_entry_t) pvva;
			/*
			 * update the allocation pointers
			 */
			pvva += PAGE_SIZE;
			--npvvapg;

			/*
			 * free the entries into the free list
			 */
			for (i = 0; i < newentries; i++) {
				free_pv_entry(entry);
				entry++;
			}
		}
		pv_freelistcnt -= PV_FREELIST_MIN;
	}
	if (!pv_freelist)
		panic("get_pv_entry: cannot get a pv_entry_t");
}



/*
 * init the pv_entry allocation system
 */
#define PVSPERPAGE 64
void
init_pv_entries(npg)
	int npg;
{
	/*
	 * allocate enough kvm space for PVSPERPAGE entries per page (lots)
	 * kvm space is fairly cheap, be generous!!! (the system can panic if
	 * this is too small.)
	 */
	npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry)
	    + PAGE_SIZE - 1) / PAGE_SIZE;
	pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE);
	/*
	 * get the first batch of entries
	 */
	free_pv_entry(get_pv_entry());
}
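/*
 * Note: pv_table (allocated in pmap_init) holds one struct pv_entry header
 * per page of managed physical memory, indexed via pa_to_pvh().  Each
 * header chains the (pmap, va) pairs currently mapping that page, which is
 * how pmap_remove_all(), pmap_testbit() and pmap_changebit() find every
 * mapping of a given physical page.
 */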
static pt_entry_t *
get_pt_entry(pmap)
	pmap_t pmap;
{
	vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;

	/* are we current address space or kernel? */
	if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) {
		return PTmap;
	}
	/* otherwise, we are alternate address space */
	if (frame != ((int) APTDpde & PG_FRAME)) {
		APTDpde = pmap->pm_pdir[PTDPTDI];
		pmap_update();
	}
	return APTmap;
}

/*
 * If it is the first entry on the list, it is actually
 * in the header and we must copy the following entry up
 * to the header.  Otherwise we must search the list for
 * the entry.  In either case we free the now unused entry.
 */
static void
pmap_remove_entry(pmap, pv, va)
	struct pmap *pmap;
	pv_entry_t pv;
	vm_offset_t va;
{
	pv_entry_t npv;
	int s;

	s = splhigh();
	if (pmap == pv->pv_pmap && va == pv->pv_va) {
		pmap_unuse_pt(pmap, va, pv->pv_ptem);
		npv = pv->pv_next;
		if (npv) {
			*pv = *npv;
			free_pv_entry(npv);
		} else {
			pv->pv_pmap = NULL;
		}
	} else {
		for (npv = pv->pv_next; npv; (pv = npv, npv = pv->pv_next)) {
			if (pmap == npv->pv_pmap && va == npv->pv_va) {
				pmap_unuse_pt(pmap, va, npv->pv_ptem);
				pv->pv_next = npv->pv_next;
				free_pv_entry(npv);
				break;
			}
		}
	}
	splx(s);
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap, sva, eva)
	struct pmap *pmap;
	register vm_offset_t sva;
	register vm_offset_t eva;
{
	register pt_entry_t *ptp, *ptq;
	vm_offset_t pa;
	register pv_entry_t pv;
	vm_offset_t va;
	pt_entry_t oldpte;
	vm_offset_t pdnxt;
	vm_offset_t ptepaddr;
	vm_page_t mpte;
	int update_needed;

	if (pmap == NULL)
		return;

	ptp = get_pt_entry(pmap);

	/*
	 * special handling of removing one page.  a very
	 * common operation and easy to short circuit some
	 * code.
	 */
	if ((sva + PAGE_SIZE) == eva) {

		if (*pmap_pde(pmap, sva) == 0)
			return;

		ptq = ptp + i386_btop(sva);

		if (!*ptq)
			return;

		oldpte = *ptq;
		if (((int)oldpte) & PG_W)
			pmap->pm_stats.wired_count--;
		pmap->pm_stats.resident_count--;

		*ptq = 0;

		pa = ((int)oldpte) & PG_FRAME;
		if (pmap_is_managed(pa)) {
			if ((int) oldpte & PG_M) {
				if (sva < USRSTACK + (UPAGES * PAGE_SIZE) ||
				    (sva >= KERNBASE && (sva < clean_sva || sva >= clean_eva))) {
					PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
				}
			}
			pv = pa_to_pvh(pa);
			pmap_remove_entry(pmap, pv, sva);
		} else {
			pmap_unuse_pt(pmap, sva, NULL);
		}
		pmap_update_1pg(sva);
		return;
	}

	update_needed = 0;
	sva = i386_btop(sva);
	pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1));
	ptepaddr = (vm_offset_t) *pmap_pde(pmap, i386_ptob(sva));
	eva = i386_btop(eva);
	mpte = NULL;

	while (sva < eva) {
		if (sva >= pdnxt) {
			pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1));
			ptepaddr = (vm_offset_t) *pmap_pde(pmap, i386_ptob(sva));
			mpte = NULL;
		}
		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptepaddr == 0) {
			sva = pdnxt;
			continue;
		}

		if (mpte == NULL)
			mpte = PHYS_TO_VM_PAGE(i386_trunc_page(ptepaddr));
		if ((mpte->hold_count == 0) && (mpte->wire_count == 0)) {
			sva = pdnxt;
			continue;
		}

		if (pdnxt > eva)
			pdnxt = eva;
		/*
		 * search for page table entries
		 */
		while ((sva < pdnxt) && (*(ptp + sva) == 0))
			++sva;
		if (sva == pdnxt) {
			continue;
		}

		ptq = ptp + sva;
		/*
		 * Invalidate the PTEs. XXX: should cluster them up and
		 * invalidate as many as possible at once.
		 * Update statistics
		 */
		oldpte = *ptq;
		*ptq = 0;
		if (((int) oldpte) & PG_W)
			pmap->pm_stats.wired_count--;
		pmap->pm_stats.resident_count--;

		va = i386_ptob(sva);

		++update_needed;
		pa = ((int) oldpte) & PG_FRAME;
		if (!pmap_is_managed(pa)) {
			pmap_unuse_pt(pmap, (vm_offset_t) va, NULL);
			++sva;
			continue;
		}
		if ((int) oldpte & PG_M) {
			if (va < USRSTACK + (UPAGES * PAGE_SIZE) ||
			    (va >= KERNBASE && (va < clean_sva || va >= clean_eva))) {
				PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
			}
		}
		pv = pa_to_pvh(pa);
		pmap_remove_entry(pmap, pv, va);
		++sva;
	}
	if (update_needed)
		pmap_update();
}
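/*
 * Note: after the single-page fast path in pmap_remove() above, the bulk
 * loop works in page-table-page sized chunks: pdnxt marks the next page
 * directory boundary, and whole chunks are skipped when the page directory
 * entry is empty or when the page table page is neither held nor wired
 * (and so holds no mappings of interest).
 */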
/*
 *	Routine:	pmap_remove_all
 *	Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 *	Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */
static void
pmap_remove_all(pa)
	vm_offset_t pa;
{
	register pv_entry_t pv, opv, npv;
	register pt_entry_t *pte, *ptp;
	vm_offset_t va;
	struct pmap *pmap;
	vm_page_t m;
	int s;
	int anyvalid = 0;

	/*
	 * Not one of ours
	 */
	/*
	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
	 * pages!
	 */
	if (!pmap_is_managed(pa))
		return;

	pa = trunc_page(pa);
	opv = pa_to_pvh(pa);
	if (opv->pv_pmap == NULL)
		return;

	m = PHYS_TO_VM_PAGE(pa);
	s = splhigh();
	pv = opv;
	while (pv && ((pmap = pv->pv_pmap) != NULL)) {
		int tpte;
		ptp = get_pt_entry(pmap);
		va = pv->pv_va;
		pte = ptp + i386_btop(va);
		if (tpte = ((int) *pte)) {
			*pte = 0;
			if (tpte & PG_W)
				pmap->pm_stats.wired_count--;
			pmap->pm_stats.resident_count--;
			if (curproc != pageproc)
				anyvalid++;

			/*
			 * Update the vm_page_t clean and reference bits.
			 */
			if ((tpte & PG_M) != 0) {
				if (va < USRSTACK + (UPAGES * PAGE_SIZE) ||
				    (va >= KERNBASE && (va < clean_sva || va >= clean_eva))) {
					m->dirty = VM_PAGE_BITS_ALL;
				}
			}
		}
		pv = pv->pv_next;
	}

	for (pv = opv->pv_next; pv; pv = npv) {
		npv = pv->pv_next;
		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
		free_pv_entry(pv);
	}

	opv->pv_pmap = NULL;
	opv->pv_next = NULL;

	splx(s);
	if (anyvalid)
		pmap_update();
}


/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap, sva, eva, prot)
	register pmap_t pmap;
	vm_offset_t sva, eva;
	vm_prot_t prot;
{
	register pt_entry_t *pte;
	register vm_offset_t va;
	int i386prot;
	register pt_entry_t *ptp;
	int anychanged = 0;

	if (pmap == NULL)
		return;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}
	if (prot & VM_PROT_WRITE)
		return;

	ptp = get_pt_entry(pmap);

	sva = i386_btop(sva);
	eva = i386_btop(eva);

	while (sva < eva) {
		vm_offset_t pdnxt;
		vm_offset_t ptepaddr;
		vm_page_t mpte;
		int pprot;
		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1));
		ptepaddr = (vm_offset_t) *pmap_pde(pmap, i386_ptob(sva));
		if (ptepaddr == 0) {
			sva = pdnxt;
			continue;
		}

		mpte = PHYS_TO_VM_PAGE(i386_trunc_page(ptepaddr));
		if ((mpte->hold_count == 0) && (mpte->wire_count == 0)) {
			sva = pdnxt;
			continue;
		}

		if (pdnxt > eva)
			pdnxt = eva;
		/*
		 * search for page table entries
		 */
		while ((sva < pdnxt) && (*(ptp + sva) == 0))
			++sva;

		if (sva == pdnxt)
			continue;

		pte = ptp + sva;

		va = i386_ptob(sva);
		i386prot = pte_prot(pmap, prot);
		if (va < UPT_MAX_ADDRESS) {
			i386prot |= PG_u;
			if (va >= UPT_MIN_ADDRESS)
				i386prot |= PG_RW;
		}
		pprot = *(int *)pte & PG_PROT;
		if (pprot != i386prot) {
			pmap_pte_set_prot(pte, i386prot);
			anychanged++;
		}
		++sva;
	}
	if (anychanged)
		pmap_update();
}
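/*
 * Note: pmap_protect() only ever reduces permissions.  Removing read
 * access degenerates to pmap_remove() above, and a request that still
 * includes write access is a no-op, so the loop body only rewrites the
 * PG_PROT bits of the affected PTEs (keeping page table pages in the
 * UPT range writable).
 */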
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(pmap, va, pa, prot, wired)
	register pmap_t pmap;
	vm_offset_t va;
	register vm_offset_t pa;
	vm_prot_t prot;
	boolean_t wired;
{
	register pt_entry_t *pte;
	register pt_entry_t npte;
	vm_offset_t opa;
	register pv_entry_t pv, npv;
	int ptevalid = 0;

	if (pmap == NULL)
		return;

	pv = NULL;

	va = trunc_page(va);
	pa = trunc_page(pa);
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");

	/*
	 * Page Directory table entry not valid, we need a new PT page
	 */
	pte = pmap_pte(pmap, va);
	if (pte == NULL) {
		printf("kernel page directory invalid pdir=%p, va=0x%lx\n",
		    pmap->pm_pdir[PTDPTDI], va);
		panic("invalid kernel page directory");
	}
	opa = pmap_pte_pa(pte);

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (opa == pa) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && !pmap_pte_w(pte))
			pmap->pm_stats.wired_count++;
		else if (!wired && pmap_pte_w(pte))
			pmap->pm_stats.wired_count--;

		goto validate;
	}
	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		pmap_remove(pmap, va, va + PAGE_SIZE);
	}
	/*
	 * Enter on the PV list if part of our managed memory Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */
	if (pmap_is_managed(pa)) {
		int s;

		pv = pa_to_pvh(pa);
		s = splhigh();
		/*
		 * No entries yet, use header as the first entry
		 */
		if (pv->pv_pmap == NULL) {
			pv->pv_va = va;
			pv->pv_pmap = pmap;
			pv->pv_next = NULL;
			pv->pv_ptem = NULL;
		}
		/*
		 * There is at least one other VA mapping this page.  Place
		 * this entry after the header.
		 */
		else {
			npv = get_pv_entry();
			npv->pv_va = va;
			npv->pv_pmap = pmap;
			npv->pv_next = pv->pv_next;
			pv->pv_next = npv;
			pv = npv;
			pv->pv_ptem = NULL;
		}
		splx(s);
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	npte = (pt_entry_t) ((int) (pa | pte_prot(pmap, prot) | PG_V));

	/*
	 * When forking (copy-on-write, etc): A process will turn off write
	 * permissions for any of its writable pages.  If the data (object) is
	 * only referred to by one process, the process's map is modified
	 * directly as opposed to using the object manipulation routine.  When
	 * using pmap_protect, the modified bits are not kept in the vm_page_t
	 * data structure.  Therefore, when using pmap_enter in vm_fault to
	 * bring back writability of a page, there has been no memory of the
	 * modified or referenced bits except at the pte level.  this clause
	 * supports the carryover of the modified and used (referenced) bits.
	 */
	if (pa == opa)
		(int) npte |= (int) *pte & (PG_M | PG_U);

	if (wired)
		(int) npte |= PG_W;
	if (va < UPT_MIN_ADDRESS)
		(int) npte |= PG_u;
	else if (va < UPT_MAX_ADDRESS)
		(int) npte |= PG_u | PG_RW;

	if (*pte != npte) {
		if (*pte)
			ptevalid++;
		*pte = npte;
	}
	if (ptevalid) {
		pmap_update_1pg(va);
	} else {
		if (pv) {
			pv->pv_ptem = pmap_use_pt(pmap, va);
		}
	}
}

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 */
void
pmap_qenter(va, m, count)
	vm_offset_t va;
	vm_page_t *m;
	int count;
{
	int i;
	int anyvalid = 0;
	register pt_entry_t *pte;

	for (i = 0; i < count; i++) {
		vm_offset_t tva = va + i * PAGE_SIZE;
		pt_entry_t npte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V));
		pte = vtopte(tva);
		if (*pte && (*pte != npte))
			pmap_update_1pg(tva);
		*pte = npte;
	}
}
/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(va, count)
	vm_offset_t va;
	int count;
{
	int i;
	register pt_entry_t *pte;

	for (i = 0; i < count; i++) {
		vm_offset_t tva = va + i * PAGE_SIZE;
		pte = vtopte(tva);
		*pte = 0;
		pmap_update_1pg(tva);
	}
}

/*
 * add a wired page to the kva
 * note that in order for the mapping to take effect -- you
 * should do a pmap_update after doing the pmap_kenter...
 */
void
pmap_kenter(va, pa)
	vm_offset_t va;
	register vm_offset_t pa;
{
	register pt_entry_t *pte;
	int wasvalid = 0;

	pte = vtopte(va);

	if (*pte)
		wasvalid++;

	*pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V));

	if (wasvalid)
		pmap_update_1pg(va);
}

/*
 * remove a page from the kernel pagetables
 */
void
pmap_kremove(va)
	vm_offset_t va;
{
	register pt_entry_t *pte;

	pte = vtopte(va);

	*pte = (pt_entry_t) 0;
	pmap_update_1pg(va);
}

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * 5. Tlbflush is deferred to calling procedure.
 * 6. Page IS managed.
 * but is *MUCH* faster than pmap_enter...
 */

static void
pmap_enter_quick(pmap, va, pa)
	register pmap_t pmap;
	vm_offset_t va;
	register vm_offset_t pa;
{
	register pt_entry_t *pte;
	register pv_entry_t pv, npv;
	int s;

	/*
	 * Enter on the PV list if part of our managed memory Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */

	pte = vtopte(va);
#if 1
	/* a fault on the page table might occur here */
	if (*pte) {
		pmap_remove(pmap, va, va + PAGE_SIZE);
	}
#endif

	pv = pa_to_pvh(pa);
	s = splhigh();
	/*
	 * No entries yet, use header as the first entry
	 */
	if (pv->pv_pmap == NULL) {
		pv->pv_pmap = pmap;
		pv->pv_va = va;
		pv->pv_next = NULL;
	}
	/*
	 * There is at least one other VA mapping this page.  Place this entry
	 * after the header.
	 */
	else {
		npv = get_pv_entry();
		npv->pv_va = va;
		npv->pv_pmap = pmap;
		npv->pv_next = pv->pv_next;
		pv->pv_next = npv;
		pv = npv;
	}
	splx(s);
	pv->pv_ptem = pmap_use_pt(pmap, va);

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;

	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	*pte = (pt_entry_t) ((int) (pa | PG_V | PG_u));

	return;
}

#define MAX_INIT_PT (512)
/*
 * pmap_object_init_pt preloads the ptes for a given object
 * into the specified pmap.  This eliminates the blast of soft
 * faults on process startup and immediately after an mmap.
 */
void
pmap_object_init_pt(pmap, addr, object, pindex, size)
	pmap_t pmap;
	vm_offset_t addr;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_size_t size;
{
	vm_offset_t tmpidx;
	int psize;
	vm_page_t p;
	int objpgs;

	psize = (size >> PAGE_SHIFT);

	if (!pmap || ((psize > MAX_INIT_PT) &&
		(object->resident_page_count > MAX_INIT_PT))) {
		return;
	}

	/*
	 * remove any already used mappings
	 */
	pmap_remove( pmap, trunc_page(addr), round_page(addr + size));

	/*
	 * if we are processing a major portion of the object, then scan the
	 * entire thing.
	 */
	if (psize > (object->size >> 2)) {
		objpgs = psize;

		for (p = object->memq.tqh_first;
		    ((objpgs > 0) && (p != NULL));
		    p = p->listq.tqe_next) {

			tmpidx = p->pindex;
			if (tmpidx < pindex) {
				continue;
			}
			tmpidx -= pindex;
			if (tmpidx >= psize) {
				continue;
			}
			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
			    (p->busy == 0) &&
			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
				if (p->queue == PQ_CACHE)
					vm_page_deactivate(p);
				vm_page_hold(p);
				p->flags |= PG_MAPPED;
				pmap_enter_quick(pmap,
				    addr + (tmpidx << PAGE_SHIFT),
				    VM_PAGE_TO_PHYS(p));
				vm_page_unhold(p);
			}
			objpgs -= 1;
		}
	} else {
		/*
		 * else lookup the pages one-by-one.
		 */
		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
			p = vm_page_lookup(object, tmpidx + pindex);
			if (p && (p->busy == 0) &&
			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
				if (p->queue == PQ_CACHE)
					vm_page_deactivate(p);
				vm_page_hold(p);
				p->flags |= PG_MAPPED;
				pmap_enter_quick(pmap,
				    addr + (tmpidx << PAGE_SHIFT),
				    VM_PAGE_TO_PHYS(p));
				vm_page_unhold(p);
			}
		}
	}
}

/*
 * pmap_prefault provides a quick way of clustering
 * pagefaults into a process's address space.  It is a "cousin"
 * of pmap_object_init_pt, except it runs at page fault time instead
 * of mmap time.
 */
#define PFBAK 2
#define PFFOR 2
#define PAGEORDER_SIZE (PFBAK+PFFOR)

static int pmap_prefault_pageorder[] = {
	-NBPG, NBPG, -2 * NBPG, 2 * NBPG
};

void
pmap_prefault(pmap, addra, entry, object)
	pmap_t pmap;
	vm_offset_t addra;
	vm_map_entry_t entry;
	vm_object_t object;
{
	int i;
	vm_offset_t starta;
	vm_offset_t addr;
	vm_pindex_t pindex;
	vm_page_t m;
	int pageorder_index;

	if (entry->object.vm_object != object)
		return;

	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap))
		return;

	starta = addra - PFBAK * PAGE_SIZE;
	if (starta < entry->start) {
		starta = entry->start;
	} else if (starta > addra) {
		starta = 0;
	}

	for (i = 0; i < PAGEORDER_SIZE; i++) {
		vm_object_t lobject;
		pt_entry_t *pte;

		addr = addra + pmap_prefault_pageorder[i];
		if (addr < starta || addr >= entry->end)
			continue;

		pte = vtopte(addr);
		if (*pte)
			continue;

		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
		lobject = object;
		for (m = vm_page_lookup(lobject, pindex);
		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
		    lobject = lobject->backing_object) {
			if (lobject->backing_object_offset & PAGE_MASK)
				break;
			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
			m = vm_page_lookup(lobject->backing_object, pindex);
		}

		/*
		 * give-up when a page is not in memory
		 */
		if (m == NULL)
			break;

		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
		    (m->busy == 0) &&
		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {

			if (m->queue == PQ_CACHE) {
				if ((cnt.v_free_count + cnt.v_cache_count) <
				    cnt.v_free_min)
					break;
				vm_page_deactivate(m);
			}
			vm_page_hold(m);
			m->flags |= PG_MAPPED;
			pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m));
			vm_page_unhold(m);
		}
	}
}

/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap, va, wired)
	register pmap_t pmap;
	vm_offset_t va;
	boolean_t wired;
{
	register pt_entry_t *pte;

	if (pmap == NULL)
		return;

	pte = pmap_pte(pmap, va);

	if (wired && !pmap_pte_w(pte))
		pmap->pm_stats.wired_count++;
	else if (!wired && pmap_pte_w(pte))
		pmap->pm_stats.wired_count--;

	/*
	 * Wiring is not a hardware characteristic so there is no need to
	 * invalidate TLB.
	 */
	pmap_pte_set_w(pte, wired);
}



/*
 *	Copy the range specified by src_addr/len
 *	from the source map to the range dst_addr/len
 *	in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */
void
pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
	pmap_t dst_pmap, src_pmap;
	vm_offset_t dst_addr;
	vm_size_t len;
	vm_offset_t src_addr;
{
}

/*
 *	Routine:	pmap_kernel
 *	Function:
 *		Returns the physical map handle for the kernel.
 */
pmap_t
pmap_kernel()
{
	return (kernel_pmap);
}

/*
 *	pmap_zero_page zeros the specified (machine independent)
 *	page by mapping the page into virtual memory and using
 *	bzero to clear its contents, one machine dependent page
 *	at a time.
 */
void
pmap_zero_page(phys)
	vm_offset_t phys;
{
	if (*(int *) CMAP2)
		panic("pmap_zero_page: CMAP busy");

	*(int *) CMAP2 = PG_V | PG_KW | trunc_page(phys);
	bzero(CADDR2, PAGE_SIZE);

	*(int *) CMAP2 = 0;
	pmap_update_1pg((vm_offset_t) CADDR2);
}

/*
 *	pmap_copy_page copies the specified (machine independent)
 *	page by mapping the page into virtual memory and using
 *	bcopy to copy the page, one machine dependent page at a
 *	time.
 */
void
pmap_copy_page(src, dst)
	vm_offset_t src;
	vm_offset_t dst;
{
	if (*(int *) CMAP1 || *(int *) CMAP2)
		panic("pmap_copy_page: CMAP busy");

	*(int *) CMAP1 = PG_V | PG_KW | trunc_page(src);
	*(int *) CMAP2 = PG_V | PG_KW | trunc_page(dst);

#if __GNUC__ > 1
	memcpy(CADDR2, CADDR1, PAGE_SIZE);
#else
	bcopy(CADDR1, CADDR2, PAGE_SIZE);
#endif
	*(int *) CMAP1 = 0;
	*(int *) CMAP2 = 0;
	pmap_update_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2);
}


/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
void
pmap_pageable(pmap, sva, eva, pageable)
	pmap_t pmap;
	vm_offset_t sva, eva;
	boolean_t pageable;
{
}

/*
 * this routine returns true if a physical page resides
 * in the given pmap.
 */
boolean_t
pmap_page_exists(pmap, pa)
	pmap_t pmap;
	vm_offset_t pa;
{
	register pv_entry_t pv;
	int s;

	if (!pmap_is_managed(pa))
		return FALSE;

	pv = pa_to_pvh(pa);
	s = splhigh();

	/*
	 * Not found, check current mappings returning immediately if found.
	 */
	if (pv->pv_pmap != NULL) {
		for (; pv; pv = pv->pv_next) {
			if (pv->pv_pmap == pmap) {
				splx(s);
				return TRUE;
			}
		}
	}
	splx(s);
	return (FALSE);
}

/*
 * pmap_testbit tests bits in pte's
 * note that the testbit/changebit routines are inline,
 * and a lot of things compile-time evaluate.
 */
static __inline boolean_t
pmap_testbit(pa, bit)
	register vm_offset_t pa;
	int bit;
{
	register pv_entry_t pv;
	pt_entry_t *pte;
	int s;

	if (!pmap_is_managed(pa))
		return FALSE;

	pv = pa_to_pvh(pa);
	s = splhigh();

	/*
	 * Not found, check current mappings returning immediately if found.
	 */
	if (pv->pv_pmap != NULL) {
		for (; pv; pv = pv->pv_next) {
			/*
			 * if the bit being tested is the modified bit, then
			 * mark UPAGES as always modified, and ptes as never
			 * modified.
			 */
			if (bit & (PG_U|PG_M)) {
				if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) {
					continue;
				}
			}
			if (!pv->pv_pmap) {
				printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va);
				continue;
			}
			pte = pmap_pte(pv->pv_pmap, pv->pv_va);
			if ((int) *pte & bit) {
				splx(s);
				return TRUE;
			}
		}
	}
	splx(s);
	return (FALSE);
}

/*
 * this routine is used to modify bits in ptes
 */
static __inline void
pmap_changebit(pa, bit, setem)
	vm_offset_t pa;
	int bit;
	boolean_t setem;
{
	register pv_entry_t pv;
	register pt_entry_t *pte, npte;
	vm_offset_t va;
	int changed;
	int s;

	if (!pmap_is_managed(pa))
		return;

	pv = pa_to_pvh(pa);
	s = splhigh();

	/*
	 * Loop over all current mappings setting/clearing as apropos.  If
	 * setting RO do we need to clear the VAC?
	 */
	if (pv->pv_pmap != NULL) {
		for (; pv; pv = pv->pv_next) {
			va = pv->pv_va;

			/*
			 * don't write protect pager mappings
			 */
			if (!setem && (bit == PG_RW)) {
				if (va >= clean_sva && va < clean_eva)
					continue;
			}
			if (!pv->pv_pmap) {
				printf("Null pmap (cb) at va: 0x%lx\n", va);
				continue;
			}
			pte = pmap_pte(pv->pv_pmap, va);
			if (setem) {
				(int) npte = (int) *pte | bit;
			} else {
				(int) npte = (int) *pte & ~bit;
			}
			*pte = npte;
		}
	}
	splx(s);
	if (curproc != pageproc)
		pmap_update();
}

/*
 *	pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
 */
void
pmap_page_protect(phys, prot)
	vm_offset_t phys;
	vm_prot_t prot;
{
	if ((prot & VM_PROT_WRITE) == 0) {
		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
			pmap_changebit(phys, PG_RW, FALSE);
		else
			pmap_remove_all(phys);
	}
}

vm_offset_t
pmap_phys_address(ppn)
	int ppn;
{
	return (i386_ptob(ppn));
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(vm_offset_t pa)
{
	return pmap_testbit((pa), PG_U);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_offset_t pa)
{
	return pmap_testbit((pa), PG_M);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_offset_t pa)
{
	pmap_changebit((pa), PG_M, FALSE);
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_offset_t pa)
{
	pmap_changebit((pa), PG_U, FALSE);
}

/*
 * Miscellaneous support routines follow
 */

static void
i386_protection_init()
{
	register int *kp, prot;

	kp = protection_codes;
	for (prot = 0; prot < 8; prot++) {
		switch (prot) {
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
			/*
			 * Read access is also 0. There isn't any execute bit,
			 * so just make it readable.
			 */
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
			*kp++ = 0;
			break;
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
			*kp++ = PG_RW;
			break;
		}
	}
}

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space. Return a pointer to where it is mapped. This
 * routine is intended to be used for mapping device memory,
 * NOT real memory. The non-cacheable bits are set on each
 * mapped page.
 */
void *
pmap_mapdev(pa, size)
	vm_offset_t pa;
	vm_size_t size;
{
	vm_offset_t va, tmpva;
	pt_entry_t *pte;

	pa = trunc_page(pa);
	size = roundup(size, PAGE_SIZE);

	va = kmem_alloc_pageable(kernel_map, size);
	if (!va)
		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");

	for (tmpva = va; size > 0;) {
		pte = vtopte(tmpva);
		*pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N));
		size -= PAGE_SIZE;
		tmpva += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	pmap_update();

	return ((void *) va);
}

#ifdef PMAP_DEBUG
pmap_pid_dump(int pid) {
	pmap_t pmap;
	struct proc *p;
	int npte = 0;
	int index;
	for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
		if (p->p_pid != pid)
			continue;

		if (p->p_vmspace) {
			int i,j;
			index = 0;
			pmap = &p->p_vmspace->vm_pmap;
			for(i=0;i<1024;i++) {
				pd_entry_t *pde;
				pt_entry_t *pte;
				unsigned base = i << PD_SHIFT;

				pde = &pmap->pm_pdir[i];
				if (pde && pmap_pde_v(pde)) {
					for(j=0;j<1024;j++) {
						unsigned va = base + (j << PG_SHIFT);
						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
							if (index) {
								index = 0;
								printf("\n");
							}
							return npte;
						}
						pte = pmap_pte( pmap, va);
						if (pte && pmap_pte_v(pte)) {
							vm_offset_t pa;
							vm_page_t m;
							pa = *(int *)pte;
							m = PHYS_TO_VM_PAGE((pa & PG_FRAME));
							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
							    va, pa, m->hold_count, m->wire_count, m->flags);
							npte++;
							index++;
							if (index >= 2) {
								index = 0;
								printf("\n");
							} else {
								printf(" ");
							}
						}
					}
				}
			}
		}
	}
	return npte;
}
#endif

#ifdef DEBUG

static void	pads __P((pmap_t pm));
static void	pmap_pvdump __P((vm_offset_t pa));

/* print address space of pmap*/
static void
pads(pm)
	pmap_t pm;
{
	unsigned va, i, j;
	pt_entry_t *ptep;

	if (pm == kernel_pmap)
		return;
	for (i = 0; i < 1024; i++)
		if (pm->pm_pdir[i])
			for (j = 0; j < 1024; j++) {
				va = (i << PD_SHIFT) + (j << PG_SHIFT);
				if (pm == kernel_pmap && va < KERNBASE)
					continue;
				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
					continue;
				ptep = pmap_pte(pm, va);
				if (pmap_pte_v(ptep))
					printf("%x:%x ", va, *(int *) ptep);
			};

}

static void
pmap_pvdump(pa)
	vm_offset_t pa;
{
	register pv_entry_t pv;

	printf("pa %x", pa);
	for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) {
#ifdef used_to_be
		printf(" -> pmap %x, va %x, flags %x",
		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
#endif
		printf(" -> pmap %x, va %x",
		    pv->pv_pmap, pv->pv_va);
		pads(pv->pv_pmap);
	}
	printf(" ");
}
#endif