pmap.c revision 6807
1/* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department and William Jolitz of UUNET Technologies Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 42 * $Id: pmap.c,v 1.50 1995/02/26 05:14:16 bde Exp $ 43 */ 44 45/* 46 * Derived from hp300 version by Mike Hibler, this version by William 47 * Jolitz uses a recursive map [a pde points to the page directory] to 48 * map the page tables using the pagetables themselves. This is done to 49 * reduce the impact on kernel virtual memory for lots of sparse address 50 * space, and to reduce the cost of memory to each process. 51 * 52 * Derived from: hp300/@(#)pmap.c 7.1 (Berkeley) 12/5/90 53 */ 54/* 55 * Major modifications by John S. Dyson primarily to support 56 * pageable page tables, eliminating pmap_attributes, 57 * discontiguous memory pages, and using more efficient string 58 * instructions. Jan 13, 1994. Further modifications on Mar 2, 1994, 59 * general clean-up and efficiency mods. 60 */ 61 62/* 63 * Manages physical address maps. 64 * 65 * In addition to hardware address maps, this 66 * module is called upon to provide software-use-only 67 * maps which may or may not be stored in the same 68 * form as hardware maps. These pseudo-maps are 69 * used to store intermediate results from copy 70 * operations to and from address spaces. 
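 *
 * As a rough illustration of the recursive map mentioned above (a
 * sketch only; the real constants and macros live in pmap.h): because
 * PDE slot PTDPTDI points back at the page directory itself, the page
 * tables of the current map appear as one linear array of pt_entry_t
 * starting at the virtual address (PTDPTDI << PD_SHIFT), so the pte
 * for a virtual address va can be found by arithmetic alone, roughly
 *
 *	pte = (pt_entry_t *)(PTDPTDI << PD_SHIFT) + i386_btop(va);
 *
 * which is essentially what vtopte() evaluates to for the current
 * address space; avtopte() does the same through the alternate
 * region controlled by APTDpde.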
71 * 72 * Since the information managed by this module is 73 * also stored by the logical address mapping module, 74 * this module may throw away valid virtual-to-physical 75 * mappings at almost any time. However, invalidations 76 * of virtual-to-physical mappings must be done as 77 * requested. 78 * 79 * In order to cope with hardware architectures which 80 * make virtual-to-physical map invalidates expensive, 81 * this module may delay invalidate or reduced protection 82 * operations until such time as they are actually 83 * necessary. This module is given full information as 84 * to which processors are currently using which maps, 85 * and to when physical maps must be made correct. 86 */ 87 88#include <sys/param.h> 89#include <sys/systm.h> 90#include <sys/proc.h> 91#include <sys/malloc.h> 92#include <sys/user.h> 93 94#include <vm/vm.h> 95#include <vm/vm_kern.h> 96#include <vm/vm_page.h> 97 98#include <machine/cputypes.h> 99 100#include <i386/isa/isa.h> 101 102/* 103 * Allocate various and sundry SYSMAPs used in the days of old VM 104 * and not yet converted. XXX. 105 */ 106#define BSDVM_COMPAT 1 107 108/* 109 * Get PDEs and PTEs for user/kernel address space 110 */ 111#define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023])) 112#define pdir_pde(m, v) (m[((vm_offset_t)(v) >> PD_SHIFT)&1023]) 113 114#define pmap_pte_pa(pte) (*(int *)(pte) & PG_FRAME) 115 116#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) 117#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) 118#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) 119#define pmap_pte_u(pte) ((*(int *)pte & PG_U) != 0) 120#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) 121 122#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) 123#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 124 125/* 126 * Given a map and a machine independent protection code, 127 * convert to a vax protection code. 128 */ 129#define pte_prot(m, p) (protection_codes[p]) 130int protection_codes[8]; 131 132struct pmap kernel_pmap_store; 133pmap_t kernel_pmap; 134 135vm_offset_t phys_avail[6]; /* 2 entries + 1 null */ 136vm_offset_t avail_start; /* PA of first available physical page */ 137vm_offset_t avail_end; /* PA of last available physical page */ 138vm_size_t mem_size; /* memory size in bytes */ 139vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 140vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 141int i386pagesperpage; /* PAGE_SIZE / I386_PAGE_SIZE */ 142boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ 143vm_offset_t vm_first_phys, vm_last_phys; 144 145static inline int pmap_is_managed(); 146static inline void *vm_get_pmap(); 147static inline void vm_put_pmap(); 148static void i386_protection_init(); 149static void pmap_alloc_pv_entry(); 150static inline pv_entry_t get_pv_entry(); 151int nkpt; 152 153 154extern vm_offset_t clean_sva, clean_eva; 155extern int cpu_class; 156 157#if BSDVM_COMPAT 158#include <sys/msgbuf.h> 159 160/* 161 * All those kernel PT submaps that BSD is so fond of 162 */ 163pt_entry_t *CMAP1, *CMAP2, *ptmmap; 164caddr_t CADDR1, CADDR2, ptvmmap; 165pt_entry_t *msgbufmap; 166struct msgbuf *msgbufp; 167 168#endif 169 170void 171init_pv_entries(int); 172 173/* 174 * Routine: pmap_pte 175 * Function: 176 * Extract the page table entry associated 177 * with the given map/virtual_address pair. 
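 *
 * Typical use, illustrative only and built from macros defined in
 * this file (the routine returns 0 when no page table page is
 * present):
 *
 *	pt_entry_t *pte = pmap_pte(pmap, va);
 *	if (pte && pmap_pte_v(pte))
 *		pa = pmap_pte_pa(pte) | (va & ~PG_FRAME);
 *
 * Note that for a pmap which is neither the kernel's nor the current
 * one, this may reload APTDpde and flush the TLB via pmap_update(),
 * so it is not free to call casually.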
178 * [ what about induced faults -wfj] 179 */ 180 181inline pt_entry_t * const 182pmap_pte(pmap, va) 183 register pmap_t pmap; 184 vm_offset_t va; 185{ 186 187 if (pmap && *pmap_pde(pmap, va)) { 188 vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 189 190 /* are we current address space or kernel? */ 191 if ((pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME))) 192 return ((pt_entry_t *) vtopte(va)); 193 /* otherwise, we are alternate address space */ 194 else { 195 if (frame != ((int) APTDpde & PG_FRAME)) { 196 APTDpde = pmap->pm_pdir[PTDPTDI]; 197 pmap_update(); 198 } 199 return ((pt_entry_t *) avtopte(va)); 200 } 201 } 202 return (0); 203} 204 205/* 206 * Routine: pmap_extract 207 * Function: 208 * Extract the physical page address associated 209 * with the given map/virtual_address pair. 210 */ 211 212vm_offset_t 213pmap_extract(pmap, va) 214 register pmap_t pmap; 215 vm_offset_t va; 216{ 217 vm_offset_t pa; 218 219 if (pmap && *pmap_pde(pmap, va)) { 220 vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 221 222 /* are we current address space or kernel? */ 223 if ((pmap == kernel_pmap) 224 || (frame == ((int) PTDpde & PG_FRAME))) { 225 pa = *(int *) vtopte(va); 226 /* otherwise, we are alternate address space */ 227 } else { 228 if (frame != ((int) APTDpde & PG_FRAME)) { 229 APTDpde = pmap->pm_pdir[PTDPTDI]; 230 pmap_update(); 231 } 232 pa = *(int *) avtopte(va); 233 } 234 return ((pa & PG_FRAME) | (va & ~PG_FRAME)); 235 } 236 return 0; 237 238} 239 240/* 241 * determine if a page is managed (memory vs. device) 242 */ 243static inline int 244pmap_is_managed(pa) 245 vm_offset_t pa; 246{ 247 int i; 248 249 if (!pmap_initialized) 250 return 0; 251 252 for (i = 0; phys_avail[i + 1]; i += 2) { 253 if (pa >= phys_avail[i] && pa < phys_avail[i + 1]) 254 return 1; 255 } 256 return 0; 257} 258 259/* 260 * find the vm_page_t of a pte (only) given va of pte and pmap 261 */ 262__inline vm_page_t 263pmap_pte_vm_page(pmap, pt) 264 pmap_t pmap; 265 vm_offset_t pt; 266{ 267 vm_page_t m; 268 269 pt = i386_trunc_page(pt); 270 pt = (pt - UPT_MIN_ADDRESS) / NBPG; 271 pt = ((vm_offset_t) pmap->pm_pdir[pt]) & PG_FRAME; 272 m = PHYS_TO_VM_PAGE(pt); 273 return m; 274} 275 276/* 277 * Wire a page table page 278 */ 279__inline void 280pmap_use_pt(pmap, va) 281 pmap_t pmap; 282 vm_offset_t va; 283{ 284 vm_offset_t pt; 285 286 if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized) 287 return; 288 289 pt = (vm_offset_t) vtopte(va); 290 vm_page_hold(pmap_pte_vm_page(pmap, pt)); 291} 292 293/* 294 * Unwire a page table page 295 */ 296inline void 297pmap_unuse_pt(pmap, va) 298 pmap_t pmap; 299 vm_offset_t va; 300{ 301 vm_offset_t pt; 302 vm_page_t m; 303 304 if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized) 305 return; 306 307 pt = (vm_offset_t) vtopte(va); 308 m = pmap_pte_vm_page(pmap, pt); 309 vm_page_unhold(m); 310 if (pmap != kernel_pmap && 311 (m->hold_count == 0) && 312 (m->wire_count == 0) && 313 (va < KPT_MIN_ADDRESS)) { 314 pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); 315 vm_page_free(m); 316 } 317} 318 319/* [ macro again?, should I force kstack into user map here? -wfj ] */ 320void 321pmap_activate(pmap, pcbp) 322 register pmap_t pmap; 323 struct pcb *pcbp; 324{ 325 PMAP_ACTIVATE(pmap, pcbp); 326} 327 328/* 329 * Bootstrap the system enough to run with virtual memory. 330 * Map the kernel's code and data, and allocate the system page table. 
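 *
 * (In this implementation "allocate" is mostly bookkeeping: the
 * routine records avail_start/virtual_avail/virtual_end, points
 * kernel_pmap at the boot-time page directory (KERNBASE + IdlePTD),
 * carves out the small BSDVM_COMPAT submaps (CMAP1/CMAP2, ptmmap,
 * msgbufmap) and reserves the ISA DMA bounce region, isaphysmem.)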
331 * 332 * On the I386 this is called after mapping has already been enabled 333 * and just syncs the pmap module with what has already been done. 334 * [We can't call it easily with mapping off since the kernel is not 335 * mapped with PA == VA, hence we would have to relocate every address 336 * from the linked base (virtual) address "KERNBASE" to the actual 337 * (physical) address starting relative to 0] 338 */ 339 340#define DMAPAGES 8 341void 342pmap_bootstrap(firstaddr, loadaddr) 343 vm_offset_t firstaddr; 344 vm_offset_t loadaddr; 345{ 346#if BSDVM_COMPAT 347 vm_offset_t va; 348 pt_entry_t *pte; 349 350#endif 351 352 avail_start = firstaddr + DMAPAGES * NBPG; 353 354 virtual_avail = (vm_offset_t) KERNBASE + avail_start; 355 virtual_end = VM_MAX_KERNEL_ADDRESS; 356 i386pagesperpage = PAGE_SIZE / NBPG; 357 358 /* 359 * Initialize protection array. 360 */ 361 i386_protection_init(); 362 363 /* 364 * The kernel's pmap is statically allocated so we don't have to use 365 * pmap_create, which is unlikely to work correctly at this part of 366 * the boot sequence. 367 */ 368 kernel_pmap = &kernel_pmap_store; 369 370 kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD); 371 372 simple_lock_init(&kernel_pmap->pm_lock); 373 kernel_pmap->pm_count = 1; 374 nkpt = NKPT; 375 376#if BSDVM_COMPAT 377 /* 378 * Allocate all the submaps we need 379 */ 380#define SYSMAP(c, p, v, n) \ 381 v = (c)va; va += ((n)*NBPG); p = pte; pte += (n); 382 383 va = virtual_avail; 384 pte = pmap_pte(kernel_pmap, va); 385 386 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 387 SYSMAP(caddr_t, CMAP2, CADDR2, 1) 388 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 389 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 1) 390 virtual_avail = va; 391#endif 392 /* 393 * Reserve special hunk of memory for use by bus dma as a bounce 394 * buffer (contiguous virtual *and* physical memory). 395 */ 396 { 397 extern vm_offset_t isaphysmem; 398 399 isaphysmem = va; 400 401 virtual_avail = pmap_map(va, firstaddr, 402 firstaddr + DMAPAGES * NBPG, VM_PROT_ALL); 403 } 404 405 *(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0; 406 pmap_update(); 407 408} 409 410/* 411 * Initialize the pmap module. 412 * Called by vm_init, to initialize any structures that the pmap 413 * system needs to map virtual memory. 414 * pmap_init has been enhanced to support in a fairly consistant 415 * way, discontiguous physical memory. 416 */ 417void 418pmap_init(phys_start, phys_end) 419 vm_offset_t phys_start, phys_end; 420{ 421 vm_offset_t addr; 422 vm_size_t npg, s; 423 int i; 424 425 /* 426 * Now that kernel map has been allocated, we can mark as unavailable 427 * regions which we have mapped in locore. 428 */ 429 addr = atdevbase; 430 (void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0, 431 &addr, (0x100000 - 0xa0000), FALSE); 432 433 addr = (vm_offset_t) KERNBASE + IdlePTD; 434 vm_object_reference(kernel_object); 435 (void) vm_map_find(kernel_map, kernel_object, addr, 436 &addr, (4 + NKPDE) * NBPG, FALSE); 437 438 /* 439 * calculate the number of pv_entries needed 440 */ 441 vm_first_phys = phys_avail[0]; 442 for (i = 0; phys_avail[i + 1]; i += 2); 443 npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / NBPG; 444 445 /* 446 * Allocate memory for random pmap data structures. Includes the 447 * pv_head_table. 
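 *
 * One struct pv_entry header is allocated per page frame in the
 * managed physical range, so the table is indexed by physical page
 * number; pa_to_pvh() (defined in pmap.h, not shown here) presumably
 * reduces to something like
 *
 *	&pv_table[i386_btop(pa - vm_first_phys)]
 *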
448 */ 449 s = (vm_size_t) (sizeof(struct pv_entry) * npg); 450 s = i386_round_page(s); 451 addr = (vm_offset_t) kmem_alloc(kernel_map, s); 452 pv_table = (pv_entry_t) addr; 453 454 /* 455 * init the pv free list 456 */ 457 init_pv_entries(npg); 458 /* 459 * Now it is safe to enable pv_table recording. 460 */ 461 pmap_initialized = TRUE; 462} 463 464/* 465 * Used to map a range of physical addresses into kernel 466 * virtual address space. 467 * 468 * For now, VM is already on, we only need to map the 469 * specified memory. 470 */ 471vm_offset_t 472pmap_map(virt, start, end, prot) 473 vm_offset_t virt; 474 vm_offset_t start; 475 vm_offset_t end; 476 int prot; 477{ 478 while (start < end) { 479 pmap_enter(kernel_pmap, virt, start, prot, FALSE); 480 virt += PAGE_SIZE; 481 start += PAGE_SIZE; 482 } 483 return (virt); 484} 485 486/* 487 * Create and return a physical map. 488 * 489 * If the size specified for the map 490 * is zero, the map is an actual physical 491 * map, and may be referenced by the 492 * hardware. 493 * 494 * If the size specified is non-zero, 495 * the map will be used in software only, and 496 * is bounded by that size. 497 * 498 * [ just allocate a ptd and mark it uninitialize -- should we track 499 * with a table which process has which ptd? -wfj ] 500 */ 501 502pmap_t 503pmap_create(size) 504 vm_size_t size; 505{ 506 register pmap_t pmap; 507 508 /* 509 * Software use map does not need a pmap 510 */ 511 if (size) 512 return (NULL); 513 514 pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK); 515 bzero(pmap, sizeof(*pmap)); 516 pmap_pinit(pmap); 517 return (pmap); 518} 519 520 521struct pmaplist { 522 struct pmaplist *next; 523}; 524 525static inline void * 526vm_get_pmap() 527{ 528 struct pmaplist *rtval; 529 530 rtval = (struct pmaplist *) kmem_alloc(kernel_map, ctob(1)); 531 bzero(rtval, ctob(1)); 532 return rtval; 533} 534 535static inline void 536vm_put_pmap(up) 537 struct pmaplist *up; 538{ 539 kmem_free(kernel_map, (vm_offset_t) up, ctob(1)); 540} 541 542/* 543 * Initialize a preallocated and zeroed pmap structure, 544 * such as one in a vmspace structure. 545 */ 546void 547pmap_pinit(pmap) 548 register struct pmap *pmap; 549{ 550 /* 551 * No need to allocate page table space yet but we do need a valid 552 * page directory table. 
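 *
 * The directory page built below ends up looking like this (sketch):
 *
 *	pm_pdir[KPTDI..KPTDI+nkpt-1]  copied from PTD: the shared kernel
 *	                              page tables
 *	pm_pdir[PTDPTDI]              physical address of pm_pdir itself
 *	                              (PG_V|PG_KW), this pmap's recursive map
 *	everything else               zero; user page table pages appear
 *	                              on demand (they are pageable)
 *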
553 */ 554 pmap->pm_pdir = (pd_entry_t *) vm_get_pmap(); 555 556 /* wire in kernel global address entries */ 557 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); 558 559 /* install self-referential address mapping entry */ 560 *(int *) (pmap->pm_pdir + PTDPTDI) = 561 ((int) pmap_kextract((vm_offset_t) pmap->pm_pdir)) | PG_V | PG_KW; 562 563 pmap->pm_count = 1; 564 simple_lock_init(&pmap->pm_lock); 565} 566 567/* 568 * grow the number of kernel page table entries, if needed 569 */ 570 571vm_page_t nkpg; 572vm_offset_t kernel_vm_end; 573 574void 575pmap_growkernel(vm_offset_t addr) 576{ 577 struct proc *p; 578 struct pmap *pmap; 579 int s; 580 581 s = splhigh(); 582 if (kernel_vm_end == 0) { 583 kernel_vm_end = KERNBASE; 584 nkpt = 0; 585 while (pdir_pde(PTD, kernel_vm_end)) { 586 kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); 587 ++nkpt; 588 } 589 } 590 addr = (addr + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); 591 while (kernel_vm_end < addr) { 592 if (pdir_pde(PTD, kernel_vm_end)) { 593 kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); 594 continue; 595 } 596 ++nkpt; 597 if (!nkpg) { 598 nkpg = vm_page_alloc(kernel_object, 0, VM_ALLOC_SYSTEM); 599 if (!nkpg) 600 panic("pmap_growkernel: no memory to grow kernel"); 601 vm_page_wire(nkpg); 602 vm_page_remove(nkpg); 603 pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); 604 } 605 pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_KW); 606 nkpg = NULL; 607 608 for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { 609 if (p->p_vmspace) { 610 pmap = &p->p_vmspace->vm_pmap; 611 *pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); 612 } 613 } 614 *pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); 615 kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); 616 } 617 splx(s); 618} 619 620/* 621 * Retire the given physical map from service. 622 * Should only be called if the map contains 623 * no valid mappings. 624 */ 625void 626pmap_destroy(pmap) 627 register pmap_t pmap; 628{ 629 int count; 630 631 if (pmap == NULL) 632 return; 633 634 simple_lock(&pmap->pm_lock); 635 count = --pmap->pm_count; 636 simple_unlock(&pmap->pm_lock); 637 if (count == 0) { 638 pmap_release(pmap); 639 free((caddr_t) pmap, M_VMPMAP); 640 } 641} 642 643/* 644 * Release any resources held by the given physical map. 645 * Called when a pmap initialized by pmap_pinit is being released. 646 * Should only be called if the map contains no valid mappings. 647 */ 648void 649pmap_release(pmap) 650 register struct pmap *pmap; 651{ 652 vm_put_pmap((struct pmaplist *) pmap->pm_pdir); 653} 654 655/* 656 * Add a reference to the specified pmap. 657 */ 658void 659pmap_reference(pmap) 660 pmap_t pmap; 661{ 662 if (pmap != NULL) { 663 simple_lock(&pmap->pm_lock); 664 pmap->pm_count++; 665 simple_unlock(&pmap->pm_lock); 666 } 667} 668 669#define PV_FREELIST_MIN ((NBPG / sizeof (struct pv_entry)) / 2) 670 671/* 672 * Data for the pv entry allocation mechanism 673 */ 674int pv_freelistcnt; 675pv_entry_t pv_freelist; 676vm_offset_t pvva; 677int npvvapg; 678 679/* 680 * free the pv_entry back to the free list 681 */ 682inline static void 683free_pv_entry(pv) 684 pv_entry_t pv; 685{ 686 if (!pv) 687 return; 688 ++pv_freelistcnt; 689 pv->pv_next = pv_freelist; 690 pv_freelist = pv; 691} 692 693/* 694 * get a new pv_entry, allocating a block from the system 695 * when needed. 
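 * (callers normally run at splhigh and immediately link the new entry
 * into a pv chain, e.g. as pmap_enter() does for a page that already
 * has a mapping, with pv = pa_to_pvh(pa):
 *
 *	npv = get_pv_entry();
 *	npv->pv_va = va; npv->pv_pmap = pmap;
 *	npv->pv_next = pv->pv_next; pv->pv_next = npv;
 *
 * so the free list is only touched with interrupts blocked.)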
696 * the memory allocation is performed bypassing the malloc code 697 * because of the possibility of allocations at interrupt time. 698 */ 699static inline pv_entry_t 700get_pv_entry() 701{ 702 pv_entry_t tmp; 703 704 /* 705 * get more pv_entry pages if needed 706 */ 707 if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) { 708 pmap_alloc_pv_entry(); 709 } 710 /* 711 * get a pv_entry off of the free list 712 */ 713 --pv_freelistcnt; 714 tmp = pv_freelist; 715 pv_freelist = tmp->pv_next; 716 return tmp; 717} 718 719/* 720 * this *strange* allocation routine *statistically* eliminates the 721 * *possibility* of a malloc failure (*FATAL*) for a pv_entry_t data structure. 722 * also -- this code is MUCH MUCH faster than the malloc equiv... 723 */ 724static void 725pmap_alloc_pv_entry() 726{ 727 /* 728 * do we have any pre-allocated map-pages left? 729 */ 730 if (npvvapg) { 731 vm_page_t m; 732 733 /* 734 * we do this to keep recursion away 735 */ 736 pv_freelistcnt += PV_FREELIST_MIN; 737 /* 738 * allocate a physical page out of the vm system 739 */ 740 m = vm_page_alloc(kernel_object, 741 pvva - vm_map_min(kernel_map), VM_ALLOC_INTERRUPT); 742 if (m) { 743 int newentries; 744 int i; 745 pv_entry_t entry; 746 747 newentries = (NBPG / sizeof(struct pv_entry)); 748 /* 749 * wire the page 750 */ 751 vm_page_wire(m); 752 m->flags &= ~PG_BUSY; 753 /* 754 * let the kernel see it 755 */ 756 pmap_kenter(pvva, VM_PAGE_TO_PHYS(m)); 757 758 entry = (pv_entry_t) pvva; 759 /* 760 * update the allocation pointers 761 */ 762 pvva += NBPG; 763 --npvvapg; 764 765 /* 766 * free the entries into the free list 767 */ 768 for (i = 0; i < newentries; i++) { 769 free_pv_entry(entry); 770 entry++; 771 } 772 } 773 pv_freelistcnt -= PV_FREELIST_MIN; 774 } 775 if (!pv_freelist) 776 panic("get_pv_entry: cannot get a pv_entry_t"); 777} 778 779 780 781/* 782 * init the pv_entry allocation system 783 */ 784#define PVSPERPAGE 64 785void 786init_pv_entries(npg) 787 int npg; 788{ 789 /* 790 * allocate enough kvm space for PVSPERPAGE entries per page (lots) 791 * kvm space is fairly cheap, be generous!!! (the system can panic if 792 * this is too small.) 793 */ 794 npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + NBPG - 1) / NBPG; 795 pvva = kmem_alloc_pageable(kernel_map, npvvapg * NBPG); 796 /* 797 * get the first batch of entries 798 */ 799 free_pv_entry(get_pv_entry()); 800} 801 802static pt_entry_t * 803get_pt_entry(pmap) 804 pmap_t pmap; 805{ 806 vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 807 808 /* are we current address space or kernel? */ 809 if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) { 810 return PTmap; 811 } 812 /* otherwise, we are alternate address space */ 813 if (frame != ((int) APTDpde & PG_FRAME)) { 814 APTDpde = pmap->pm_pdir[PTDPTDI]; 815 pmap_update(); 816 } 817 return APTmap; 818} 819 820/* 821 * If it is the first entry on the list, it is actually 822 * in the header and we must copy the following entry up 823 * to the header. Otherwise we must search the list for 824 * the entry. In either case we free the now unused entry. 
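 *
 * Illustration: the pv_table slot for the page is itself the first
 * node, so a page mapped by (pmapA,va1) and (pmapB,va2) looks like
 *
 *	pv_table[pg] = { pmapA, va1 } --> { pmapB, va2 } --> NULL
 *
 * Removing (pmapA,va1) copies the second node into the header and
 * frees it; removing (pmapB,va2) just unlinks and frees it; removing
 * the last mapping leaves the header with pv_pmap == NULL.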
825 */ 826void 827pmap_remove_entry(pmap, pv, va) 828 struct pmap *pmap; 829 pv_entry_t pv; 830 vm_offset_t va; 831{ 832 pv_entry_t npv; 833 int s; 834 835 s = splhigh(); 836 if (pmap == pv->pv_pmap && va == pv->pv_va) { 837 npv = pv->pv_next; 838 if (npv) { 839 *pv = *npv; 840 free_pv_entry(npv); 841 } else { 842 pv->pv_pmap = NULL; 843 } 844 } else { 845 for (npv = pv->pv_next; npv; npv = npv->pv_next) { 846 if (pmap == npv->pv_pmap && va == npv->pv_va) { 847 break; 848 } 849 pv = npv; 850 } 851 if (npv) { 852 pv->pv_next = npv->pv_next; 853 free_pv_entry(npv); 854 } 855 } 856 splx(s); 857} 858 859/* 860 * Remove the given range of addresses from the specified map. 861 * 862 * It is assumed that the start and end are properly 863 * rounded to the page size. 864 */ 865void 866pmap_remove(pmap, sva, eva) 867 struct pmap *pmap; 868 register vm_offset_t sva; 869 register vm_offset_t eva; 870{ 871 register pt_entry_t *ptp, *ptq; 872 vm_offset_t pa; 873 register pv_entry_t pv; 874 vm_offset_t va; 875 vm_page_t m; 876 pt_entry_t oldpte; 877 878 if (pmap == NULL) 879 return; 880 881 ptp = get_pt_entry(pmap); 882 883 /* 884 * special handling of removing one page. a very 885 * common operation and easy to short circuit some 886 * code. 887 */ 888 if ((sva + NBPG) == eva) { 889 890 if (*pmap_pde(pmap, sva) == 0) 891 return; 892 893 ptq = ptp + i386_btop(sva); 894 895 if (!*ptq) 896 return; 897 /* 898 * Update statistics 899 */ 900 if (pmap_pte_w(ptq)) 901 pmap->pm_stats.wired_count--; 902 pmap->pm_stats.resident_count--; 903 904 pa = pmap_pte_pa(ptq); 905 oldpte = *ptq; 906 *ptq = 0; 907 908 if (pmap_is_managed(pa)) { 909 if ((int) oldpte & PG_M) { 910 if ((sva < USRSTACK || sva >= KERNBASE) || 911 (sva >= USRSTACK && sva < USRSTACK + (UPAGES * NBPG))) { 912 if (sva < clean_sva || sva >= clean_eva) { 913 PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; 914 } 915 } 916 } 917 pv = pa_to_pvh(pa); 918 pmap_remove_entry(pmap, pv, sva); 919 } 920 pmap_unuse_pt(pmap, sva); 921 pmap_update(); 922 return; 923 } 924 sva = i386_btop(sva); 925 eva = i386_btop(eva); 926 927 while (sva < eva) { 928 /* 929 * Weed out invalid mappings. Note: we assume that the page 930 * directory table is always allocated, and in kernel virtual. 931 */ 932 933 if (*pmap_pde(pmap, i386_ptob(sva)) == 0) { 934 /* We can race ahead here, straight to next pde.. */ 935 sva = ((sva + NPTEPG) & ~(NPTEPG - 1)); 936 continue; 937 } 938 ptq = ptp + sva; 939 940 /* 941 * search for page table entries, use string operations that 942 * are much faster than explicitly scanning when page tables 943 * are not fully populated. 944 */ 945 if (*ptq == 0) { 946 vm_offset_t pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1)); 947 vm_offset_t nscan = pdnxt - sva; 948 int found = 0; 949 950 if ((nscan + sva) > eva) 951 nscan = eva - sva; 952 953 asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" : 954 "=D"(ptq), "=a"(found) : "c"(nscan), "0"(ptq) : "cx"); 955 956 if (!found) { 957 sva = pdnxt; 958 continue; 959 } 960 ptq -= 1; 961 962 sva = ptq - ptp; 963 } 964 /* 965 * Update statistics 966 */ 967 oldpte = *ptq; 968 if (((int) oldpte) & PG_W) 969 pmap->pm_stats.wired_count--; 970 pmap->pm_stats.resident_count--; 971 972 /* 973 * Invalidate the PTEs. XXX: should cluster them up and 974 * invalidate as many as possible at once. 975 */ 976 *ptq = 0; 977 978 va = i386_ptob(sva); 979 980 /* 981 * Remove from the PV table (raise IPL since we may be called 982 * at interrupt time). 
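 *
 * (The USRSTACK/KERNBASE range tests below look odd; their apparent
 * intent is to skip the window between the u-pages and KERNBASE --
 * chiefly the recursive page table mappings -- and the
 * clean_sva..clean_eva window, so that PG_M seen only through those
 * mappings is not folded back into the vm_page dirty bits.)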
983 */ 984 pa = ((int) oldpte) & PG_FRAME; 985 if (!pmap_is_managed(pa)) { 986 pmap_unuse_pt(pmap, va); 987 ++sva; 988 continue; 989 } 990 if ((int) oldpte & PG_M) { 991 if ((va < USRSTACK || va >= KERNBASE) || 992 (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) { 993 if (va < clean_sva || va >= clean_eva) { 994 PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; 995 } 996 } 997 } 998 pv = pa_to_pvh(pa); 999 pmap_remove_entry(pmap, pv, va); 1000 pmap_unuse_pt(pmap, va); 1001 ++sva; 1002 } 1003 pmap_update(); 1004} 1005 1006/* 1007 * Routine: pmap_remove_all 1008 * Function: 1009 * Removes this physical page from 1010 * all physical maps in which it resides. 1011 * Reflects back modify bits to the pager. 1012 * 1013 * Notes: 1014 * Original versions of this routine were very 1015 * inefficient because they iteratively called 1016 * pmap_remove (slow...) 1017 */ 1018void 1019pmap_remove_all(pa) 1020 vm_offset_t pa; 1021{ 1022 register pv_entry_t pv, npv; 1023 register pt_entry_t *pte, *ptp; 1024 vm_offset_t va; 1025 struct pmap *pmap; 1026 vm_page_t m; 1027 int s; 1028 int anyvalid = 0; 1029 1030 /* 1031 * Not one of ours 1032 */ 1033 /* 1034 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 1035 * pages! 1036 */ 1037 if (!pmap_is_managed(pa)) 1038 return; 1039 1040 pa = i386_trunc_page(pa); 1041 pv = pa_to_pvh(pa); 1042 m = PHYS_TO_VM_PAGE(pa); 1043 1044 s = splhigh(); 1045 while (pv->pv_pmap != NULL) { 1046 pmap = pv->pv_pmap; 1047 ptp = get_pt_entry(pmap); 1048 va = pv->pv_va; 1049 pte = ptp + i386_btop(va); 1050 if (pmap_pte_w(pte)) 1051 pmap->pm_stats.wired_count--; 1052 if (*pte) { 1053 pmap->pm_stats.resident_count--; 1054 anyvalid++; 1055 1056 /* 1057 * Update the vm_page_t clean and reference bits. 1058 */ 1059 if ((int) *pte & PG_M) { 1060 if ((va < USRSTACK || va >= KERNBASE) || 1061 (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) { 1062 if (va < clean_sva || va >= clean_eva) { 1063 PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; 1064 } 1065 } 1066 } 1067 *pte = 0; 1068 pmap_unuse_pt(pmap, va); 1069 } 1070 npv = pv->pv_next; 1071 if (npv) { 1072 *pv = *npv; 1073 free_pv_entry(npv); 1074 } else { 1075 pv->pv_pmap = NULL; 1076 } 1077 } 1078 splx(s); 1079 if (anyvalid) 1080 pmap_update(); 1081} 1082 1083 1084/* 1085 * Set the physical protection on the 1086 * specified range of this map as requested. 1087 */ 1088void 1089pmap_protect(pmap, sva, eva, prot) 1090 register pmap_t pmap; 1091 vm_offset_t sva, eva; 1092 vm_prot_t prot; 1093{ 1094 register pt_entry_t *pte; 1095 register vm_offset_t va; 1096 int i386prot; 1097 register pt_entry_t *ptp; 1098 int evap = i386_btop(eva); 1099 int anyvalid = 0;; 1100 1101 if (pmap == NULL) 1102 return; 1103 1104 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1105 pmap_remove(pmap, sva, eva); 1106 return; 1107 } 1108 if (prot & VM_PROT_WRITE) 1109 return; 1110 1111 ptp = get_pt_entry(pmap); 1112 1113 va = sva; 1114 while (va < eva) { 1115 int found = 0; 1116 int svap; 1117 vm_offset_t nscan; 1118 1119 /* 1120 * Page table page is not allocated. Skip it, we don't want to 1121 * force allocation of unnecessary PTE pages just to set the 1122 * protection. 
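 *
 * (Further down, runs of empty ptes inside an allocated page table
 * are skipped, here as in pmap_remove(), with an inline "repe; scasl":
 * %eax is zeroed, successive longwords are compared against 0 for at
 * most %ecx entries, and the scan stops early at the first non-zero
 * pte.  On exit %edi points one past the last entry examined -- hence
 * the "pte -= 1" when something was found -- and "found" is non-zero
 * only in that case.)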
1123 */ 1124 if (!*pmap_pde(pmap, va)) { 1125 /* XXX: avoid address wrap around */ 1126 nextpde: 1127 if (va >= i386_trunc_pdr((vm_offset_t) - 1)) 1128 break; 1129 va = i386_round_pdr(va + PAGE_SIZE); 1130 continue; 1131 } 1132 pte = ptp + i386_btop(va); 1133 1134 if (*pte == 0) { 1135 /* 1136 * scan for a non-empty pte 1137 */ 1138 svap = pte - ptp; 1139 nscan = ((svap + NPTEPG) & ~(NPTEPG - 1)) - svap; 1140 1141 if (nscan + svap > evap) 1142 nscan = evap - svap; 1143 1144 found = 0; 1145 if (nscan) 1146 asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" : 1147 "=D"(pte), "=a"(found) : "c"(nscan), "0"(pte) : "cx"); 1148 1149 if (!found) 1150 goto nextpde; 1151 1152 pte -= 1; 1153 svap = pte - ptp; 1154 1155 va = i386_ptob(svap); 1156 } 1157 anyvalid++; 1158 1159 i386prot = pte_prot(pmap, prot); 1160 if (va < UPT_MAX_ADDRESS) { 1161 i386prot |= PG_u; 1162 if (va >= UPT_MIN_ADDRESS) 1163 i386prot |= PG_RW; 1164 } 1165 pmap_pte_set_prot(pte, i386prot); 1166 va += PAGE_SIZE; 1167 } 1168 if (anyvalid) 1169 pmap_update(); 1170} 1171 1172/* 1173 * Insert the given physical page (p) at 1174 * the specified virtual address (v) in the 1175 * target physical map with the protection requested. 1176 * 1177 * If specified, the page will be wired down, meaning 1178 * that the related pte can not be reclaimed. 1179 * 1180 * NB: This is the only routine which MAY NOT lazy-evaluate 1181 * or lose information. That is, this routine must actually 1182 * insert this page into the given map NOW. 1183 */ 1184void 1185pmap_enter(pmap, va, pa, prot, wired) 1186 register pmap_t pmap; 1187 vm_offset_t va; 1188 register vm_offset_t pa; 1189 vm_prot_t prot; 1190 boolean_t wired; 1191{ 1192 register pt_entry_t *pte; 1193 register pt_entry_t npte; 1194 vm_offset_t opa; 1195 int ptevalid = 0; 1196 1197 if (pmap == NULL) 1198 return; 1199 1200 va = i386_trunc_page(va); 1201 pa = i386_trunc_page(pa); 1202 if (va > VM_MAX_KERNEL_ADDRESS) 1203 panic("pmap_enter: toobig"); 1204 1205 /* 1206 * Page Directory table entry not valid, we need a new PT page 1207 */ 1208 if (*pmap_pde(pmap, va) == 0) { 1209 printf("kernel page directory invalid pdir=0x%x, va=0x%x\n", pmap->pm_pdir[PTDPTDI], va); 1210 panic("invalid kernel page directory"); 1211 } 1212 pte = pmap_pte(pmap, va); 1213 opa = pmap_pte_pa(pte); 1214 1215 /* 1216 * Mapping has not changed, must be protection or wiring change. 1217 */ 1218 if (opa == pa) { 1219 /* 1220 * Wiring change, just update stats. We don't worry about 1221 * wiring PT pages as they remain resident as long as there 1222 * are valid mappings in them. Hence, if a user page is wired, 1223 * the PT page will be also. 1224 */ 1225 if (wired && !pmap_pte_w(pte)) 1226 pmap->pm_stats.wired_count++; 1227 else if (!wired && pmap_pte_w(pte)) 1228 pmap->pm_stats.wired_count--; 1229 1230 goto validate; 1231 } 1232 /* 1233 * Mapping has changed, invalidate old range and fall through to 1234 * handle validating new mapping. 1235 */ 1236 if (opa) { 1237 pmap_remove(pmap, va, va + PAGE_SIZE); 1238 } 1239 /* 1240 * Enter on the PV list if part of our managed memory Note that we 1241 * raise IPL while manipulating pv_table since pmap_enter can be 1242 * called at interrupt time. 
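 *
 * (Unmanaged pages -- device memory, per pmap_is_managed() -- get no
 * pv entry here, so pmap_remove_all(), pmap_page_protect() and the
 * testbit/changebit routines simply ignore them; see the XXX note in
 * pmap_remove_all().)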
1243 */ 1244 if (pmap_is_managed(pa)) { 1245 register pv_entry_t pv, npv; 1246 int s; 1247 1248 pv = pa_to_pvh(pa); 1249 s = splhigh(); 1250 /* 1251 * No entries yet, use header as the first entry 1252 */ 1253 if (pv->pv_pmap == NULL) { 1254 pv->pv_va = va; 1255 pv->pv_pmap = pmap; 1256 pv->pv_next = NULL; 1257 } 1258 /* 1259 * There is at least one other VA mapping this page. Place 1260 * this entry after the header. 1261 */ 1262 else { 1263 npv = get_pv_entry(); 1264 npv->pv_va = va; 1265 npv->pv_pmap = pmap; 1266 npv->pv_next = pv->pv_next; 1267 pv->pv_next = npv; 1268 } 1269 splx(s); 1270 } 1271 1272 /* 1273 * Increment counters 1274 */ 1275 pmap->pm_stats.resident_count++; 1276 if (wired) 1277 pmap->pm_stats.wired_count++; 1278 1279validate: 1280 /* 1281 * Now validate mapping with desired protection/wiring. 1282 */ 1283 npte = (pt_entry_t) ((int) (pa | pte_prot(pmap, prot) | PG_V)); 1284 1285 /* 1286 * When forking (copy-on-write, etc): A process will turn off write 1287 * permissions for any of its writable pages. If the data (object) is 1288 * only referred to by one process, the processes map is modified 1289 * directly as opposed to using the object manipulation routine. When 1290 * using pmap_protect, the modified bits are not kept in the vm_page_t 1291 * data structure. Therefore, when using pmap_enter in vm_fault to 1292 * bring back writability of a page, there has been no memory of the 1293 * modified or referenced bits except at the pte level. this clause 1294 * supports the carryover of the modified and used (referenced) bits. 1295 */ 1296 if (pa == opa) 1297 (int) npte |= (int) *pte & (PG_M | PG_U); 1298 1299 1300 if (wired) 1301 (int) npte |= PG_W; 1302 if (va < UPT_MIN_ADDRESS) 1303 (int) npte |= PG_u; 1304 else if (va < UPT_MAX_ADDRESS) 1305 (int) npte |= PG_u | PG_RW; 1306 1307 if (*pte != npte) { 1308 if (*pte) 1309 ptevalid++; 1310 *pte = npte; 1311 } 1312 if (ptevalid) { 1313 pmap_update(); 1314 } else { 1315 pmap_use_pt(pmap, va); 1316 } 1317} 1318 1319/* 1320 * Add a list of wired pages to the kva 1321 * this routine is only used for temporary 1322 * kernel mappings that do not need to have 1323 * page modification or references recorded. 1324 * Note that old mappings are simply written 1325 * over. The page *must* be wired. 1326 */ 1327void 1328pmap_qenter(va, m, count) 1329 vm_offset_t va; 1330 vm_page_t *m; 1331 int count; 1332{ 1333 int i; 1334 int anyvalid = 0; 1335 register pt_entry_t *pte; 1336 1337 for (i = 0; i < count; i++) { 1338 pte = vtopte(va + i * NBPG); 1339 if (*pte) 1340 anyvalid++; 1341 *pte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | PG_W)); 1342 } 1343 if (anyvalid) 1344 pmap_update(); 1345} 1346/* 1347 * this routine jerks page mappings from the 1348 * kernel -- it is meant only for temporary mappings. 1349 */ 1350void 1351pmap_qremove(va, count) 1352 vm_offset_t va; 1353 int count; 1354{ 1355 int i; 1356 register pt_entry_t *pte; 1357 1358 for (i = 0; i < count; i++) { 1359 pte = vtopte(va + i * NBPG); 1360 *pte = 0; 1361 } 1362 pmap_update(); 1363} 1364 1365/* 1366 * add a wired page to the kva 1367 * note that in order for the mapping to take effect -- you 1368 * should do a pmap_update after doing the pmap_kenter... 
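 *
 * Illustrative use (va must be a kernel virtual address whose page
 * table already exists):
 *
 *	pmap_kenter(va, pa);
 *	pmap_update();
 *
 * pmap_kenter() itself only flushes the TLB when it overwrites a
 * previously valid pte, which is why the explicit pmap_update() is
 * advised; pmap_qenter()/pmap_qremove() are the multi-page variants.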
1369 */ 1370void 1371pmap_kenter(va, pa) 1372 vm_offset_t va; 1373 register vm_offset_t pa; 1374{ 1375 register pt_entry_t *pte; 1376 int wasvalid = 0; 1377 1378 pte = vtopte(va); 1379 1380 if (*pte) 1381 wasvalid++; 1382 1383 *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_W)); 1384 1385 if (wasvalid) 1386 pmap_update(); 1387} 1388 1389/* 1390 * remove a page from the kernel pagetables 1391 */ 1392void 1393pmap_kremove(va) 1394 vm_offset_t va; 1395{ 1396 register pt_entry_t *pte; 1397 1398 pte = vtopte(va); 1399 1400 *pte = (pt_entry_t) 0; 1401 pmap_update(); 1402} 1403 1404/* 1405 * this code makes some *MAJOR* assumptions: 1406 * 1. Current pmap & pmap exists. 1407 * 2. Not wired. 1408 * 3. Read access. 1409 * 4. No page table pages. 1410 * 5. Tlbflush is deferred to calling procedure. 1411 * 6. Page IS managed. 1412 * but is *MUCH* faster than pmap_enter... 1413 */ 1414 1415static inline void 1416pmap_enter_quick(pmap, va, pa) 1417 register pmap_t pmap; 1418 vm_offset_t va; 1419 register vm_offset_t pa; 1420{ 1421 register pt_entry_t *pte; 1422 register pv_entry_t pv, npv; 1423 int s; 1424 1425 /* 1426 * Enter on the PV list if part of our managed memory Note that we 1427 * raise IPL while manipulating pv_table since pmap_enter can be 1428 * called at interrupt time. 1429 */ 1430 1431 pte = vtopte(va); 1432 1433 /* a fault on the page table might occur here */ 1434 if (*pte) { 1435 pmap_remove(pmap, va, va + PAGE_SIZE); 1436 } 1437 pv = pa_to_pvh(pa); 1438 s = splhigh(); 1439 /* 1440 * No entries yet, use header as the first entry 1441 */ 1442 if (pv->pv_pmap == NULL) { 1443 pv->pv_pmap = pmap; 1444 pv->pv_va = va; 1445 pv->pv_next = NULL; 1446 } 1447 /* 1448 * There is at least one other VA mapping this page. Place this entry 1449 * after the header. 1450 */ 1451 else { 1452 npv = get_pv_entry(); 1453 npv->pv_va = va; 1454 npv->pv_pmap = pmap; 1455 npv->pv_next = pv->pv_next; 1456 pv->pv_next = npv; 1457 } 1458 splx(s); 1459 1460 /* 1461 * Increment counters 1462 */ 1463 pmap->pm_stats.resident_count++; 1464 1465 /* 1466 * Now validate mapping with desired protection/wiring. 1467 */ 1468 *pte = (pt_entry_t) ((int) (pa | PG_V | PG_u)); 1469 1470 pmap_use_pt(pmap, va); 1471 1472 return; 1473} 1474 1475#define MAX_INIT_PT (1024*2048) 1476/* 1477 * pmap_object_init_pt preloads the ptes for a given object 1478 * into the specified pmap. This eliminates the blast of soft 1479 * faults on process startup and immediately after an mmap. 1480 */ 1481void 1482pmap_object_init_pt(pmap, addr, object, offset, size) 1483 pmap_t pmap; 1484 vm_offset_t addr; 1485 vm_object_t object; 1486 vm_offset_t offset; 1487 vm_offset_t size; 1488{ 1489 vm_offset_t tmpoff; 1490 vm_page_t p; 1491 int bits; 1492 int objbytes; 1493 1494 if (!pmap || ((size > MAX_INIT_PT) && 1495 (object->resident_page_count > (MAX_INIT_PT / NBPG)))) { 1496 return; 1497 } 1498 if (!vm_object_lock_try(object)) 1499 return; 1500 1501 /* 1502 * if we are processing a major portion of the object, then scan the 1503 * entire thing. 
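 *
 * ("major portion" means more than a quarter of the object, per the
 * size test below: in that case it is cheaper to walk the object's
 * resident page list once than to call vm_page_lookup() for every
 * page offset in the requested range, which is what the else branch
 * does.)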
1504 */ 1505 if (size > (object->size >> 2)) { 1506 objbytes = size; 1507 1508 for (p = object->memq.tqh_first; 1509 ((objbytes > 0) && (p != NULL)); 1510 p = p->listq.tqe_next) { 1511 1512 tmpoff = p->offset; 1513 if (tmpoff < offset) { 1514 continue; 1515 } 1516 tmpoff -= offset; 1517 if (tmpoff >= size) { 1518 continue; 1519 } 1520 if (((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) && 1521 ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 1522 (p->bmapped == 0) && 1523 (p->busy == 0) && 1524 (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) { 1525 vm_page_hold(p); 1526 p->flags |= PG_MAPPED; 1527 pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p)); 1528 vm_page_unhold(p); 1529 } 1530 objbytes -= NBPG; 1531 } 1532 } else { 1533 /* 1534 * else lookup the pages one-by-one. 1535 */ 1536 for (tmpoff = 0; tmpoff < size; tmpoff += NBPG) { 1537 p = vm_page_lookup(object, tmpoff + offset); 1538 if (p && ((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) && 1539 (p->bmapped == 0) && (p->busy == 0) && 1540 ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 1541 (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) { 1542 vm_page_hold(p); 1543 p->flags |= PG_MAPPED; 1544 pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p)); 1545 vm_page_unhold(p); 1546 } 1547 } 1548 } 1549 vm_object_unlock(object); 1550} 1551 1552#if 0 1553/* 1554 * pmap_prefault provides a quick way of clustering 1555 * pagefaults into a processes address space. It is a "cousin" 1556 * of pmap_object_init_pt, except it runs at page fault time instead 1557 * of mmap time. 1558 */ 1559#define PFBAK 2 1560#define PFFOR 2 1561#define PAGEORDER_SIZE (PFBAK+PFFOR) 1562 1563static int pmap_prefault_pageorder[] = { 1564 -NBPG, NBPG, -2 * NBPG, 2 * NBPG 1565}; 1566 1567void 1568pmap_prefault(pmap, addra, entry, object) 1569 pmap_t pmap; 1570 vm_offset_t addra; 1571 vm_map_entry_t entry; 1572 vm_object_t object; 1573{ 1574 int i; 1575 vm_offset_t starta, enda; 1576 vm_offset_t offset, addr; 1577 vm_page_t m; 1578 int pageorder_index; 1579 1580 if (entry->object.vm_object != object) 1581 return; 1582 1583 if (pmap != &curproc->p_vmspace->vm_pmap) 1584 return; 1585 1586 starta = addra - PFBAK * NBPG; 1587 if (starta < entry->start) { 1588 starta = entry->start; 1589 } else if (starta > addra) 1590 starta = 0; 1591 1592 enda = addra + PFFOR * NBPG; 1593 if (enda > entry->end) 1594 enda = entry->end; 1595 1596 for (i = 0; i < PAGEORDER_SIZE; i++) { 1597 vm_object_t lobject; 1598 pt_entry_t *pte; 1599 1600 addr = addra + pmap_prefault_pageorder[i]; 1601 if (addr < starta || addr >= enda) 1602 continue; 1603 1604 pte = vtopte(addr); 1605 if (*pte) 1606 continue; 1607 1608 offset = (addr - entry->start) + entry->offset; 1609 lobject = object; 1610 for (m = vm_page_lookup(lobject, offset); 1611 (!m && lobject->shadow && !lobject->pager); 1612 lobject = lobject->shadow) { 1613 1614 offset += lobject->shadow_offset; 1615 m = vm_page_lookup(lobject->shadow, offset); 1616 } 1617 1618 /* 1619 * give-up when a page is not in memory 1620 */ 1621 if (m == NULL) 1622 break; 1623 1624 if (((m->flags & (PG_CACHE | PG_ACTIVE | PG_INACTIVE)) != 0) && 1625 ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 1626 (m->busy == 0) && 1627 (m->bmapped == 0) && 1628 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 1629 /* 1630 * test results show that the system is faster when 1631 * pages are activated. 
1632 */ 1633 if ((m->flags & PG_ACTIVE) == 0) { 1634 if( m->flags & PG_CACHE) 1635 vm_page_deactivate(m); 1636 else 1637 vm_page_activate(m); 1638 } 1639 vm_page_hold(m); 1640 m->flags |= PG_MAPPED; 1641 pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); 1642 vm_page_unhold(m); 1643 } 1644 } 1645} 1646#endif 1647 1648/* 1649 * Routine: pmap_change_wiring 1650 * Function: Change the wiring attribute for a map/virtual-address 1651 * pair. 1652 * In/out conditions: 1653 * The mapping must already exist in the pmap. 1654 */ 1655void 1656pmap_change_wiring(pmap, va, wired) 1657 register pmap_t pmap; 1658 vm_offset_t va; 1659 boolean_t wired; 1660{ 1661 register pt_entry_t *pte; 1662 1663 if (pmap == NULL) 1664 return; 1665 1666 pte = pmap_pte(pmap, va); 1667 1668 if (wired && !pmap_pte_w(pte)) 1669 pmap->pm_stats.wired_count++; 1670 else if (!wired && pmap_pte_w(pte)) 1671 pmap->pm_stats.wired_count--; 1672 1673 /* 1674 * Wiring is not a hardware characteristic so there is no need to 1675 * invalidate TLB. 1676 */ 1677 pmap_pte_set_w(pte, wired); 1678 /* 1679 * When unwiring, set the modified bit in the pte -- could have been 1680 * changed by the kernel 1681 */ 1682 if (!wired) 1683 (int) *pte |= PG_M; 1684} 1685 1686 1687 1688/* 1689 * Copy the range specified by src_addr/len 1690 * from the source map to the range dst_addr/len 1691 * in the destination map. 1692 * 1693 * This routine is only advisory and need not do anything. 1694 */ 1695void 1696pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 1697 pmap_t dst_pmap, src_pmap; 1698 vm_offset_t dst_addr; 1699 vm_size_t len; 1700 vm_offset_t src_addr; 1701{ 1702} 1703 1704/* 1705 * Routine: pmap_kernel 1706 * Function: 1707 * Returns the physical map handle for the kernel. 1708 */ 1709pmap_t 1710pmap_kernel() 1711{ 1712 return (kernel_pmap); 1713} 1714 1715/* 1716 * pmap_zero_page zeros the specified (machine independent) 1717 * page by mapping the page into virtual memory and using 1718 * bzero to clear its contents, one machine dependent page 1719 * at a time. 1720 */ 1721void 1722pmap_zero_page(phys) 1723 vm_offset_t phys; 1724{ 1725 if (*(int *) CMAP2) 1726 panic("pmap_zero_page: CMAP busy"); 1727 1728 *(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(phys); 1729 bzero(CADDR2, NBPG); 1730 1731 *(int *) CMAP2 = 0; 1732 pmap_update(); 1733} 1734 1735/* 1736 * pmap_copy_page copies the specified (machine independent) 1737 * page by mapping the page into virtual memory and using 1738 * bcopy to copy the page, one machine dependent page at a 1739 * time. 1740 */ 1741void 1742pmap_copy_page(src, dst) 1743 vm_offset_t src; 1744 vm_offset_t dst; 1745{ 1746 if (*(int *) CMAP1 || *(int *) CMAP2) 1747 panic("pmap_copy_page: CMAP busy"); 1748 1749 *(int *) CMAP1 = PG_V | PG_KW | i386_trunc_page(src); 1750 *(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(dst); 1751 1752#if __GNUC__ > 1 1753 memcpy(CADDR2, CADDR1, NBPG); 1754#else 1755 bcopy(CADDR1, CADDR2, NBPG); 1756#endif 1757 *(int *) CMAP1 = 0; 1758 *(int *) CMAP2 = 0; 1759 pmap_update(); 1760} 1761 1762 1763/* 1764 * Routine: pmap_pageable 1765 * Function: 1766 * Make the specified pages (by pmap, offset) 1767 * pageable (or not) as requested. 1768 * 1769 * A page which is not pageable may not take 1770 * a fault; therefore, its page table entry 1771 * must remain valid for the duration. 1772 * 1773 * This routine is merely advisory; pmap_enter 1774 * will specify that these pages are to be wired 1775 * down (or not) as appropriate. 
1776 */ 1777void 1778pmap_pageable(pmap, sva, eva, pageable) 1779 pmap_t pmap; 1780 vm_offset_t sva, eva; 1781 boolean_t pageable; 1782{ 1783} 1784 1785/* 1786 * this routine returns true if a physical page resides 1787 * in the given pmap. 1788 */ 1789boolean_t 1790pmap_page_exists(pmap, pa) 1791 pmap_t pmap; 1792 vm_offset_t pa; 1793{ 1794 register pv_entry_t pv; 1795 int s; 1796 1797 if (!pmap_is_managed(pa)) 1798 return FALSE; 1799 1800 pv = pa_to_pvh(pa); 1801 s = splhigh(); 1802 1803 /* 1804 * Not found, check current mappings returning immediately if found. 1805 */ 1806 if (pv->pv_pmap != NULL) { 1807 for (; pv; pv = pv->pv_next) { 1808 if (pv->pv_pmap == pmap) { 1809 splx(s); 1810 return TRUE; 1811 } 1812 } 1813 } 1814 splx(s); 1815 return (FALSE); 1816} 1817 1818/* 1819 * pmap_testbit tests bits in pte's 1820 * note that the testbit/changebit routines are inline, 1821 * and a lot of things compile-time evaluate. 1822 */ 1823__inline boolean_t 1824pmap_testbit(pa, bit) 1825 register vm_offset_t pa; 1826 int bit; 1827{ 1828 register pv_entry_t pv; 1829 pt_entry_t *pte; 1830 int s; 1831 1832 if (!pmap_is_managed(pa)) 1833 return FALSE; 1834 1835 pv = pa_to_pvh(pa); 1836 s = splhigh(); 1837 1838 /* 1839 * Not found, check current mappings returning immediately if found. 1840 */ 1841 if (pv->pv_pmap != NULL) { 1842 for (; pv; pv = pv->pv_next) { 1843 /* 1844 * if the bit being tested is the modified bit, then 1845 * mark UPAGES as always modified, and ptes as never 1846 * modified. 1847 */ 1848 if (bit & PG_U) { 1849 if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { 1850 continue; 1851 } 1852 } 1853 if (bit & PG_M) { 1854 if (pv->pv_va >= USRSTACK) { 1855 if (pv->pv_va >= clean_sva && pv->pv_va < clean_eva) { 1856 continue; 1857 } 1858 if (pv->pv_va < USRSTACK + (UPAGES * NBPG)) { 1859 splx(s); 1860 return TRUE; 1861 } else if (pv->pv_va < KERNBASE) { 1862 splx(s); 1863 return FALSE; 1864 } 1865 } 1866 } 1867 if (!pv->pv_pmap) { 1868 printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 1869 continue; 1870 } 1871 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 1872 if ((int) *pte & bit) { 1873 splx(s); 1874 return TRUE; 1875 } 1876 } 1877 } 1878 splx(s); 1879 return (FALSE); 1880} 1881 1882/* 1883 * this routine is used to modify bits in ptes 1884 */ 1885__inline void 1886pmap_changebit(pa, bit, setem) 1887 vm_offset_t pa; 1888 int bit; 1889 boolean_t setem; 1890{ 1891 register pv_entry_t pv; 1892 register pt_entry_t *pte, npte; 1893 vm_offset_t va; 1894 int s; 1895 1896 if (!pmap_is_managed(pa)) 1897 return; 1898 1899 pv = pa_to_pvh(pa); 1900 s = splhigh(); 1901 1902 /* 1903 * Loop over all current mappings setting/clearing as appropos If 1904 * setting RO do we need to clear the VAC? 1905 */ 1906 if (pv->pv_pmap != NULL) { 1907 for (; pv; pv = pv->pv_next) { 1908 va = pv->pv_va; 1909 1910 /* 1911 * don't write protect pager mappings 1912 */ 1913 if (!setem && (bit == PG_RW)) { 1914 if (va >= clean_sva && va < clean_eva) 1915 continue; 1916 } 1917 if (!pv->pv_pmap) { 1918 printf("Null pmap (cb) at va: 0x%lx\n", va); 1919 continue; 1920 } 1921 pte = pmap_pte(pv->pv_pmap, va); 1922 if (setem) 1923 (int) npte = (int) *pte | bit; 1924 else 1925 (int) npte = (int) *pte & ~bit; 1926 *pte = npte; 1927 } 1928 } 1929 splx(s); 1930 pmap_update(); 1931} 1932 1933/* 1934 * pmap_page_protect: 1935 * 1936 * Lower the permission for all mappings to a given page. 
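 *
 * Illustrative calls (the behaviour follows from the body below):
 *
 *	pmap_page_protect(pa, VM_PROT_READ);	write permission removed
 *						from every mapping of pa
 *	pmap_page_protect(pa, VM_PROT_NONE);	every mapping of pa removed
 *
 * Requests that retain VM_PROT_WRITE are ignored, and pa must be a
 * managed page for either form to do anything.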
1937 */ 1938void 1939pmap_page_protect(phys, prot) 1940 vm_offset_t phys; 1941 vm_prot_t prot; 1942{ 1943 if ((prot & VM_PROT_WRITE) == 0) { 1944 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) 1945 pmap_changebit(phys, PG_RW, FALSE); 1946 else 1947 pmap_remove_all(phys); 1948 } 1949} 1950 1951vm_offset_t 1952pmap_phys_address(ppn) 1953 int ppn; 1954{ 1955 return (i386_ptob(ppn)); 1956} 1957 1958/* 1959 * pmap_is_referenced: 1960 * 1961 * Return whether or not the specified physical page was referenced 1962 * by any physical maps. 1963 */ 1964boolean_t 1965pmap_is_referenced(vm_offset_t pa) 1966{ 1967 return pmap_testbit((pa), PG_U); 1968} 1969 1970/* 1971 * pmap_is_modified: 1972 * 1973 * Return whether or not the specified physical page was modified 1974 * in any physical maps. 1975 */ 1976boolean_t 1977pmap_is_modified(vm_offset_t pa) 1978{ 1979 return pmap_testbit((pa), PG_M); 1980} 1981 1982/* 1983 * Clear the modify bits on the specified physical page. 1984 */ 1985void 1986pmap_clear_modify(vm_offset_t pa) 1987{ 1988 pmap_changebit((pa), PG_M, FALSE); 1989} 1990 1991/* 1992 * pmap_clear_reference: 1993 * 1994 * Clear the reference bit on the specified physical page. 1995 */ 1996void 1997pmap_clear_reference(vm_offset_t pa) 1998{ 1999 pmap_changebit((pa), PG_U, FALSE); 2000} 2001 2002/* 2003 * Routine: pmap_copy_on_write 2004 * Function: 2005 * Remove write privileges from all 2006 * physical maps for this physical page. 2007 */ 2008void 2009pmap_copy_on_write(vm_offset_t pa) 2010{ 2011 pmap_changebit((pa), PG_RW, FALSE); 2012} 2013 2014/* 2015 * Miscellaneous support routines follow 2016 */ 2017 2018void 2019i386_protection_init() 2020{ 2021 register int *kp, prot; 2022 2023 kp = protection_codes; 2024 for (prot = 0; prot < 8; prot++) { 2025 switch (prot) { 2026 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: 2027 /* 2028 * Read access is also 0. There isn't any execute bit, 2029 * so just make it readable. 2030 */ 2031 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: 2032 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: 2033 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: 2034 *kp++ = 0; 2035 break; 2036 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: 2037 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: 2038 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: 2039 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: 2040 *kp++ = PG_RW; 2041 break; 2042 } 2043 } 2044} 2045 2046/* 2047 * Map a set of physical memory pages into the kernel virtual 2048 * address space. Return a pointer to where it is mapped. This 2049 * routine is intended to be used for mapping device memory, 2050 * NOT real memory. The non-cacheable bits are set on each 2051 * mapped page. 
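 *
 * Hypothetical example (the address and size are made up, purely to
 * illustrate the interface):
 *
 *	caddr_t regs = (caddr_t) pmap_mapdev(0xf0000000, 0x1000);
 *
 * The pages are entered with PG_N set, so the mapping is uncached,
 * and the kva comes from kernel_map with no un-map counterpart in
 * this file, so mappings made this way are effectively permanent.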
2052 */ 2053void * 2054pmap_mapdev(pa, size) 2055 vm_offset_t pa; 2056 vm_size_t size; 2057{ 2058 vm_offset_t va, tmpva; 2059 pt_entry_t *pte; 2060 2061 pa = trunc_page(pa); 2062 size = roundup(size, PAGE_SIZE); 2063 2064 va = kmem_alloc_pageable(kernel_map, size); 2065 if (!va) 2066 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 2067 2068 for (tmpva = va; size > 0;) { 2069 pte = vtopte(tmpva); 2070 *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N)); 2071 size -= PAGE_SIZE; 2072 tmpva += PAGE_SIZE; 2073 pa += PAGE_SIZE; 2074 } 2075 pmap_update(); 2076 2077 return ((void *) va); 2078} 2079 2080#ifdef DEBUG 2081/* print address space of pmap*/ 2082void 2083pads(pm) 2084 pmap_t pm; 2085{ 2086 unsigned va, i, j; 2087 pt_entry_t *ptep; 2088 2089 if (pm == kernel_pmap) 2090 return; 2091 for (i = 0; i < 1024; i++) 2092 if (pm->pm_pdir[i]) 2093 for (j = 0; j < 1024; j++) { 2094 va = (i << PD_SHIFT) + (j << PG_SHIFT); 2095 if (pm == kernel_pmap && va < KERNBASE) 2096 continue; 2097 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 2098 continue; 2099 ptep = pmap_pte(pm, va); 2100 if (pmap_pte_v(ptep)) 2101 printf("%x:%x ", va, *(int *) ptep); 2102 }; 2103 2104} 2105 2106void 2107pmap_pvdump(pa) 2108 vm_offset_t pa; 2109{ 2110 register pv_entry_t pv; 2111 2112 printf("pa %x", pa); 2113 for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) { 2114#ifdef used_to_be 2115 printf(" -> pmap %x, va %x, flags %x", 2116 pv->pv_pmap, pv->pv_va, pv->pv_flags); 2117#endif 2118 printf(" -> pmap %x, va %x", 2119 pv->pv_pmap, pv->pv_va); 2120 pads(pv->pv_pmap); 2121 } 2122 printf(" "); 2123} 2124#endif 2125