pmap.c revision 7090
1/* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department and William Jolitz of UUNET Technologies Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 42 * $Id: pmap.c,v 1.52 1995/03/10 08:05:00 davidg Exp $ 43 */ 44 45/* 46 * Derived from hp300 version by Mike Hibler, this version by William 47 * Jolitz uses a recursive map [a pde points to the page directory] to 48 * map the page tables using the pagetables themselves. This is done to 49 * reduce the impact on kernel virtual memory for lots of sparse address 50 * space, and to reduce the cost of memory to each process. 51 * 52 * Derived from: hp300/@(#)pmap.c 7.1 (Berkeley) 12/5/90 53 */ 54/* 55 * Major modifications by John S. Dyson primarily to support 56 * pageable page tables, eliminating pmap_attributes, 57 * discontiguous memory pages, and using more efficient string 58 * instructions. Jan 13, 1994. Further modifications on Mar 2, 1994, 59 * general clean-up and efficiency mods. 60 */ 61 62/* 63 * Manages physical address maps. 64 * 65 * In addition to hardware address maps, this 66 * module is called upon to provide software-use-only 67 * maps which may or may not be stored in the same 68 * form as hardware maps. These pseudo-maps are 69 * used to store intermediate results from copy 70 * operations to and from address spaces. 
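 *
 * Illustrative note on the recursive map mentioned above: because one
 * pde slot (PTDPTDI) points back at the page directory itself, every
 * page table shows up as ordinary memory in the PTmap window, so the
 * pte for a virtual address can be reached with plain pointer
 * arithmetic instead of a two-level table walk, roughly
 *
 *	pt_entry_t *pte = vtopte(va);	(i.e. &PTmap[i386_btop(va)])
 *
 * (sketch only; vtopte(), PTmap and i386_btop() come from the
 * machine-dependent headers, not from this file).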
71 * 72 * Since the information managed by this module is 73 * also stored by the logical address mapping module, 74 * this module may throw away valid virtual-to-physical 75 * mappings at almost any time. However, invalidations 76 * of virtual-to-physical mappings must be done as 77 * requested. 78 * 79 * In order to cope with hardware architectures which 80 * make virtual-to-physical map invalidates expensive, 81 * this module may delay invalidate or reduced protection 82 * operations until such time as they are actually 83 * necessary. This module is given full information as 84 * to which processors are currently using which maps, 85 * and to when physical maps must be made correct. 86 */ 87 88#include <sys/param.h> 89#include <sys/systm.h> 90#include <sys/proc.h> 91#include <sys/malloc.h> 92#include <sys/user.h> 93 94#include <vm/vm.h> 95#include <vm/vm_kern.h> 96#include <vm/vm_page.h> 97 98#include <machine/cputypes.h> 99#include <machine/md_var.h> 100 101#include <i386/isa/isa.h> 102 103/* 104 * Allocate various and sundry SYSMAPs used in the days of old VM 105 * and not yet converted. XXX. 106 */ 107#define BSDVM_COMPAT 1 108 109/* 110 * Get PDEs and PTEs for user/kernel address space 111 */ 112#define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023])) 113#define pdir_pde(m, v) (m[((vm_offset_t)(v) >> PD_SHIFT)&1023]) 114 115#define pmap_pte_pa(pte) (*(int *)(pte) & PG_FRAME) 116 117#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) 118#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) 119#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) 120#define pmap_pte_u(pte) ((*(int *)pte & PG_U) != 0) 121#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) 122 123#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) 124#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 125 126/* 127 * Given a map and a machine independent protection code, 128 * convert to a vax protection code. 129 */ 130#define pte_prot(m, p) (protection_codes[p]) 131int protection_codes[8]; 132 133struct pmap kernel_pmap_store; 134pmap_t kernel_pmap; 135 136vm_offset_t phys_avail[6]; /* 2 entries + 1 null */ 137vm_offset_t avail_start; /* PA of first available physical page */ 138vm_offset_t avail_end; /* PA of last available physical page */ 139vm_size_t mem_size; /* memory size in bytes */ 140vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 141vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 142int i386pagesperpage; /* PAGE_SIZE / I386_PAGE_SIZE */ 143boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ 144vm_offset_t vm_first_phys, vm_last_phys; 145 146static inline int pmap_is_managed(); 147static void i386_protection_init(); 148static void pmap_alloc_pv_entry(); 149static inline pv_entry_t get_pv_entry(); 150int nkpt; 151 152 153extern vm_offset_t clean_sva, clean_eva; 154extern int cpu_class; 155 156#if BSDVM_COMPAT 157#include <sys/msgbuf.h> 158 159/* 160 * All those kernel PT submaps that BSD is so fond of 161 */ 162pt_entry_t *CMAP1, *CMAP2, *ptmmap; 163pv_entry_t pv_table; 164caddr_t CADDR1, CADDR2, ptvmmap; 165pt_entry_t *msgbufmap; 166struct msgbuf *msgbufp; 167 168#endif 169 170void 171init_pv_entries(int); 172 173/* 174 * Routine: pmap_pte 175 * Function: 176 * Extract the page table entry associated 177 * with the given map/virtual_address pair. 
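 *
 * Usage sketch (illustrative only, mirroring pmap_extract() below):
 *
 *	pt_entry_t *pte = pmap_pte(pmap, va);
 *	if (pte && pmap_pte_v(pte))
 *		pa = pmap_pte_pa(pte) | (va & ~PG_FRAME);
 *
 * Note that for a pmap that is neither the kernel's nor the current
 * one, pmap_pte() switches APTDpde (and flushes the TLB) as a side
 * effect.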
178 * [ what about induced faults -wfj] 179 */ 180 181inline pt_entry_t * const 182pmap_pte(pmap, va) 183 register pmap_t pmap; 184 vm_offset_t va; 185{ 186 187 if (pmap && *pmap_pde(pmap, va)) { 188 vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 189 190 /* are we current address space or kernel? */ 191 if ((pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME))) 192 return ((pt_entry_t *) vtopte(va)); 193 /* otherwise, we are alternate address space */ 194 else { 195 if (frame != ((int) APTDpde & PG_FRAME)) { 196 APTDpde = pmap->pm_pdir[PTDPTDI]; 197 pmap_update(); 198 } 199 return ((pt_entry_t *) avtopte(va)); 200 } 201 } 202 return (0); 203} 204 205/* 206 * Routine: pmap_extract 207 * Function: 208 * Extract the physical page address associated 209 * with the given map/virtual_address pair. 210 */ 211 212vm_offset_t 213pmap_extract(pmap, va) 214 register pmap_t pmap; 215 vm_offset_t va; 216{ 217 vm_offset_t pa; 218 219 if (pmap && *pmap_pde(pmap, va)) { 220 vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 221 222 /* are we current address space or kernel? */ 223 if ((pmap == kernel_pmap) 224 || (frame == ((int) PTDpde & PG_FRAME))) { 225 pa = *(int *) vtopte(va); 226 /* otherwise, we are alternate address space */ 227 } else { 228 if (frame != ((int) APTDpde & PG_FRAME)) { 229 APTDpde = pmap->pm_pdir[PTDPTDI]; 230 pmap_update(); 231 } 232 pa = *(int *) avtopte(va); 233 } 234 return ((pa & PG_FRAME) | (va & ~PG_FRAME)); 235 } 236 return 0; 237 238} 239 240/* 241 * determine if a page is managed (memory vs. device) 242 */ 243static inline int 244pmap_is_managed(pa) 245 vm_offset_t pa; 246{ 247 int i; 248 249 if (!pmap_initialized) 250 return 0; 251 252 for (i = 0; phys_avail[i + 1]; i += 2) { 253 if (pa >= phys_avail[i] && pa < phys_avail[i + 1]) 254 return 1; 255 } 256 return 0; 257} 258 259/* 260 * find the vm_page_t of a pte (only) given va of pte and pmap 261 */ 262__inline vm_page_t 263pmap_pte_vm_page(pmap, pt) 264 pmap_t pmap; 265 vm_offset_t pt; 266{ 267 vm_page_t m; 268 269 pt = i386_trunc_page(pt); 270 pt = (pt - UPT_MIN_ADDRESS) / NBPG; 271 pt = ((vm_offset_t) pmap->pm_pdir[pt]) & PG_FRAME; 272 m = PHYS_TO_VM_PAGE(pt); 273 return m; 274} 275 276/* 277 * Wire a page table page 278 */ 279__inline void 280pmap_use_pt(pmap, va) 281 pmap_t pmap; 282 vm_offset_t va; 283{ 284 vm_offset_t pt; 285 286 if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized) 287 return; 288 289 pt = (vm_offset_t) vtopte(va); 290 vm_page_hold(pmap_pte_vm_page(pmap, pt)); 291} 292 293/* 294 * Unwire a page table page 295 */ 296inline void 297pmap_unuse_pt(pmap, va) 298 pmap_t pmap; 299 vm_offset_t va; 300{ 301 vm_offset_t pt; 302 vm_page_t m; 303 304 if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized) 305 return; 306 307 pt = (vm_offset_t) vtopte(va); 308 m = pmap_pte_vm_page(pmap, pt); 309 vm_page_unhold(m); 310 if (pmap != kernel_pmap && 311 (m->hold_count == 0) && 312 (m->wire_count == 0) && 313 (va < KPT_MIN_ADDRESS)) { 314 pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); 315 vm_page_free(m); 316 } 317} 318 319/* [ macro again?, should I force kstack into user map here? -wfj ] */ 320void 321pmap_activate(pmap, pcbp) 322 register pmap_t pmap; 323 struct pcb *pcbp; 324{ 325 PMAP_ACTIVATE(pmap, pcbp); 326} 327 328/* 329 * Bootstrap the system enough to run with virtual memory. 330 * Map the kernel's code and data, and allocate the system page table. 
 *
 * On the I386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */

#define DMAPAGES 8
void
pmap_bootstrap(firstaddr, loadaddr)
	vm_offset_t firstaddr;
	vm_offset_t loadaddr;
{
#if BSDVM_COMPAT
	vm_offset_t va;
	pt_entry_t *pte;

#endif

	avail_start = firstaddr + DMAPAGES * NBPG;

	virtual_avail = (vm_offset_t) KERNBASE + avail_start;
	virtual_end = VM_MAX_KERNEL_ADDRESS;
	i386pagesperpage = PAGE_SIZE / NBPG;

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence.
	 */
	kernel_pmap = &kernel_pmap_store;

	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD);

	simple_lock_init(&kernel_pmap->pm_lock);
	kernel_pmap->pm_count = 1;
	nkpt = NKPT;

#if BSDVM_COMPAT
	/*
	 * Allocate all the submaps we need
	 */
#define SYSMAP(c, p, v, n) \
	v = (c)va; va += ((n)*NBPG); p = pte; pte += (n);

	va = virtual_avail;
	pte = pmap_pte(kernel_pmap, va);

	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 1)
	virtual_avail = va;
#endif
	/*
	 * Reserve special hunk of memory for use by bus dma as a bounce
	 * buffer (contiguous virtual *and* physical memory).
	 */
	{
		isaphysmem = va;

		virtual_avail = pmap_map(va, firstaddr,
		    firstaddr + DMAPAGES * NBPG, VM_PROT_ALL);
	}

	*(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0;
	pmap_update();

}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 * pmap_init has been enhanced to support, in a fairly consistent
 * way, discontiguous physical memory.
 */
void
pmap_init(phys_start, phys_end)
	vm_offset_t phys_start, phys_end;
{
	vm_offset_t addr;
	vm_size_t npg, s;
	int i;

	/*
	 * Now that kernel map has been allocated, we can mark as unavailable
	 * regions which we have mapped in locore.
	 */
	addr = atdevbase;
	(void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0,
	    &addr, (0x100000 - 0xa0000), FALSE);

	addr = (vm_offset_t) KERNBASE + IdlePTD;
	vm_object_reference(kernel_object);
	(void) vm_map_find(kernel_map, kernel_object, addr,
	    &addr, (4 + NKPDE) * NBPG, FALSE);

	/*
	 * calculate the number of pv_entries needed
	 */
	vm_first_phys = phys_avail[0];
	for (i = 0; phys_avail[i + 1]; i += 2);
	npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / NBPG;

	/*
	 * Allocate memory for random pmap data structures. Includes the
	 * pv_head_table.
	 */
	s = (vm_size_t) (sizeof(struct pv_entry) * npg);
	s = i386_round_page(s);
	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
	pv_table = (pv_entry_t) addr;

	/*
	 * init the pv free list
	 */
	init_pv_entries(npg);
	/*
	 * Now it is safe to enable pv_table recording.
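	 *
	 * Illustrative note: once pmap_initialized is set, the pv list
	 * head for a managed page is reached by simple indexing, roughly
	 *
	 *	pv = &pv_table[(pa - vm_first_phys) / NBPG];
	 *
	 * which is why the table is sized from the last phys_avail[]
	 * entry above.  (Sketch only -- the real pa_to_pvh() macro is
	 * defined in the pmap header, not here.)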
458 */ 459 pmap_initialized = TRUE; 460} 461 462/* 463 * Used to map a range of physical addresses into kernel 464 * virtual address space. 465 * 466 * For now, VM is already on, we only need to map the 467 * specified memory. 468 */ 469vm_offset_t 470pmap_map(virt, start, end, prot) 471 vm_offset_t virt; 472 vm_offset_t start; 473 vm_offset_t end; 474 int prot; 475{ 476 while (start < end) { 477 pmap_enter(kernel_pmap, virt, start, prot, FALSE); 478 virt += PAGE_SIZE; 479 start += PAGE_SIZE; 480 } 481 return (virt); 482} 483 484/* 485 * Create and return a physical map. 486 * 487 * If the size specified for the map 488 * is zero, the map is an actual physical 489 * map, and may be referenced by the 490 * hardware. 491 * 492 * If the size specified is non-zero, 493 * the map will be used in software only, and 494 * is bounded by that size. 495 * 496 */ 497 498pmap_t 499pmap_create(size) 500 vm_size_t size; 501{ 502 register pmap_t pmap; 503 504 /* 505 * Software use map does not need a pmap 506 */ 507 if (size) 508 return (NULL); 509 510 pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK); 511 bzero(pmap, sizeof(*pmap)); 512 pmap_pinit(pmap); 513 return (pmap); 514} 515 516/* 517 * Initialize a preallocated and zeroed pmap structure, 518 * such as one in a vmspace structure. 519 */ 520void 521pmap_pinit(pmap) 522 register struct pmap *pmap; 523{ 524 /* 525 * No need to allocate page table space yet but we do need a valid 526 * page directory table. 527 */ 528 pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, PAGE_SIZE); 529 530 /* wire in kernel global address entries */ 531 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); 532 533 /* install self-referential address mapping entry */ 534 *(int *) (pmap->pm_pdir + PTDPTDI) = 535 ((int) pmap_kextract((vm_offset_t) pmap->pm_pdir)) | PG_V | PG_KW; 536 537 pmap->pm_count = 1; 538 simple_lock_init(&pmap->pm_lock); 539} 540 541/* 542 * grow the number of kernel page table entries, if needed 543 */ 544 545vm_page_t nkpg; 546vm_offset_t kernel_vm_end; 547 548void 549pmap_growkernel(vm_offset_t addr) 550{ 551 struct proc *p; 552 struct pmap *pmap; 553 int s; 554 555 s = splhigh(); 556 if (kernel_vm_end == 0) { 557 kernel_vm_end = KERNBASE; 558 nkpt = 0; 559 while (pdir_pde(PTD, kernel_vm_end)) { 560 kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); 561 ++nkpt; 562 } 563 } 564 addr = (addr + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); 565 while (kernel_vm_end < addr) { 566 if (pdir_pde(PTD, kernel_vm_end)) { 567 kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); 568 continue; 569 } 570 ++nkpt; 571 if (!nkpg) { 572 nkpg = vm_page_alloc(kernel_object, 0, VM_ALLOC_SYSTEM); 573 if (!nkpg) 574 panic("pmap_growkernel: no memory to grow kernel"); 575 vm_page_wire(nkpg); 576 vm_page_remove(nkpg); 577 pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); 578 } 579 pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_KW); 580 nkpg = NULL; 581 582 for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { 583 if (p->p_vmspace) { 584 pmap = &p->p_vmspace->vm_pmap; 585 *pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); 586 } 587 } 588 *pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); 589 kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); 590 } 591 splx(s); 592} 593 594/* 595 * Retire the given physical map from service. 596 * Should only be called if the map contains 597 * no valid mappings. 
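 *
 * Reference-count sketch (illustrative): pmap_pinit() starts a pmap
 * with pm_count = 1, pmap_reference() bumps it, and each reference is
 * eventually dropped with pmap_destroy(); only the final call releases
 * the page directory, e.g.
 *
 *	pmap_reference(pmap);		(share the map)
 *	...
 *	pmap_destroy(pmap);		(drop it again)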
598 */ 599void 600pmap_destroy(pmap) 601 register pmap_t pmap; 602{ 603 int count; 604 605 if (pmap == NULL) 606 return; 607 608 simple_lock(&pmap->pm_lock); 609 count = --pmap->pm_count; 610 simple_unlock(&pmap->pm_lock); 611 if (count == 0) { 612 pmap_release(pmap); 613 free((caddr_t) pmap, M_VMPMAP); 614 } 615} 616 617/* 618 * Release any resources held by the given physical map. 619 * Called when a pmap initialized by pmap_pinit is being released. 620 * Should only be called if the map contains no valid mappings. 621 */ 622void 623pmap_release(pmap) 624 register struct pmap *pmap; 625{ 626 kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); 627} 628 629/* 630 * Add a reference to the specified pmap. 631 */ 632void 633pmap_reference(pmap) 634 pmap_t pmap; 635{ 636 if (pmap != NULL) { 637 simple_lock(&pmap->pm_lock); 638 pmap->pm_count++; 639 simple_unlock(&pmap->pm_lock); 640 } 641} 642 643#define PV_FREELIST_MIN ((NBPG / sizeof (struct pv_entry)) / 2) 644 645/* 646 * Data for the pv entry allocation mechanism 647 */ 648int pv_freelistcnt; 649pv_entry_t pv_freelist; 650vm_offset_t pvva; 651int npvvapg; 652 653/* 654 * free the pv_entry back to the free list 655 */ 656inline static void 657free_pv_entry(pv) 658 pv_entry_t pv; 659{ 660 if (!pv) 661 return; 662 ++pv_freelistcnt; 663 pv->pv_next = pv_freelist; 664 pv_freelist = pv; 665} 666 667/* 668 * get a new pv_entry, allocating a block from the system 669 * when needed. 670 * the memory allocation is performed bypassing the malloc code 671 * because of the possibility of allocations at interrupt time. 672 */ 673static inline pv_entry_t 674get_pv_entry() 675{ 676 pv_entry_t tmp; 677 678 /* 679 * get more pv_entry pages if needed 680 */ 681 if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) { 682 pmap_alloc_pv_entry(); 683 } 684 /* 685 * get a pv_entry off of the free list 686 */ 687 --pv_freelistcnt; 688 tmp = pv_freelist; 689 pv_freelist = tmp->pv_next; 690 return tmp; 691} 692 693/* 694 * this *strange* allocation routine *statistically* eliminates the 695 * *possibility* of a malloc failure (*FATAL*) for a pv_entry_t data structure. 696 * also -- this code is MUCH MUCH faster than the malloc equiv... 697 */ 698static void 699pmap_alloc_pv_entry() 700{ 701 /* 702 * do we have any pre-allocated map-pages left? 
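	 *
	 * (Illustrative note: npvvapg counts the pageable kva pages that
	 * init_pv_entries() reserved at pvva.  Each page claimed here is
	 * wired, mapped with pmap_kenter(), and carved into
	 * NBPG / sizeof(struct pv_entry) new entries for the free list --
	 * e.g. a 4096-byte page and a hypothetical 16-byte entry would
	 * yield 256 entries per allocation.)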
703 */ 704 if (npvvapg) { 705 vm_page_t m; 706 707 /* 708 * we do this to keep recursion away 709 */ 710 pv_freelistcnt += PV_FREELIST_MIN; 711 /* 712 * allocate a physical page out of the vm system 713 */ 714 m = vm_page_alloc(kernel_object, 715 pvva - vm_map_min(kernel_map), VM_ALLOC_INTERRUPT); 716 if (m) { 717 int newentries; 718 int i; 719 pv_entry_t entry; 720 721 newentries = (NBPG / sizeof(struct pv_entry)); 722 /* 723 * wire the page 724 */ 725 vm_page_wire(m); 726 m->flags &= ~PG_BUSY; 727 /* 728 * let the kernel see it 729 */ 730 pmap_kenter(pvva, VM_PAGE_TO_PHYS(m)); 731 732 entry = (pv_entry_t) pvva; 733 /* 734 * update the allocation pointers 735 */ 736 pvva += NBPG; 737 --npvvapg; 738 739 /* 740 * free the entries into the free list 741 */ 742 for (i = 0; i < newentries; i++) { 743 free_pv_entry(entry); 744 entry++; 745 } 746 } 747 pv_freelistcnt -= PV_FREELIST_MIN; 748 } 749 if (!pv_freelist) 750 panic("get_pv_entry: cannot get a pv_entry_t"); 751} 752 753 754 755/* 756 * init the pv_entry allocation system 757 */ 758#define PVSPERPAGE 64 759void 760init_pv_entries(npg) 761 int npg; 762{ 763 /* 764 * allocate enough kvm space for PVSPERPAGE entries per page (lots) 765 * kvm space is fairly cheap, be generous!!! (the system can panic if 766 * this is too small.) 767 */ 768 npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + NBPG - 1) / NBPG; 769 pvva = kmem_alloc_pageable(kernel_map, npvvapg * NBPG); 770 /* 771 * get the first batch of entries 772 */ 773 free_pv_entry(get_pv_entry()); 774} 775 776static pt_entry_t * 777get_pt_entry(pmap) 778 pmap_t pmap; 779{ 780 vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 781 782 /* are we current address space or kernel? */ 783 if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) { 784 return PTmap; 785 } 786 /* otherwise, we are alternate address space */ 787 if (frame != ((int) APTDpde & PG_FRAME)) { 788 APTDpde = pmap->pm_pdir[PTDPTDI]; 789 pmap_update(); 790 } 791 return APTmap; 792} 793 794/* 795 * If it is the first entry on the list, it is actually 796 * in the header and we must copy the following entry up 797 * to the header. Otherwise we must search the list for 798 * the entry. In either case we free the now unused entry. 799 */ 800void 801pmap_remove_entry(pmap, pv, va) 802 struct pmap *pmap; 803 pv_entry_t pv; 804 vm_offset_t va; 805{ 806 pv_entry_t npv; 807 int s; 808 809 s = splhigh(); 810 if (pmap == pv->pv_pmap && va == pv->pv_va) { 811 npv = pv->pv_next; 812 if (npv) { 813 *pv = *npv; 814 free_pv_entry(npv); 815 } else { 816 pv->pv_pmap = NULL; 817 } 818 } else { 819 for (npv = pv->pv_next; npv; npv = npv->pv_next) { 820 if (pmap == npv->pv_pmap && va == npv->pv_va) { 821 break; 822 } 823 pv = npv; 824 } 825 if (npv) { 826 pv->pv_next = npv->pv_next; 827 free_pv_entry(npv); 828 } 829 } 830 splx(s); 831} 832 833/* 834 * Remove the given range of addresses from the specified map. 835 * 836 * It is assumed that the start and end are properly 837 * rounded to the page size. 838 */ 839void 840pmap_remove(pmap, sva, eva) 841 struct pmap *pmap; 842 register vm_offset_t sva; 843 register vm_offset_t eva; 844{ 845 register pt_entry_t *ptp, *ptq; 846 vm_offset_t pa; 847 register pv_entry_t pv; 848 vm_offset_t va; 849 vm_page_t m; 850 pt_entry_t oldpte; 851 852 if (pmap == NULL) 853 return; 854 855 ptp = get_pt_entry(pmap); 856 857 /* 858 * special handling of removing one page. a very 859 * common operation and easy to short circuit some 860 * code. 
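	 *
	 * (Illustrative: this is the path taken by single-page callers
	 * such as pmap_enter() replacing an old mapping, i.e.
	 *
	 *	pmap_remove(pmap, va, va + PAGE_SIZE);
	 *
	 * and it avoids the string-instruction scan used for larger
	 * ranges below.)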
861 */ 862 if ((sva + NBPG) == eva) { 863 864 if (*pmap_pde(pmap, sva) == 0) 865 return; 866 867 ptq = ptp + i386_btop(sva); 868 869 if (!*ptq) 870 return; 871 /* 872 * Update statistics 873 */ 874 if (pmap_pte_w(ptq)) 875 pmap->pm_stats.wired_count--; 876 pmap->pm_stats.resident_count--; 877 878 pa = pmap_pte_pa(ptq); 879 oldpte = *ptq; 880 *ptq = 0; 881 882 if (pmap_is_managed(pa)) { 883 if ((int) oldpte & PG_M) { 884 if ((sva < USRSTACK || sva >= KERNBASE) || 885 (sva >= USRSTACK && sva < USRSTACK + (UPAGES * NBPG))) { 886 if (sva < clean_sva || sva >= clean_eva) { 887 PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; 888 } 889 } 890 } 891 pv = pa_to_pvh(pa); 892 pmap_remove_entry(pmap, pv, sva); 893 } 894 pmap_unuse_pt(pmap, sva); 895 pmap_update(); 896 return; 897 } 898 sva = i386_btop(sva); 899 eva = i386_btop(eva); 900 901 while (sva < eva) { 902 /* 903 * Weed out invalid mappings. Note: we assume that the page 904 * directory table is always allocated, and in kernel virtual. 905 */ 906 907 if (*pmap_pde(pmap, i386_ptob(sva)) == 0) { 908 /* We can race ahead here, straight to next pde.. */ 909 sva = ((sva + NPTEPG) & ~(NPTEPG - 1)); 910 continue; 911 } 912 ptq = ptp + sva; 913 914 /* 915 * search for page table entries, use string operations that 916 * are much faster than explicitly scanning when page tables 917 * are not fully populated. 918 */ 919 if (*ptq == 0) { 920 vm_offset_t pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1)); 921 vm_offset_t nscan = pdnxt - sva; 922 int found = 0; 923 924 if ((nscan + sva) > eva) 925 nscan = eva - sva; 926 927 asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" : 928 "=D"(ptq), "=a"(found) : "c"(nscan), "0"(ptq) : "cx"); 929 930 if (!found) { 931 sva = pdnxt; 932 continue; 933 } 934 ptq -= 1; 935 936 sva = ptq - ptp; 937 } 938 /* 939 * Update statistics 940 */ 941 oldpte = *ptq; 942 if (((int) oldpte) & PG_W) 943 pmap->pm_stats.wired_count--; 944 pmap->pm_stats.resident_count--; 945 946 /* 947 * Invalidate the PTEs. XXX: should cluster them up and 948 * invalidate as many as possible at once. 949 */ 950 *ptq = 0; 951 952 va = i386_ptob(sva); 953 954 /* 955 * Remove from the PV table (raise IPL since we may be called 956 * at interrupt time). 957 */ 958 pa = ((int) oldpte) & PG_FRAME; 959 if (!pmap_is_managed(pa)) { 960 pmap_unuse_pt(pmap, va); 961 ++sva; 962 continue; 963 } 964 if ((int) oldpte & PG_M) { 965 if ((va < USRSTACK || va >= KERNBASE) || 966 (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) { 967 if (va < clean_sva || va >= clean_eva) { 968 PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; 969 } 970 } 971 } 972 pv = pa_to_pvh(pa); 973 pmap_remove_entry(pmap, pv, va); 974 pmap_unuse_pt(pmap, va); 975 ++sva; 976 } 977 pmap_update(); 978} 979 980/* 981 * Routine: pmap_remove_all 982 * Function: 983 * Removes this physical page from 984 * all physical maps in which it resides. 985 * Reflects back modify bits to the pager. 986 * 987 * Notes: 988 * Original versions of this routine were very 989 * inefficient because they iteratively called 990 * pmap_remove (slow...) 991 */ 992void 993pmap_remove_all(pa) 994 vm_offset_t pa; 995{ 996 register pv_entry_t pv, npv; 997 register pt_entry_t *pte, *ptp; 998 vm_offset_t va; 999 struct pmap *pmap; 1000 vm_page_t m; 1001 int s; 1002 int anyvalid = 0; 1003 1004 /* 1005 * Not one of ours 1006 */ 1007 /* 1008 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 1009 * pages! 
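	 *
	 * (Illustrative: pmap_page_protect(pa, VM_PROT_NONE) funnels into
	 * this routine, so a request to strip all access from an
	 * unmanaged (device) page is silently ignored here rather than
	 * honored.)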
1010 */ 1011 if (!pmap_is_managed(pa)) 1012 return; 1013 1014 pa = i386_trunc_page(pa); 1015 pv = pa_to_pvh(pa); 1016 m = PHYS_TO_VM_PAGE(pa); 1017 1018 s = splhigh(); 1019 while (pv->pv_pmap != NULL) { 1020 pmap = pv->pv_pmap; 1021 ptp = get_pt_entry(pmap); 1022 va = pv->pv_va; 1023 pte = ptp + i386_btop(va); 1024 if (pmap_pte_w(pte)) 1025 pmap->pm_stats.wired_count--; 1026 if (*pte) { 1027 pmap->pm_stats.resident_count--; 1028 anyvalid++; 1029 1030 /* 1031 * Update the vm_page_t clean and reference bits. 1032 */ 1033 if ((int) *pte & PG_M) { 1034 if ((va < USRSTACK || va >= KERNBASE) || 1035 (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) { 1036 if (va < clean_sva || va >= clean_eva) { 1037 PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; 1038 } 1039 } 1040 } 1041 *pte = 0; 1042 pmap_unuse_pt(pmap, va); 1043 } 1044 npv = pv->pv_next; 1045 if (npv) { 1046 *pv = *npv; 1047 free_pv_entry(npv); 1048 } else { 1049 pv->pv_pmap = NULL; 1050 } 1051 } 1052 splx(s); 1053 if (anyvalid) 1054 pmap_update(); 1055} 1056 1057 1058/* 1059 * Set the physical protection on the 1060 * specified range of this map as requested. 1061 */ 1062void 1063pmap_protect(pmap, sva, eva, prot) 1064 register pmap_t pmap; 1065 vm_offset_t sva, eva; 1066 vm_prot_t prot; 1067{ 1068 register pt_entry_t *pte; 1069 register vm_offset_t va; 1070 int i386prot; 1071 register pt_entry_t *ptp; 1072 int evap = i386_btop(eva); 1073 int anyvalid = 0;; 1074 1075 if (pmap == NULL) 1076 return; 1077 1078 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1079 pmap_remove(pmap, sva, eva); 1080 return; 1081 } 1082 if (prot & VM_PROT_WRITE) 1083 return; 1084 1085 ptp = get_pt_entry(pmap); 1086 1087 va = sva; 1088 while (va < eva) { 1089 int found = 0; 1090 int svap; 1091 vm_offset_t nscan; 1092 1093 /* 1094 * Page table page is not allocated. Skip it, we don't want to 1095 * force allocation of unnecessary PTE pages just to set the 1096 * protection. 1097 */ 1098 if (!*pmap_pde(pmap, va)) { 1099 /* XXX: avoid address wrap around */ 1100 nextpde: 1101 if (va >= i386_trunc_pdr((vm_offset_t) - 1)) 1102 break; 1103 va = i386_round_pdr(va + PAGE_SIZE); 1104 continue; 1105 } 1106 pte = ptp + i386_btop(va); 1107 1108 if (*pte == 0) { 1109 /* 1110 * scan for a non-empty pte 1111 */ 1112 svap = pte - ptp; 1113 nscan = ((svap + NPTEPG) & ~(NPTEPG - 1)) - svap; 1114 1115 if (nscan + svap > evap) 1116 nscan = evap - svap; 1117 1118 found = 0; 1119 if (nscan) 1120 asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" : 1121 "=D"(pte), "=a"(found) : "c"(nscan), "0"(pte) : "cx"); 1122 1123 if (!found) 1124 goto nextpde; 1125 1126 pte -= 1; 1127 svap = pte - ptp; 1128 1129 va = i386_ptob(svap); 1130 } 1131 anyvalid++; 1132 1133 i386prot = pte_prot(pmap, prot); 1134 if (va < UPT_MAX_ADDRESS) { 1135 i386prot |= PG_u; 1136 if (va >= UPT_MIN_ADDRESS) 1137 i386prot |= PG_RW; 1138 } 1139 pmap_pte_set_prot(pte, i386prot); 1140 va += PAGE_SIZE; 1141 } 1142 if (anyvalid) 1143 pmap_update(); 1144} 1145 1146/* 1147 * Insert the given physical page (p) at 1148 * the specified virtual address (v) in the 1149 * target physical map with the protection requested. 1150 * 1151 * If specified, the page will be wired down, meaning 1152 * that the related pte can not be reclaimed. 1153 * 1154 * NB: This is the only routine which MAY NOT lazy-evaluate 1155 * or lose information. That is, this routine must actually 1156 * insert this page into the given map NOW. 
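 *
 * Usage sketch (illustrative): pmap_map() above enters unwired kernel
 * mappings one page at a time; a wired, writable kernel mapping would
 * look like
 *
 *	pmap_enter(kernel_pmap, va, pa,
 *	    VM_PROT_READ | VM_PROT_WRITE, TRUE);
 *
 * pmap_enter() flushes the TLB itself when it overwrites a previously
 * valid pte.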
 */
void
pmap_enter(pmap, va, pa, prot, wired)
	register pmap_t pmap;
	vm_offset_t va;
	register vm_offset_t pa;
	vm_prot_t prot;
	boolean_t wired;
{
	register pt_entry_t *pte;
	register pt_entry_t npte;
	vm_offset_t opa;
	int ptevalid = 0;

	if (pmap == NULL)
		return;

	va = i386_trunc_page(va);
	pa = i386_trunc_page(pa);
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");

	/*
	 * Page Directory table entry not valid, we need a new PT page
	 */
	if (*pmap_pde(pmap, va) == 0) {
		printf("kernel page directory invalid pdir=0x%x, va=0x%x\n", pmap->pm_pdir[PTDPTDI], va);
		panic("invalid kernel page directory");
	}
	pte = pmap_pte(pmap, va);
	opa = pmap_pte_pa(pte);

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (opa == pa) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && !pmap_pte_w(pte))
			pmap->pm_stats.wired_count++;
		else if (!wired && pmap_pte_w(pte))
			pmap->pm_stats.wired_count--;

		goto validate;
	}
	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		pmap_remove(pmap, va, va + PAGE_SIZE);
	}
	/*
	 * Enter on the PV list if part of our managed memory.  Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */
	if (pmap_is_managed(pa)) {
		register pv_entry_t pv, npv;
		int s;

		pv = pa_to_pvh(pa);
		s = splhigh();
		/*
		 * No entries yet, use header as the first entry
		 */
		if (pv->pv_pmap == NULL) {
			pv->pv_va = va;
			pv->pv_pmap = pmap;
			pv->pv_next = NULL;
		}
		/*
		 * There is at least one other VA mapping this page. Place
		 * this entry after the header.
		 */
		else {
			npv = get_pv_entry();
			npv->pv_va = va;
			npv->pv_pmap = pmap;
			npv->pv_next = pv->pv_next;
			pv->pv_next = npv;
		}
		splx(s);
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	npte = (pt_entry_t) ((int) (pa | pte_prot(pmap, prot) | PG_V));

	/*
	 * When forking (copy-on-write, etc): A process will turn off write
	 * permissions for any of its writable pages. If the data (object) is
	 * only referred to by one process, the process's map is modified
	 * directly as opposed to using the object manipulation routine. When
	 * using pmap_protect, the modified bits are not kept in the vm_page_t
	 * data structure. Therefore, when using pmap_enter in vm_fault to
	 * bring back writability of a page, there has been no memory of the
	 * modified or referenced bits except at the pte level. This clause
	 * supports the carryover of the modified and used (referenced) bits.
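	 *
	 * (Illustrative example: if the old pte for this va was
	 * (pa | PG_V | PG_RW | PG_M) and the page is re-entered read-only,
	 * the PG_M bit is OR-ed into the new pte below, so a later
	 * pmap_is_modified() still reports the page dirty.)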
1269 */ 1270 if (pa == opa) 1271 (int) npte |= (int) *pte & (PG_M | PG_U); 1272 1273 1274 if (wired) 1275 (int) npte |= PG_W; 1276 if (va < UPT_MIN_ADDRESS) 1277 (int) npte |= PG_u; 1278 else if (va < UPT_MAX_ADDRESS) 1279 (int) npte |= PG_u | PG_RW; 1280 1281 if (*pte != npte) { 1282 if (*pte) 1283 ptevalid++; 1284 *pte = npte; 1285 } 1286 if (ptevalid) { 1287 pmap_update(); 1288 } else { 1289 pmap_use_pt(pmap, va); 1290 } 1291} 1292 1293/* 1294 * Add a list of wired pages to the kva 1295 * this routine is only used for temporary 1296 * kernel mappings that do not need to have 1297 * page modification or references recorded. 1298 * Note that old mappings are simply written 1299 * over. The page *must* be wired. 1300 */ 1301void 1302pmap_qenter(va, m, count) 1303 vm_offset_t va; 1304 vm_page_t *m; 1305 int count; 1306{ 1307 int i; 1308 int anyvalid = 0; 1309 register pt_entry_t *pte; 1310 1311 for (i = 0; i < count; i++) { 1312 pte = vtopte(va + i * NBPG); 1313 if (*pte) 1314 anyvalid++; 1315 *pte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | PG_W)); 1316 } 1317 if (anyvalid) 1318 pmap_update(); 1319} 1320/* 1321 * this routine jerks page mappings from the 1322 * kernel -- it is meant only for temporary mappings. 1323 */ 1324void 1325pmap_qremove(va, count) 1326 vm_offset_t va; 1327 int count; 1328{ 1329 int i; 1330 register pt_entry_t *pte; 1331 1332 for (i = 0; i < count; i++) { 1333 pte = vtopte(va + i * NBPG); 1334 *pte = 0; 1335 } 1336 pmap_update(); 1337} 1338 1339/* 1340 * add a wired page to the kva 1341 * note that in order for the mapping to take effect -- you 1342 * should do a pmap_update after doing the pmap_kenter... 1343 */ 1344void 1345pmap_kenter(va, pa) 1346 vm_offset_t va; 1347 register vm_offset_t pa; 1348{ 1349 register pt_entry_t *pte; 1350 int wasvalid = 0; 1351 1352 pte = vtopte(va); 1353 1354 if (*pte) 1355 wasvalid++; 1356 1357 *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_W)); 1358 1359 if (wasvalid) 1360 pmap_update(); 1361} 1362 1363/* 1364 * remove a page from the kernel pagetables 1365 */ 1366void 1367pmap_kremove(va) 1368 vm_offset_t va; 1369{ 1370 register pt_entry_t *pte; 1371 1372 pte = vtopte(va); 1373 1374 *pte = (pt_entry_t) 0; 1375 pmap_update(); 1376} 1377 1378/* 1379 * this code makes some *MAJOR* assumptions: 1380 * 1. Current pmap & pmap exists. 1381 * 2. Not wired. 1382 * 3. Read access. 1383 * 4. No page table pages. 1384 * 5. Tlbflush is deferred to calling procedure. 1385 * 6. Page IS managed. 1386 * but is *MUCH* faster than pmap_enter... 1387 */ 1388 1389static inline void 1390pmap_enter_quick(pmap, va, pa) 1391 register pmap_t pmap; 1392 vm_offset_t va; 1393 register vm_offset_t pa; 1394{ 1395 register pt_entry_t *pte; 1396 register pv_entry_t pv, npv; 1397 int s; 1398 1399 /* 1400 * Enter on the PV list if part of our managed memory Note that we 1401 * raise IPL while manipulating pv_table since pmap_enter can be 1402 * called at interrupt time. 1403 */ 1404 1405 pte = vtopte(va); 1406 1407 /* a fault on the page table might occur here */ 1408 if (*pte) { 1409 pmap_remove(pmap, va, va + PAGE_SIZE); 1410 } 1411 pv = pa_to_pvh(pa); 1412 s = splhigh(); 1413 /* 1414 * No entries yet, use header as the first entry 1415 */ 1416 if (pv->pv_pmap == NULL) { 1417 pv->pv_pmap = pmap; 1418 pv->pv_va = va; 1419 pv->pv_next = NULL; 1420 } 1421 /* 1422 * There is at least one other VA mapping this page. Place this entry 1423 * after the header. 
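	 *
	 * (Illustrative: the first mapping of a page lives in the static
	 * pv_table[] header itself, so after a second mapping is entered
	 * the chain looks like
	 *
	 *	pv_table[n] = { pmapA, vaA } -> { pmapB, vaB } -> NULL
	 *
	 * with only the second node coming from get_pv_entry().)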
1424 */ 1425 else { 1426 npv = get_pv_entry(); 1427 npv->pv_va = va; 1428 npv->pv_pmap = pmap; 1429 npv->pv_next = pv->pv_next; 1430 pv->pv_next = npv; 1431 } 1432 splx(s); 1433 1434 /* 1435 * Increment counters 1436 */ 1437 pmap->pm_stats.resident_count++; 1438 1439 /* 1440 * Now validate mapping with desired protection/wiring. 1441 */ 1442 *pte = (pt_entry_t) ((int) (pa | PG_V | PG_u)); 1443 1444 pmap_use_pt(pmap, va); 1445 1446 return; 1447} 1448 1449#define MAX_INIT_PT (1024*2048) 1450/* 1451 * pmap_object_init_pt preloads the ptes for a given object 1452 * into the specified pmap. This eliminates the blast of soft 1453 * faults on process startup and immediately after an mmap. 1454 */ 1455void 1456pmap_object_init_pt(pmap, addr, object, offset, size) 1457 pmap_t pmap; 1458 vm_offset_t addr; 1459 vm_object_t object; 1460 vm_offset_t offset; 1461 vm_offset_t size; 1462{ 1463 vm_offset_t tmpoff; 1464 vm_page_t p; 1465 int bits; 1466 int objbytes; 1467 1468 if (!pmap || ((size > MAX_INIT_PT) && 1469 (object->resident_page_count > (MAX_INIT_PT / NBPG)))) { 1470 return; 1471 } 1472 if (!vm_object_lock_try(object)) 1473 return; 1474 1475 /* 1476 * if we are processing a major portion of the object, then scan the 1477 * entire thing. 1478 */ 1479 if (size > (object->size >> 2)) { 1480 objbytes = size; 1481 1482 for (p = object->memq.tqh_first; 1483 ((objbytes > 0) && (p != NULL)); 1484 p = p->listq.tqe_next) { 1485 1486 tmpoff = p->offset; 1487 if (tmpoff < offset) { 1488 continue; 1489 } 1490 tmpoff -= offset; 1491 if (tmpoff >= size) { 1492 continue; 1493 } 1494 if (((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) && 1495 ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 1496 (p->bmapped == 0) && 1497 (p->busy == 0) && 1498 (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) { 1499 vm_page_hold(p); 1500 p->flags |= PG_MAPPED; 1501 pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p)); 1502 vm_page_unhold(p); 1503 } 1504 objbytes -= NBPG; 1505 } 1506 } else { 1507 /* 1508 * else lookup the pages one-by-one. 1509 */ 1510 for (tmpoff = 0; tmpoff < size; tmpoff += NBPG) { 1511 p = vm_page_lookup(object, tmpoff + offset); 1512 if (p && ((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) && 1513 (p->bmapped == 0) && (p->busy == 0) && 1514 ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 1515 (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) { 1516 vm_page_hold(p); 1517 p->flags |= PG_MAPPED; 1518 pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p)); 1519 vm_page_unhold(p); 1520 } 1521 } 1522 } 1523 vm_object_unlock(object); 1524} 1525 1526#if 0 1527/* 1528 * pmap_prefault provides a quick way of clustering 1529 * pagefaults into a processes address space. It is a "cousin" 1530 * of pmap_object_init_pt, except it runs at page fault time instead 1531 * of mmap time. 
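 *
 * (Illustrative note: with PFBAK = PFFOR = 2, the candidate addresses
 * tried around the faulting address follow pmap_prefault_pageorder[],
 * i.e. addra - NBPG, addra + NBPG, addra - 2*NBPG, addra + 2*NBPG,
 * clipped to the map entry; the whole routine is currently compiled
 * out under "#if 0".)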
1532 */ 1533#define PFBAK 2 1534#define PFFOR 2 1535#define PAGEORDER_SIZE (PFBAK+PFFOR) 1536 1537static int pmap_prefault_pageorder[] = { 1538 -NBPG, NBPG, -2 * NBPG, 2 * NBPG 1539}; 1540 1541void 1542pmap_prefault(pmap, addra, entry, object) 1543 pmap_t pmap; 1544 vm_offset_t addra; 1545 vm_map_entry_t entry; 1546 vm_object_t object; 1547{ 1548 int i; 1549 vm_offset_t starta, enda; 1550 vm_offset_t offset, addr; 1551 vm_page_t m; 1552 int pageorder_index; 1553 1554 if (entry->object.vm_object != object) 1555 return; 1556 1557 if (pmap != &curproc->p_vmspace->vm_pmap) 1558 return; 1559 1560 starta = addra - PFBAK * NBPG; 1561 if (starta < entry->start) { 1562 starta = entry->start; 1563 } else if (starta > addra) 1564 starta = 0; 1565 1566 enda = addra + PFFOR * NBPG; 1567 if (enda > entry->end) 1568 enda = entry->end; 1569 1570 for (i = 0; i < PAGEORDER_SIZE; i++) { 1571 vm_object_t lobject; 1572 pt_entry_t *pte; 1573 1574 addr = addra + pmap_prefault_pageorder[i]; 1575 if (addr < starta || addr >= enda) 1576 continue; 1577 1578 pte = vtopte(addr); 1579 if (*pte) 1580 continue; 1581 1582 offset = (addr - entry->start) + entry->offset; 1583 lobject = object; 1584 for (m = vm_page_lookup(lobject, offset); 1585 (!m && lobject->shadow && !lobject->pager); 1586 lobject = lobject->shadow) { 1587 1588 offset += lobject->shadow_offset; 1589 m = vm_page_lookup(lobject->shadow, offset); 1590 } 1591 1592 /* 1593 * give-up when a page is not in memory 1594 */ 1595 if (m == NULL) 1596 break; 1597 1598 if (((m->flags & (PG_CACHE | PG_ACTIVE | PG_INACTIVE)) != 0) && 1599 ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 1600 (m->busy == 0) && 1601 (m->bmapped == 0) && 1602 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 1603 /* 1604 * test results show that the system is faster when 1605 * pages are activated. 1606 */ 1607 if ((m->flags & PG_ACTIVE) == 0) { 1608 if( m->flags & PG_CACHE) 1609 vm_page_deactivate(m); 1610 else 1611 vm_page_activate(m); 1612 } 1613 vm_page_hold(m); 1614 m->flags |= PG_MAPPED; 1615 pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); 1616 vm_page_unhold(m); 1617 } 1618 } 1619} 1620#endif 1621 1622/* 1623 * Routine: pmap_change_wiring 1624 * Function: Change the wiring attribute for a map/virtual-address 1625 * pair. 1626 * In/out conditions: 1627 * The mapping must already exist in the pmap. 1628 */ 1629void 1630pmap_change_wiring(pmap, va, wired) 1631 register pmap_t pmap; 1632 vm_offset_t va; 1633 boolean_t wired; 1634{ 1635 register pt_entry_t *pte; 1636 1637 if (pmap == NULL) 1638 return; 1639 1640 pte = pmap_pte(pmap, va); 1641 1642 if (wired && !pmap_pte_w(pte)) 1643 pmap->pm_stats.wired_count++; 1644 else if (!wired && pmap_pte_w(pte)) 1645 pmap->pm_stats.wired_count--; 1646 1647 /* 1648 * Wiring is not a hardware characteristic so there is no need to 1649 * invalidate TLB. 1650 */ 1651 pmap_pte_set_w(pte, wired); 1652 /* 1653 * When unwiring, set the modified bit in the pte -- could have been 1654 * changed by the kernel 1655 */ 1656 if (!wired) 1657 (int) *pte |= PG_M; 1658} 1659 1660 1661 1662/* 1663 * Copy the range specified by src_addr/len 1664 * from the source map to the range dst_addr/len 1665 * in the destination map. 1666 * 1667 * This routine is only advisory and need not do anything. 
1668 */ 1669void 1670pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 1671 pmap_t dst_pmap, src_pmap; 1672 vm_offset_t dst_addr; 1673 vm_size_t len; 1674 vm_offset_t src_addr; 1675{ 1676} 1677 1678/* 1679 * Routine: pmap_kernel 1680 * Function: 1681 * Returns the physical map handle for the kernel. 1682 */ 1683pmap_t 1684pmap_kernel() 1685{ 1686 return (kernel_pmap); 1687} 1688 1689/* 1690 * pmap_zero_page zeros the specified (machine independent) 1691 * page by mapping the page into virtual memory and using 1692 * bzero to clear its contents, one machine dependent page 1693 * at a time. 1694 */ 1695void 1696pmap_zero_page(phys) 1697 vm_offset_t phys; 1698{ 1699 if (*(int *) CMAP2) 1700 panic("pmap_zero_page: CMAP busy"); 1701 1702 *(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(phys); 1703 bzero(CADDR2, NBPG); 1704 1705 *(int *) CMAP2 = 0; 1706 pmap_update(); 1707} 1708 1709/* 1710 * pmap_copy_page copies the specified (machine independent) 1711 * page by mapping the page into virtual memory and using 1712 * bcopy to copy the page, one machine dependent page at a 1713 * time. 1714 */ 1715void 1716pmap_copy_page(src, dst) 1717 vm_offset_t src; 1718 vm_offset_t dst; 1719{ 1720 if (*(int *) CMAP1 || *(int *) CMAP2) 1721 panic("pmap_copy_page: CMAP busy"); 1722 1723 *(int *) CMAP1 = PG_V | PG_KW | i386_trunc_page(src); 1724 *(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(dst); 1725 1726#if __GNUC__ > 1 1727 memcpy(CADDR2, CADDR1, NBPG); 1728#else 1729 bcopy(CADDR1, CADDR2, NBPG); 1730#endif 1731 *(int *) CMAP1 = 0; 1732 *(int *) CMAP2 = 0; 1733 pmap_update(); 1734} 1735 1736 1737/* 1738 * Routine: pmap_pageable 1739 * Function: 1740 * Make the specified pages (by pmap, offset) 1741 * pageable (or not) as requested. 1742 * 1743 * A page which is not pageable may not take 1744 * a fault; therefore, its page table entry 1745 * must remain valid for the duration. 1746 * 1747 * This routine is merely advisory; pmap_enter 1748 * will specify that these pages are to be wired 1749 * down (or not) as appropriate. 1750 */ 1751void 1752pmap_pageable(pmap, sva, eva, pageable) 1753 pmap_t pmap; 1754 vm_offset_t sva, eva; 1755 boolean_t pageable; 1756{ 1757} 1758 1759/* 1760 * this routine returns true if a physical page resides 1761 * in the given pmap. 1762 */ 1763boolean_t 1764pmap_page_exists(pmap, pa) 1765 pmap_t pmap; 1766 vm_offset_t pa; 1767{ 1768 register pv_entry_t pv; 1769 int s; 1770 1771 if (!pmap_is_managed(pa)) 1772 return FALSE; 1773 1774 pv = pa_to_pvh(pa); 1775 s = splhigh(); 1776 1777 /* 1778 * Not found, check current mappings returning immediately if found. 1779 */ 1780 if (pv->pv_pmap != NULL) { 1781 for (; pv; pv = pv->pv_next) { 1782 if (pv->pv_pmap == pmap) { 1783 splx(s); 1784 return TRUE; 1785 } 1786 } 1787 } 1788 splx(s); 1789 return (FALSE); 1790} 1791 1792/* 1793 * pmap_testbit tests bits in pte's 1794 * note that the testbit/changebit routines are inline, 1795 * and a lot of things compile-time evaluate. 1796 */ 1797__inline boolean_t 1798pmap_testbit(pa, bit) 1799 register vm_offset_t pa; 1800 int bit; 1801{ 1802 register pv_entry_t pv; 1803 pt_entry_t *pte; 1804 int s; 1805 1806 if (!pmap_is_managed(pa)) 1807 return FALSE; 1808 1809 pv = pa_to_pvh(pa); 1810 s = splhigh(); 1811 1812 /* 1813 * Not found, check current mappings returning immediately if found. 1814 */ 1815 if (pv->pv_pmap != NULL) { 1816 for (; pv; pv = pv->pv_next) { 1817 /* 1818 * if the bit being tested is the modified bit, then 1819 * mark UPAGES as always modified, and ptes as never 1820 * modified. 
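	 *
	 * (Illustrative reading of the checks below: a va in the UPAGES
	 * range [USRSTACK, USRSTACK + UPAGES * NBPG) answers TRUE for
	 * PG_M without looking at the pte, a va above that range but
	 * still below KERNBASE answers FALSE, the pager's clean_sva..
	 * clean_eva window is skipped entirely, and everything else
	 * falls through to testing the pte itself.)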
1821 */ 1822 if (bit & PG_U) { 1823 if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { 1824 continue; 1825 } 1826 } 1827 if (bit & PG_M) { 1828 if (pv->pv_va >= USRSTACK) { 1829 if (pv->pv_va >= clean_sva && pv->pv_va < clean_eva) { 1830 continue; 1831 } 1832 if (pv->pv_va < USRSTACK + (UPAGES * NBPG)) { 1833 splx(s); 1834 return TRUE; 1835 } else if (pv->pv_va < KERNBASE) { 1836 splx(s); 1837 return FALSE; 1838 } 1839 } 1840 } 1841 if (!pv->pv_pmap) { 1842 printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 1843 continue; 1844 } 1845 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 1846 if ((int) *pte & bit) { 1847 splx(s); 1848 return TRUE; 1849 } 1850 } 1851 } 1852 splx(s); 1853 return (FALSE); 1854} 1855 1856/* 1857 * this routine is used to modify bits in ptes 1858 */ 1859__inline void 1860pmap_changebit(pa, bit, setem) 1861 vm_offset_t pa; 1862 int bit; 1863 boolean_t setem; 1864{ 1865 register pv_entry_t pv; 1866 register pt_entry_t *pte, npte; 1867 vm_offset_t va; 1868 int s; 1869 1870 if (!pmap_is_managed(pa)) 1871 return; 1872 1873 pv = pa_to_pvh(pa); 1874 s = splhigh(); 1875 1876 /* 1877 * Loop over all current mappings setting/clearing as appropos If 1878 * setting RO do we need to clear the VAC? 1879 */ 1880 if (pv->pv_pmap != NULL) { 1881 for (; pv; pv = pv->pv_next) { 1882 va = pv->pv_va; 1883 1884 /* 1885 * don't write protect pager mappings 1886 */ 1887 if (!setem && (bit == PG_RW)) { 1888 if (va >= clean_sva && va < clean_eva) 1889 continue; 1890 } 1891 if (!pv->pv_pmap) { 1892 printf("Null pmap (cb) at va: 0x%lx\n", va); 1893 continue; 1894 } 1895 pte = pmap_pte(pv->pv_pmap, va); 1896 if (setem) 1897 (int) npte = (int) *pte | bit; 1898 else 1899 (int) npte = (int) *pte & ~bit; 1900 *pte = npte; 1901 } 1902 } 1903 splx(s); 1904 pmap_update(); 1905} 1906 1907/* 1908 * pmap_page_protect: 1909 * 1910 * Lower the permission for all mappings to a given page. 1911 */ 1912void 1913pmap_page_protect(phys, prot) 1914 vm_offset_t phys; 1915 vm_prot_t prot; 1916{ 1917 if ((prot & VM_PROT_WRITE) == 0) { 1918 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) 1919 pmap_changebit(phys, PG_RW, FALSE); 1920 else 1921 pmap_remove_all(phys); 1922 } 1923} 1924 1925vm_offset_t 1926pmap_phys_address(ppn) 1927 int ppn; 1928{ 1929 return (i386_ptob(ppn)); 1930} 1931 1932/* 1933 * pmap_is_referenced: 1934 * 1935 * Return whether or not the specified physical page was referenced 1936 * by any physical maps. 1937 */ 1938boolean_t 1939pmap_is_referenced(vm_offset_t pa) 1940{ 1941 return pmap_testbit((pa), PG_U); 1942} 1943 1944/* 1945 * pmap_is_modified: 1946 * 1947 * Return whether or not the specified physical page was modified 1948 * in any physical maps. 1949 */ 1950boolean_t 1951pmap_is_modified(vm_offset_t pa) 1952{ 1953 return pmap_testbit((pa), PG_M); 1954} 1955 1956/* 1957 * Clear the modify bits on the specified physical page. 1958 */ 1959void 1960pmap_clear_modify(vm_offset_t pa) 1961{ 1962 pmap_changebit((pa), PG_M, FALSE); 1963} 1964 1965/* 1966 * pmap_clear_reference: 1967 * 1968 * Clear the reference bit on the specified physical page. 1969 */ 1970void 1971pmap_clear_reference(vm_offset_t pa) 1972{ 1973 pmap_changebit((pa), PG_U, FALSE); 1974} 1975 1976/* 1977 * Routine: pmap_copy_on_write 1978 * Function: 1979 * Remove write privileges from all 1980 * physical maps for this physical page. 
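 *
 * Usage sketch (illustrative): a caller holding the page's vm_page_t
 * would write
 *
 *	pmap_copy_on_write(VM_PAGE_TO_PHYS(m));
 *
 * which is simply pmap_changebit(pa, PG_RW, FALSE): every mapping of
 * the page loses PG_RW and the TLB is flushed, so the next write
 * faults and can be given a private copy.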
1981 */ 1982void 1983pmap_copy_on_write(vm_offset_t pa) 1984{ 1985 pmap_changebit((pa), PG_RW, FALSE); 1986} 1987 1988/* 1989 * Miscellaneous support routines follow 1990 */ 1991 1992void 1993i386_protection_init() 1994{ 1995 register int *kp, prot; 1996 1997 kp = protection_codes; 1998 for (prot = 0; prot < 8; prot++) { 1999 switch (prot) { 2000 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: 2001 /* 2002 * Read access is also 0. There isn't any execute bit, 2003 * so just make it readable. 2004 */ 2005 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: 2006 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: 2007 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: 2008 *kp++ = 0; 2009 break; 2010 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: 2011 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: 2012 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: 2013 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: 2014 *kp++ = PG_RW; 2015 break; 2016 } 2017 } 2018} 2019 2020/* 2021 * Map a set of physical memory pages into the kernel virtual 2022 * address space. Return a pointer to where it is mapped. This 2023 * routine is intended to be used for mapping device memory, 2024 * NOT real memory. The non-cacheable bits are set on each 2025 * mapped page. 2026 */ 2027void * 2028pmap_mapdev(pa, size) 2029 vm_offset_t pa; 2030 vm_size_t size; 2031{ 2032 vm_offset_t va, tmpva; 2033 pt_entry_t *pte; 2034 2035 pa = trunc_page(pa); 2036 size = roundup(size, PAGE_SIZE); 2037 2038 va = kmem_alloc_pageable(kernel_map, size); 2039 if (!va) 2040 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 2041 2042 for (tmpva = va; size > 0;) { 2043 pte = vtopte(tmpva); 2044 *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N)); 2045 size -= PAGE_SIZE; 2046 tmpva += PAGE_SIZE; 2047 pa += PAGE_SIZE; 2048 } 2049 pmap_update(); 2050 2051 return ((void *) va); 2052} 2053 2054#ifdef DEBUG 2055/* print address space of pmap*/ 2056void 2057pads(pm) 2058 pmap_t pm; 2059{ 2060 unsigned va, i, j; 2061 pt_entry_t *ptep; 2062 2063 if (pm == kernel_pmap) 2064 return; 2065 for (i = 0; i < 1024; i++) 2066 if (pm->pm_pdir[i]) 2067 for (j = 0; j < 1024; j++) { 2068 va = (i << PD_SHIFT) + (j << PG_SHIFT); 2069 if (pm == kernel_pmap && va < KERNBASE) 2070 continue; 2071 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 2072 continue; 2073 ptep = pmap_pte(pm, va); 2074 if (pmap_pte_v(ptep)) 2075 printf("%x:%x ", va, *(int *) ptep); 2076 }; 2077 2078} 2079 2080void 2081pmap_pvdump(pa) 2082 vm_offset_t pa; 2083{ 2084 register pv_entry_t pv; 2085 2086 printf("pa %x", pa); 2087 for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) { 2088#ifdef used_to_be 2089 printf(" -> pmap %x, va %x, flags %x", 2090 pv->pv_pmap, pv->pv_va, pv->pv_flags); 2091#endif 2092 printf(" -> pmap %x, va %x", 2093 pv->pv_pmap, pv->pv_va); 2094 pads(pv->pv_pmap); 2095 } 2096 printf(" "); 2097} 2098#endif 2099
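/*
 * Example (illustrative only; the address is hypothetical): a driver
 * needing to reach memory-mapped device registers can use pmap_mapdev()
 * above, which marks the pages non-cacheable (PG_N):
 *
 *	void *regs = pmap_mapdev(0xfe000000, NBPG);
 *
 * The kva comes from kmem_alloc_pageable() and no routine in this file
 * ever unmaps it, so such a mapping is effectively permanent.
 */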