pmap.c revision 5943
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	$Id: pmap.c,v 1.46 1995/01/26 01:45:02 davidg Exp $
 */

/*
 * Derived from hp300 version by Mike Hibler, this version by William
 * Jolitz uses a recursive map [a pde points to the page directory] to
 * map the page tables using the pagetables themselves. This is done to
 * reduce the impact on kernel virtual memory for lots of sparse address
 * space, and to reduce the cost of memory to each process.
 *
 *	Derived from: hp300/@(#)pmap.c	7.1 (Berkeley) 12/5/90
 */
/*
 * Major modifications by John S. Dyson primarily to support
 * pageable page tables, eliminating pmap_attributes,
 * discontiguous memory pages, and using more efficient string
 * instructions. Jan 13, 1994.  Further modifications on Mar 2, 1994,
 * general clean-up and efficiency mods.
 */

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#include <i386/include/cputypes.h>

#include <i386/isa/isa.h>

/*
 * Allocate various and sundry SYSMAPs used in the days of old VM
 * and not yet converted.  XXX.
 */
#define BSDVM_COMPAT	1

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define pmap_pde(m, v)	(&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023]))
#define pdir_pde(m, v)	(m[((vm_offset_t)(v) >> PD_SHIFT)&1023])

#define pmap_pte_pa(pte)	(*(int *)(pte) & PG_FRAME)

#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define pmap_pte_u(pte)		((*(int *)pte & PG_U) != 0)
#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)

#define pmap_pte_set_w(pte, v)		((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
#define pmap_pte_set_prot(pte, v)	((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))

/*
 * Given a map and a machine independent protection code,
 * convert to a vax protection code.
 */
#define pte_prot(m, p)	(protection_codes[p])
int protection_codes[8];

struct pmap kernel_pmap_store;
pmap_t kernel_pmap;

vm_offset_t phys_avail[6];	/* 2 entries + 1 null */
vm_offset_t avail_start;	/* PA of first available physical page */
vm_offset_t avail_end;		/* PA of last available physical page */
vm_size_t mem_size;		/* memory size in bytes */
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
int i386pagesperpage;		/* PAGE_SIZE / I386_PAGE_SIZE */
boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
vm_offset_t vm_first_phys, vm_last_phys;

static inline int pmap_is_managed();
static inline void *vm_get_pmap();
static inline void vm_put_pmap();
static void i386_protection_init();
static void pmap_alloc_pv_entry();
static inline pv_entry_t get_pv_entry();
inline void pmap_use_pt();
inline void pmap_unuse_pt();
int nkpt;

extern vm_offset_t clean_sva, clean_eva;
extern int cpu_class;

#if BSDVM_COMPAT
#include <sys/msgbuf.h>

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1, *CMAP2, *ptmmap;
caddr_t CADDR1, CADDR2, ptvmmap;
pt_entry_t *msgbufmap;
struct msgbuf *msgbufp;

#endif

void
init_pv_entries(int);
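
/*
 * Note on the recursive mapping: because PTD slot PTDPTDI points back
 * at the page directory itself, the PTEs covering any va in the current
 * address space appear at a fixed virtual location -- vtopte(va) is
 * simply PTmap + i386_btop(va) -- and avtopte() is the analogous lookup
 * through the alternate (APTDpde) recursive slot.  pmap_pte() below
 * relies on this to reach any pmap's PTEs without creating temporary
 * mappings.
 */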

/*
 * Routine:	pmap_pte
 * Function:
 *	Extract the page table entry associated
 *	with the given map/virtual_address pair.
 * [ what about induced faults -wfj]
 */

inline pt_entry_t * const
pmap_pte(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{

	if (pmap && *pmap_pde(pmap, va)) {
		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;

		/* are we current address space or kernel? */
		if ((pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME)))
			return ((pt_entry_t *) vtopte(va));
		/* otherwise, we are alternate address space */
		else {
			if (frame != ((int) APTDpde & PG_FRAME)) {
				APTDpde = pmap->pm_pdir[PTDPTDI];
				pmap_update();
			}
			return ((pt_entry_t *) avtopte(va));
		}
	}
	return (0);
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */

vm_offset_t
pmap_extract(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	vm_offset_t pa;

	if (pmap && *pmap_pde(pmap, va)) {
		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;

		/* are we current address space or kernel? */
		if ((pmap == kernel_pmap)
		    || (frame == ((int) PTDpde & PG_FRAME))) {
			pa = *(int *) vtopte(va);
			/* otherwise, we are alternate address space */
		} else {
			if (frame != ((int) APTDpde & PG_FRAME)) {
				APTDpde = pmap->pm_pdir[PTDPTDI];
				pmap_update();
			}
			pa = *(int *) avtopte(va);
		}
		return ((pa & PG_FRAME) | (va & ~PG_FRAME));
	}
	return 0;

}

/*
 * determine if a page is managed (memory vs. device)
 */
static inline int
pmap_is_managed(pa)
	vm_offset_t pa;
{
	int i;

	if (!pmap_initialized)
		return 0;

	for (i = 0; phys_avail[i + 1]; i += 2) {
		if (pa >= phys_avail[i] && pa < phys_avail[i + 1])
			return 1;
	}
	return 0;
}

/*
 * find the vm_page_t of a pte (only) given va of pte and pmap
 */
__inline vm_page_t
pmap_pte_vm_page(pmap, pt)
	pmap_t pmap;
	vm_offset_t pt;
{
	vm_page_t m;

	pt = i386_trunc_page(pt);
	pt = (pt - UPT_MIN_ADDRESS) / NBPG;
	pt = ((vm_offset_t) pmap->pm_pdir[pt]) & PG_FRAME;
	m = PHYS_TO_VM_PAGE(pt);
	return m;
}

/*
 * Wire a page table page
 */
inline void
pmap_use_pt(pmap, va)
	pmap_t pmap;
	vm_offset_t va;
{
	vm_offset_t pt;

	if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized)
		return;

	pt = (vm_offset_t) vtopte(va);
	vm_page_hold(pmap_pte_vm_page(pmap, pt));
}

/*
 * Unwire a page table page
 */
inline void
pmap_unuse_pt(pmap, va)
	pmap_t pmap;
	vm_offset_t va;
{
	vm_offset_t pt;
	vm_page_t m;

	if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized)
		return;

	pt = (vm_offset_t) vtopte(va);
	m = pmap_pte_vm_page(pmap, pt);
	vm_page_unhold(m);
	if (pmap != kernel_pmap &&
	    (m->hold_count == 0) &&
	    (m->wire_count == 0) &&
	    (va < KPT_MIN_ADDRESS)) {
		vm_page_deactivate(m);
	}
}
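
/*
 * Taken together, pmap_use_pt() and pmap_unuse_pt() maintain a hold
 * count on each user page table page.  Once the last mapping in a PT
 * page goes away (hold_count and wire_count both zero), the page is
 * deactivated so the pageout daemon may reclaim it; this is what makes
 * user page tables pageable.
 */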

/* [ macro again?, should I force kstack into user map here? -wfj ] */
void
pmap_activate(pmap, pcbp)
	register pmap_t pmap;
	struct pcb *pcbp;
{
	PMAP_ACTIVATE(pmap, pcbp);
}

/*
 * Bootstrap the system enough to run with virtual memory.
 * Map the kernel's code and data, and allocate the system page table.
 *
 * On the I386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */

#define DMAPAGES 8
void
pmap_bootstrap(firstaddr, loadaddr)
	vm_offset_t firstaddr;
	vm_offset_t loadaddr;
{
#if BSDVM_COMPAT
	vm_offset_t va;
	pt_entry_t *pte;

#endif

	avail_start = firstaddr + DMAPAGES * NBPG;

	virtual_avail = (vm_offset_t) KERNBASE + avail_start;
	virtual_end = VM_MAX_KERNEL_ADDRESS;
	i386pagesperpage = PAGE_SIZE / NBPG;

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence.
	 */
	kernel_pmap = &kernel_pmap_store;

	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD);

	simple_lock_init(&kernel_pmap->pm_lock);
	kernel_pmap->pm_count = 1;
	nkpt = NKPT;

#if BSDVM_COMPAT
	/*
	 * Allocate all the submaps we need
	 */
#define SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*NBPG); p = pte; pte += (n);

	va = virtual_avail;
	pte = pmap_pte(kernel_pmap, va);

	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 1)
	virtual_avail = va;
#endif
	/*
	 * Reserve special hunk of memory for use by bus dma as a bounce
	 * buffer (contiguous virtual *and* physical memory).
	 */
	{
		extern vm_offset_t isaphysmem;

		isaphysmem = va;

		virtual_avail = pmap_map(va, firstaddr,
		    firstaddr + DMAPAGES * NBPG, VM_PROT_ALL);
	}

	*(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0;
	pmap_update();

}
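
/*
 * Sizing note for pmap_init() below: one struct pv_entry list head is
 * allocated per managed physical page (npg of them), so, for example, a
 * machine with 16MB of managed memory and 4K pages gets 4096 heads in
 * pv_table.
 */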

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 *	pmap_init has been enhanced to support in a fairly consistent
 *	way, discontiguous physical memory.
 */
void
pmap_init(phys_start, phys_end)
	vm_offset_t phys_start, phys_end;
{
	vm_offset_t addr;
	vm_size_t npg, s;
	int i;

	/*
	 * Now that kernel map has been allocated, we can mark as unavailable
	 * regions which we have mapped in locore.
	 */
	addr = atdevbase;
	(void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0,
	    &addr, (0x100000 - 0xa0000), FALSE);

	addr = (vm_offset_t) KERNBASE + IdlePTD;
	vm_object_reference(kernel_object);
	(void) vm_map_find(kernel_map, kernel_object, addr,
	    &addr, (4 + NKPDE) * NBPG, FALSE);

	/*
	 * calculate the number of pv_entries needed
	 */
	vm_first_phys = phys_avail[0];
	for (i = 0; phys_avail[i + 1]; i += 2);
	npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / NBPG;

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */
	s = (vm_size_t) (sizeof(struct pv_entry) * npg);
	s = i386_round_page(s);
	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
	pv_table = (pv_entry_t) addr;

	/*
	 * init the pv free list
	 */
	init_pv_entries(npg);
	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * For now, VM is already on, we only need to map the
 * specified memory.
 */
vm_offset_t
pmap_map(virt, start, end, prot)
	vm_offset_t virt;
	vm_offset_t start;
	vm_offset_t end;
	int prot;
{
	while (start < end) {
		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
		virt += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	return (virt);
}

/*
 * Create and return a physical map.
 *
 * If the size specified for the map
 * is zero, the map is an actual physical
 * map, and may be referenced by the
 * hardware.
 *
 * If the size specified is non-zero,
 * the map will be used in software only, and
 * is bounded by that size.
 *
 * [ just allocate a ptd and mark it uninitialized -- should we track
 *   with a table which process has which ptd? -wfj ]
 */

pmap_t
pmap_create(size)
	vm_size_t size;
{
	register pmap_t pmap;

	/*
	 * Software use map does not need a pmap
	 */
	if (size)
		return (NULL);

	pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK);
	bzero(pmap, sizeof(*pmap));
	pmap_pinit(pmap);
	return (pmap);
}


struct pmaplist {
	struct pmaplist *next;
};

static inline void *
vm_get_pmap()
{
	struct pmaplist *rtval;

	rtval = (struct pmaplist *) kmem_alloc(kernel_map, ctob(1));
	bzero(rtval, ctob(1));
	return rtval;
}

static inline void
vm_put_pmap(up)
	struct pmaplist *up;
{
	kmem_free(kernel_map, (vm_offset_t) up, ctob(1));
}
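
/*
 * vm_get_pmap() and vm_put_pmap() above simply hand out single zeroed
 * kernel pages (ctob(1) bytes) to serve as page directories;
 * pmap_pinit() below fills in the kernel range and the recursive entry.
 */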

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(pmap)
	register struct pmap *pmap;
{
	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	pmap->pm_pdir = (pd_entry_t *) vm_get_pmap();

	/* wire in kernel global address entries */
	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);

	/* install self-referential address mapping entry */
	*(int *) (pmap->pm_pdir + PTDPTDI) =
	    ((int) pmap_kextract((vm_offset_t) pmap->pm_pdir)) | PG_V | PG_KW;

	pmap->pm_count = 1;
	simple_lock_init(&pmap->pm_lock);
}

/*
 * grow the number of kernel page table entries, if needed
 */

vm_page_t nkpg;
vm_offset_t kernel_vm_end;

void
pmap_growkernel(vm_offset_t addr)
{
	struct proc *p;
	struct pmap *pmap;
	int s;

	s = splhigh();
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;
		while (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
			++nkpt;
		}
	}
	addr = (addr + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
	while (kernel_vm_end < addr) {
		if (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
			continue;
		}
		++nkpt;
		if (!nkpg) {
			nkpg = vm_page_alloc(kernel_object, 0, VM_ALLOC_SYSTEM);
			if (!nkpg)
				panic("pmap_growkernel: no memory to grow kernel");
			vm_page_wire(nkpg);
			vm_page_remove(nkpg);
			pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
		}
		pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_KW);
		nkpg = NULL;

		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
			if (p->p_vmspace) {
				pmap = &p->p_vmspace->vm_pmap;
				*pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
			}
		}
		*pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
		kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
	}
	splx(s);
}

/*
 * Retire the given physical map from service.
 * Should only be called if the map contains
 * no valid mappings.
 */
void
pmap_destroy(pmap)
	register pmap_t pmap;
{
	int count;

	if (pmap == NULL)
		return;

	simple_lock(&pmap->pm_lock);
	count = --pmap->pm_count;
	simple_unlock(&pmap->pm_lock);
	if (count == 0) {
		pmap_release(pmap);
		free((caddr_t) pmap, M_VMPMAP);
	}
}

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap)
	register struct pmap *pmap;
{
	vm_put_pmap((struct pmaplist *) pmap->pm_pdir);
}

/*
 * Add a reference to the specified pmap.
 */
void
pmap_reference(pmap)
	pmap_t pmap;
{
	if (pmap != NULL) {
		simple_lock(&pmap->pm_lock);
		pmap->pm_count++;
		simple_unlock(&pmap->pm_lock);
	}
}

#define PV_FREELIST_MIN ((NBPG / sizeof (struct pv_entry)) / 2)

/*
 * Data for the pv entry allocation mechanism
 */
int pv_freelistcnt;
pv_entry_t pv_freelist;
vm_offset_t pvva;
int npvvapg;

/*
 * free the pv_entry back to the free list
 */
inline static void
free_pv_entry(pv)
	pv_entry_t pv;
{
	if (!pv)
		return;
	++pv_freelistcnt;
	pv->pv_next = pv_freelist;
	pv_freelist = pv;
}
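
/*
 * The pv_entry allocator below keeps at least PV_FREELIST_MIN entries
 * in reserve so that requests arriving at interrupt time (by way of
 * pmap_enter) can be satisfied without recursing into the VM system.
 */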

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 * the memory allocation is performed bypassing the malloc code
 * because of the possibility of allocations at interrupt time.
 */
static inline pv_entry_t
get_pv_entry()
{
	pv_entry_t tmp;

	/*
	 * get more pv_entry pages if needed
	 */
	if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) {
		pmap_alloc_pv_entry();
	}
	/*
	 * get a pv_entry off of the free list
	 */
	--pv_freelistcnt;
	tmp = pv_freelist;
	pv_freelist = tmp->pv_next;
	return tmp;
}

/*
 * this *strange* allocation routine *statistically* eliminates the
 * *possibility* of a malloc failure (*FATAL*) for a pv_entry_t data structure.
 * also -- this code is MUCH MUCH faster than the malloc equiv...
 */
static void
pmap_alloc_pv_entry()
{
	/*
	 * do we have any pre-allocated map-pages left?
	 */
	if (npvvapg) {
		vm_page_t m;

		/*
		 * we do this to keep recursion away
		 */
		pv_freelistcnt += PV_FREELIST_MIN;
		/*
		 * allocate a physical page out of the vm system
		 */
		m = vm_page_alloc(kernel_object,
		    pvva - vm_map_min(kernel_map), VM_ALLOC_INTERRUPT);
		if (m) {
			int newentries;
			int i;
			pv_entry_t entry;

			newentries = (NBPG / sizeof(struct pv_entry));
			/*
			 * wire the page
			 */
			vm_page_wire(m);
			m->flags &= ~PG_BUSY;
			/*
			 * let the kernel see it
			 */
			pmap_kenter(pvva, VM_PAGE_TO_PHYS(m));

			entry = (pv_entry_t) pvva;
			/*
			 * update the allocation pointers
			 */
			pvva += NBPG;
			--npvvapg;

			/*
			 * free the entries into the free list
			 */
			for (i = 0; i < newentries; i++) {
				free_pv_entry(entry);
				entry++;
			}
		}
		pv_freelistcnt -= PV_FREELIST_MIN;
	}
	if (!pv_freelist)
		panic("get_pv_entry: cannot get a pv_entry_t");
}



/*
 * init the pv_entry allocation system
 */
#define PVSPERPAGE 64
void
init_pv_entries(npg)
	int npg;
{
	/*
	 * allocate enough kvm space for PVSPERPAGE entries per page (lots)
	 * kvm space is fairly cheap, be generous!!! (the system can panic if
	 * this is too small.)
	 */
	npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + NBPG - 1) / NBPG;
	pvva = kmem_alloc_pageable(kernel_map, npvvapg * NBPG);
	/*
	 * get the first batch of entries
	 */
	free_pv_entry(get_pv_entry());
}

static pt_entry_t *
get_pt_entry(pmap)
	pmap_t pmap;
{
	vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;

	/* are we current address space or kernel? */
	if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) {
		return PTmap;
	}
	/* otherwise, we are alternate address space */
	if (frame != ((int) APTDpde & PG_FRAME)) {
		APTDpde = pmap->pm_pdir[PTDPTDI];
		pmap_update();
	}
	return APTmap;
}
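
/*
 * get_pt_entry() above parallels pmap_pte(): it returns the base of the
 * live page table map (PTmap) when the pmap is current or the kernel's,
 * and otherwise points the alternate recursive slot (APTDpde) at the
 * target pmap, flushing the TLB before handing back APTmap.
 */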

/*
 * If it is the first entry on the list, it is actually
 * in the header and we must copy the following entry up
 * to the header.  Otherwise we must search the list for
 * the entry.  In either case we free the now unused entry.
 */
void
pmap_remove_entry(pmap, pv, va)
	struct pmap *pmap;
	pv_entry_t pv;
	vm_offset_t va;
{
	pv_entry_t npv;
	int s;

	s = splhigh();
	if (pmap == pv->pv_pmap && va == pv->pv_va) {
		npv = pv->pv_next;
		if (npv) {
			*pv = *npv;
			free_pv_entry(npv);
		} else {
			pv->pv_pmap = NULL;
		}
	} else {
		for (npv = pv->pv_next; npv; npv = npv->pv_next) {
			if (pmap == npv->pv_pmap && va == npv->pv_va) {
				break;
			}
			pv = npv;
		}
		if (npv) {
			pv->pv_next = npv->pv_next;
			free_pv_entry(npv);
		}
	}
	splx(s);
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap, sva, eva)
	struct pmap *pmap;
	register vm_offset_t sva;
	register vm_offset_t eva;
{
	register pt_entry_t *ptp, *ptq;
	vm_offset_t pa;
	register pv_entry_t pv;
	vm_offset_t va;
	vm_page_t m;
	pt_entry_t oldpte;

	if (pmap == NULL)
		return;

	ptp = get_pt_entry(pmap);

	/*
	 * special handling of removing one page.  a very
	 * common operation and easy to short circuit some
	 * code.
	 */
	if ((sva + NBPG) == eva) {

		if (*pmap_pde(pmap, sva) == 0)
			return;

		ptq = ptp + i386_btop(sva);

		if (!*ptq)
			return;
		/*
		 * Update statistics
		 */
		if (pmap_pte_w(ptq))
			pmap->pm_stats.wired_count--;
		pmap->pm_stats.resident_count--;

		pa = pmap_pte_pa(ptq);
		oldpte = *ptq;
		*ptq = 0;

		if (pmap_is_managed(pa)) {
			if ((int) oldpte & PG_M) {
				if ((sva < USRSTACK || sva > UPT_MAX_ADDRESS) ||
				    (sva >= USRSTACK && sva < USRSTACK + (UPAGES * NBPG))) {
					if (sva < clean_sva || sva >= clean_eva) {
						PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
					}
				}
			}
			pv = pa_to_pvh(pa);
			pmap_remove_entry(pmap, pv, sva);
		}
		pmap_unuse_pt(pmap, sva);
		pmap_update();
		return;
	}
	sva = i386_btop(sva);
	eva = i386_btop(eva);

	while (sva < eva) {
		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */

		if (*pmap_pde(pmap, i386_ptob(sva)) == 0) {
			/* We can race ahead here, straight to next pde.. */
			sva = ((sva + NPTEPG) & ~(NPTEPG - 1));
			continue;
		}
		ptq = ptp + sva;

		/*
		 * search for page table entries, use string operations that
		 * are much faster than explicitly scanning when page tables
		 * are not fully populated.
		 */
		if (*ptq == 0) {
			vm_offset_t pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1));
			vm_offset_t nscan = pdnxt - sva;
			int found = 0;

			if ((nscan + sva) > eva)
				nscan = eva - sva;

			asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" :
			    "=D"(ptq), "=a"(found) : "c"(nscan), "0"(ptq) : "cx");

			if (!found) {
				sva = pdnxt;
				continue;
			}
			ptq -= 1;

			sva = ptq - ptp;
		}
		/*
		 * Update statistics
		 */
		oldpte = *ptq;
		if (((int) oldpte) & PG_W)
			pmap->pm_stats.wired_count--;
		pmap->pm_stats.resident_count--;

		/*
		 * Invalidate the PTEs. XXX: should cluster them up and
		 * invalidate as many as possible at once.
		 */
		*ptq = 0;

		va = i386_ptob(sva);

		/*
		 * Remove from the PV table (raise IPL since we may be called
		 * at interrupt time).
		 */
		pa = ((int) oldpte) & PG_FRAME;
		if (!pmap_is_managed(pa)) {
			pmap_unuse_pt(pmap, va);
			++sva;
			continue;
		}
		if ((int) oldpte & PG_M) {
			if ((va < USRSTACK || va > UPT_MAX_ADDRESS) ||
			    (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) {
				if (va < clean_sva || va >= clean_eva) {
					PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
				}
			}
		}
		pv = pa_to_pvh(pa);
		pmap_remove_entry(pmap, pv, va);
		pmap_unuse_pt(pmap, va);
		++sva;
	}
	pmap_update();
}
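
/*
 * The "repe scasl" asm in pmap_remove() above scans a word at a time
 * for the next non-zero PTE; on sparsely populated page tables this is
 * far cheaper than an explicit C loop over every slot.  pmap_protect()
 * below uses the same trick.
 */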

/*
 * Routine:	pmap_remove_all
 * Function:
 *	Removes this physical page from
 *	all physical maps in which it resides.
 *	Reflects back modify bits to the pager.
 *
 * Notes:
 *	Original versions of this routine were very
 *	inefficient because they iteratively called
 *	pmap_remove (slow...)
 */
void
pmap_remove_all(pa)
	vm_offset_t pa;
{
	register pv_entry_t pv, npv;
	register pt_entry_t *pte, *ptp;
	vm_offset_t va;
	struct pmap *pmap;
	vm_page_t m;
	int s;
	int anyvalid = 0;

	/*
	 * Not one of ours
	 */
	/*
	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
	 * pages!
	 */
	if (!pmap_is_managed(pa))
		return;

	pa = i386_trunc_page(pa);
	pv = pa_to_pvh(pa);
	m = PHYS_TO_VM_PAGE(pa);

	s = splhigh();
	while (pv->pv_pmap != NULL) {
		pmap = pv->pv_pmap;
		ptp = get_pt_entry(pmap);
		va = pv->pv_va;
		pte = ptp + i386_btop(va);
		if (pmap_pte_w(pte))
			pmap->pm_stats.wired_count--;
		if (*pte) {
			pmap->pm_stats.resident_count--;
			anyvalid++;

			/*
			 * Update the vm_page_t clean and reference bits.
			 */
			if ((int) *pte & PG_M) {
				if ((va < USRSTACK || va > UPT_MAX_ADDRESS) ||
				    (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) {
					if (va < clean_sva || va >= clean_eva) {
						PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
					}
				}
			}
			*pte = 0;
			pmap_unuse_pt(pmap, va);
		}
		npv = pv->pv_next;
		if (npv) {
			*pv = *npv;
			free_pv_entry(npv);
		} else {
			pv->pv_pmap = NULL;
		}
	}
	splx(s);
	if (anyvalid)
		pmap_update();
}
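
/*
 * Like pmap_remove() and pmap_remove_all() above, pmap_protect() below
 * batches its TLB work: it counts the valid PTEs it touches (anyvalid)
 * and issues a single pmap_update() at the end instead of one flush
 * per page.
 */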

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap, sva, eva, prot)
	register pmap_t pmap;
	vm_offset_t sva, eva;
	vm_prot_t prot;
{
	register pt_entry_t *pte;
	register vm_offset_t va;
	int i386prot;
	register pt_entry_t *ptp;
	int evap = i386_btop(eva);
	int anyvalid = 0;

	if (pmap == NULL)
		return;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}
	if (prot & VM_PROT_WRITE)
		return;

	ptp = get_pt_entry(pmap);

	va = sva;
	while (va < eva) {
		int found = 0;
		int svap;
		vm_offset_t nscan;

		/*
		 * Page table page is not allocated. Skip it, we don't want to
		 * force allocation of unnecessary PTE pages just to set the
		 * protection.
		 */
		if (!*pmap_pde(pmap, va)) {
			/* XXX: avoid address wrap around */
nextpde:
			if (va >= i386_trunc_pdr((vm_offset_t) - 1))
				break;
			va = i386_round_pdr(va + PAGE_SIZE);
			continue;
		}
		pte = ptp + i386_btop(va);

		if (*pte == 0) {
			/*
			 * scan for a non-empty pte
			 */
			svap = pte - ptp;
			nscan = ((svap + NPTEPG) & ~(NPTEPG - 1)) - svap;

			if (nscan + svap > evap)
				nscan = evap - svap;

			found = 0;
			if (nscan)
				asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" :
				    "=D"(pte), "=a"(found) : "c"(nscan), "0"(pte) : "cx");

			if (!found)
				goto nextpde;

			pte -= 1;
			svap = pte - ptp;

			va = i386_ptob(svap);
		}
		anyvalid++;

		i386prot = pte_prot(pmap, prot);
		if (va < UPT_MAX_ADDRESS) {
			i386prot |= PG_u;
			if (va >= UPT_MIN_ADDRESS)
				i386prot |= PG_RW;
		}
		pmap_pte_set_prot(pte, i386prot);
		va += PAGE_SIZE;
	}
	if (anyvalid)
		pmap_update();
}
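
/*
 * Note that pmap_protect() never adds permissions: requests that
 * include VM_PROT_WRITE return without doing anything, write access
 * presumably being restored lazily by the fault path through
 * pmap_enter().
 */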

/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB: This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */
void
pmap_enter(pmap, va, pa, prot, wired)
	register pmap_t pmap;
	vm_offset_t va;
	register vm_offset_t pa;
	vm_prot_t prot;
	boolean_t wired;
{
	register pt_entry_t *pte;
	register pt_entry_t npte;
	vm_offset_t opa;
	int ptevalid = 0;

	if (pmap == NULL)
		return;

	va = i386_trunc_page(va);
	pa = i386_trunc_page(pa);
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");

	/*
	 * Page Directory table entry not valid, we need a new PT page
	 */
	if (*pmap_pde(pmap, va) == 0) {
		printf("kernel page directory invalid pdir=0x%x, va=0x%x\n", pmap->pm_pdir[PTDPTDI], va);
		panic("invalid kernel page directory");
	}
	pte = pmap_pte(pmap, va);
	opa = pmap_pte_pa(pte);

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (opa == pa) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && !pmap_pte_w(pte))
			pmap->pm_stats.wired_count++;
		else if (!wired && pmap_pte_w(pte))
			pmap->pm_stats.wired_count--;

		goto validate;
	}
	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		pmap_remove(pmap, va, va + PAGE_SIZE);
	}
	/*
	 * Enter on the PV list if part of our managed memory.  Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */
	if (pmap_is_managed(pa)) {
		register pv_entry_t pv, npv;
		int s;

		pv = pa_to_pvh(pa);
		s = splhigh();
		/*
		 * No entries yet, use header as the first entry
		 */
		if (pv->pv_pmap == NULL) {
			pv->pv_va = va;
			pv->pv_pmap = pmap;
			pv->pv_next = NULL;
		}
		/*
		 * There is at least one other VA mapping this page. Place
		 * this entry after the header.
		 */
		else {
			npv = get_pv_entry();
			npv->pv_va = va;
			npv->pv_pmap = pmap;
			npv->pv_next = pv->pv_next;
			pv->pv_next = npv;
		}
		splx(s);
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	npte = (pt_entry_t) ((int) (pa | pte_prot(pmap, prot) | PG_V));

	/*
	 * When forking (copy-on-write, etc): A process will turn off write
	 * permissions for any of its writable pages.  If the data (object) is
	 * only referred to by one process, the process's map is modified
	 * directly as opposed to using the object manipulation routine.  When
	 * using pmap_protect, the modified bits are not kept in the vm_page_t
	 * data structure.  Therefore, when using pmap_enter in vm_fault to
	 * bring back writability of a page, there has been no memory of the
	 * modified or referenced bits except at the pte level.  This clause
	 * supports the carryover of the modified and used (referenced) bits.
	 */
	if (pa == opa)
		(int) npte |= (int) *pte & (PG_M | PG_U);


	if (wired)
		(int) npte |= PG_W;
	if (va < UPT_MIN_ADDRESS)
		(int) npte |= PG_u;
	else if (va < UPT_MAX_ADDRESS)
		(int) npte |= PG_u | PG_RW;

	if (*pte != npte) {
		if (*pte)
			ptevalid++;
		*pte = npte;
	}
	if (ptevalid) {
		pmap_update();
	} else {
		pmap_use_pt(pmap, va);
	}
}

/*
 * Add a list of wired pages to the kva.
 * This routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 */
void
pmap_qenter(va, m, count)
	vm_offset_t va;
	vm_page_t *m;
	int count;
{
	int i;
	int anyvalid = 0;
	register pt_entry_t *pte;

	for (i = 0; i < count; i++) {
		pte = vtopte(va + i * NBPG);
		if (*pte)
			anyvalid++;
		*pte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | PG_W));
	}
	if (anyvalid)
		pmap_update();
}
/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(va, count)
	vm_offset_t va;
	int count;
{
	int i;
	register pt_entry_t *pte;

	for (i = 0; i < count; i++) {
		pte = vtopte(va + i * NBPG);
		*pte = 0;
	}
	pmap_update();
}
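
/*
 * pmap_qenter()/pmap_qremove() above and pmap_kenter()/pmap_kremove()
 * below write kernel PTEs directly and record no PV entries, so pages
 * mapped this way must stay wired and are invisible to
 * pmap_remove_all() and the bit test/change routines.
 */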

/*
 * add a wired page to the kva
 * note that in order for the mapping to take effect -- you
 * should do a pmap_update after doing the pmap_kenter...
 */
void
pmap_kenter(va, pa)
	vm_offset_t va;
	register vm_offset_t pa;
{
	register pt_entry_t *pte;
	int wasvalid = 0;

	pte = vtopte(va);

	if (*pte)
		wasvalid++;

	*pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_W));

	if (wasvalid)
		pmap_update();
}

/*
 * remove a page from the kernel pagetables
 */
void
pmap_kremove(va)
	vm_offset_t va;
{
	register pt_entry_t *pte;

	pte = vtopte(va);

	*pte = (pt_entry_t) 0;
	pmap_update();
}

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * 5. Tlbflush is deferred to calling procedure.
 * 6. Page IS managed.
 * but is *MUCH* faster than pmap_enter...
 */

static inline void
pmap_enter_quick(pmap, va, pa)
	register pmap_t pmap;
	vm_offset_t va;
	register vm_offset_t pa;
{
	register pt_entry_t *pte;
	register pv_entry_t pv, npv;
	int s;

	/*
	 * Enter on the PV list if part of our managed memory.  Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */

	pte = vtopte(va);

	/* a fault on the page table might occur here */
	if (*pte) {
		pmap_remove(pmap, va, va + PAGE_SIZE);
	}
	pv = pa_to_pvh(pa);
	s = splhigh();
	/*
	 * No entries yet, use header as the first entry
	 */
	if (pv->pv_pmap == NULL) {
		pv->pv_pmap = pmap;
		pv->pv_va = va;
		pv->pv_next = NULL;
	}
	/*
	 * There is at least one other VA mapping this page. Place this entry
	 * after the header.
	 */
	else {
		npv = get_pv_entry();
		npv->pv_va = va;
		npv->pv_pmap = pmap;
		npv->pv_next = pv->pv_next;
		pv->pv_next = npv;
	}
	splx(s);

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;

	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	*pte = (pt_entry_t) ((int) (pa | PG_V | PG_u));

	pmap_use_pt(pmap, va);

	return;
}
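
/*
 * MAX_INIT_PT below is 2MB, i.e. 512 4K pages: mappings larger than
 * that, of objects with more than 512 resident pages, are not preloaded
 * at all and are left to the normal soft-fault path.
 */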

#define MAX_INIT_PT (1024*2048)
/*
 * pmap_object_init_pt preloads the ptes for a given object
 * into the specified pmap.  This eliminates the blast of soft
 * faults on process startup and immediately after an mmap.
 */
void
pmap_object_init_pt(pmap, addr, object, offset, size)
	pmap_t pmap;
	vm_offset_t addr;
	vm_object_t object;
	vm_offset_t offset;
	vm_offset_t size;
{
	vm_offset_t tmpoff;
	vm_page_t p;
	int bits;
	int objbytes;

	if (!pmap || ((size > MAX_INIT_PT) &&
		(object->resident_page_count > (MAX_INIT_PT / NBPG)))) {
		return;
	}
	if (!vm_object_lock_try(object))
		return;

	/*
	 * if we are processing a major portion of the object, then scan the
	 * entire thing.
	 */
	if (size > (object->size >> 2)) {
		objbytes = size;

		for (p = object->memq.tqh_first;
		    ((objbytes > 0) && (p != NULL));
		    p = p->listq.tqe_next) {

			tmpoff = p->offset;
			if (tmpoff < offset) {
				continue;
			}
			tmpoff -= offset;
			if (tmpoff >= size) {
				continue;
			}
			if ((p->bmapped == 0) &&
			    (p->busy == 0) &&
			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
			    ((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) &&
			    (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) {
				vm_page_hold(p);
				pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p));
				vm_page_unhold(p);
			}
			objbytes -= NBPG;
		}
	} else {
		/*
		 * else lookup the pages one-by-one.
		 */
		for (tmpoff = 0; tmpoff < size; tmpoff += NBPG) {
			p = vm_page_lookup(object, tmpoff + offset);
			if (p && (p->bmapped == 0) &&
			    (p->busy == 0) &&
			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
			    ((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) &&
			    (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) {
				vm_page_hold(p);
				pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p));
				vm_page_unhold(p);
			}
		}
	}
	vm_object_unlock(object);
}

#if 0
/*
 * pmap_prefault provides a quick way of clustering
 * pagefaults into a process's address space.  It is a "cousin"
 * of pmap_object_init_pt, except it runs at page fault time instead
 * of mmap time.
 */
#define PFBAK 2
#define PFFOR 2
#define PAGEORDER_SIZE (PFBAK+PFFOR)

static int pmap_prefault_pageorder[] = {
	-NBPG, NBPG, -2 * NBPG, 2 * NBPG
};

void
pmap_prefault(pmap, addra, entry, object)
	pmap_t pmap;
	vm_offset_t addra;
	vm_map_entry_t entry;
	vm_object_t object;
{
	int i;
	vm_offset_t starta, enda;
	vm_offset_t offset, addr;
	vm_page_t m;
	int pageorder_index;

	if (entry->object.vm_object != object)
		return;

	if (pmap != &curproc->p_vmspace->vm_pmap)
		return;

	starta = addra - PFBAK * NBPG;
	if (starta < entry->start) {
		starta = entry->start;
	} else if (starta > addra)
		starta = 0;

	enda = addra + PFFOR * NBPG;
	if (enda > entry->end)
		enda = entry->end;

	for (i = 0; i < PAGEORDER_SIZE; i++) {
		vm_object_t lobject;
		pt_entry_t *pte;

		addr = addra + pmap_prefault_pageorder[i];
		if (addr < starta || addr >= enda)
			continue;

		pte = vtopte(addr);
		if (*pte)
			continue;

		offset = (addr - entry->start) + entry->offset;
		lobject = object;
		for (m = vm_page_lookup(lobject, offset);
		    (!m && lobject->shadow);
		    lobject = lobject->shadow) {

			offset += lobject->shadow_offset;
			m = vm_page_lookup(lobject->shadow, offset);
		}

		/*
		 * give-up when a page is not in memory
		 */
		if (m == NULL)
			break;

		if ((m->bmapped == 0) &&
		    (m->busy == 0) &&
		    ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
		    ((m->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) &&
		    (m->flags & (PG_CACHE | PG_BUSY | PG_FICTITIOUS)) == 0) {
			/*
			 * test results show that the system is faster when
			 * pages are activated.
			 */
			if ((m->flags & PG_ACTIVE) == 0)
				vm_page_activate(m);
			vm_page_hold(m);
			pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m));
			vm_page_unhold(m);
		}
	}
}
#endif
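
/*
 * When enabled, pmap_prefault() above examines a small window of PFBAK
 * pages behind and PFFOR pages ahead of the faulting address and maps
 * any resident, valid neighbors it finds, clustering nearby soft faults
 * into one.
 */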

/*
 * Routine:	pmap_change_wiring
 * Function:	Change the wiring attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap, va, wired)
	register pmap_t pmap;
	vm_offset_t va;
	boolean_t wired;
{
	register pt_entry_t *pte;

	if (pmap == NULL)
		return;

	pte = pmap_pte(pmap, va);

	if (wired && !pmap_pte_w(pte))
		pmap->pm_stats.wired_count++;
	else if (!wired && pmap_pte_w(pte))
		pmap->pm_stats.wired_count--;

	/*
	 * Wiring is not a hardware characteristic so there is no need to
	 * invalidate TLB.
	 */
	pmap_pte_set_w(pte, wired);
	/*
	 * When unwiring, set the modified bit in the pte -- could have been
	 * changed by the kernel
	 */
	if (!wired)
		(int) *pte |= PG_M;
}



/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
void
pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
	pmap_t dst_pmap, src_pmap;
	vm_offset_t dst_addr;
	vm_size_t len;
	vm_offset_t src_addr;
{
}

/*
 * Routine:	pmap_kernel
 * Function:
 *	Returns the physical map handle for the kernel.
 */
pmap_t
pmap_kernel()
{
	return (kernel_pmap);
}

/*
 * pmap_zero_page zeros the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bzero to clear its contents, one machine dependent page
 * at a time.
 */
void
pmap_zero_page(phys)
	vm_offset_t phys;
{
	if (*(int *) CMAP2)
		panic("pmap_zero_page: CMAP busy");

	*(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(phys);
	bzero(CADDR2, NBPG);

	*(int *) CMAP2 = 0;
	pmap_update();
}

/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(src, dst)
	vm_offset_t src;
	vm_offset_t dst;
{
	if (*(int *) CMAP1 || *(int *) CMAP2)
		panic("pmap_copy_page: CMAP busy");

	*(int *) CMAP1 = PG_V | PG_KW | i386_trunc_page(src);
	*(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(dst);

#if __GNUC__ > 1
	memcpy(CADDR2, CADDR1, NBPG);
#else
	bcopy(CADDR1, CADDR2, NBPG);
#endif
	*(int *) CMAP1 = 0;
	*(int *) CMAP2 = 0;
	pmap_update();
}
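
/*
 * pmap_zero_page() and pmap_copy_page() above work through the CMAP1
 * and CMAP2 PTEs reserved in pmap_bootstrap(): the physical page is
 * mapped at a fixed kernel va just long enough to run bzero/bcopy,
 * then the PTE is cleared and the TLB flushed.
 */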

/*
 * Routine:	pmap_pageable
 * Function:
 *	Make the specified pages (by pmap, offset)
 *	pageable (or not) as requested.
 *
 *	A page which is not pageable may not take
 *	a fault; therefore, its page table entry
 *	must remain valid for the duration.
 *
 *	This routine is merely advisory; pmap_enter
 *	will specify that these pages are to be wired
 *	down (or not) as appropriate.
 */
void
pmap_pageable(pmap, sva, eva, pageable)
	pmap_t pmap;
	vm_offset_t sva, eva;
	boolean_t pageable;
{
}

/*
 * this routine returns true if a physical page resides
 * in the given pmap.
 */
boolean_t
pmap_page_exists(pmap, pa)
	pmap_t pmap;
	vm_offset_t pa;
{
	register pv_entry_t pv;
	int s;

	if (!pmap_is_managed(pa))
		return FALSE;

	pv = pa_to_pvh(pa);
	s = splhigh();

	/*
	 * Not found, check current mappings returning immediately if found.
	 */
	if (pv->pv_pmap != NULL) {
		for (; pv; pv = pv->pv_next) {
			if (pv->pv_pmap == pmap) {
				splx(s);
				return TRUE;
			}
		}
	}
	splx(s);
	return (FALSE);
}

/*
 * pmap_testbit tests bits in pte's
 * note that the testbit/changebit routines are inline,
 * and a lot of things compile-time evaluate.
 */
__inline boolean_t
pmap_testbit(pa, bit)
	register vm_offset_t pa;
	int bit;
{
	register pv_entry_t pv;
	pt_entry_t *pte;
	int s;

	if (!pmap_is_managed(pa))
		return FALSE;

	pv = pa_to_pvh(pa);
	s = splhigh();

	/*
	 * Not found, check current mappings returning immediately if found.
	 */
	if (pv->pv_pmap != NULL) {
		for (; pv; pv = pv->pv_next) {
			/*
			 * if the bit being tested is the modified bit, then
			 * mark UPAGES as always modified, and ptes as never
			 * modified.
			 */
			if (bit & PG_U) {
				if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) {
					continue;
				}
			}
			if (bit & PG_M) {
				if (pv->pv_va >= USRSTACK) {
					if (pv->pv_va >= clean_sva && pv->pv_va < clean_eva) {
						continue;
					}
					if (pv->pv_va < USRSTACK + (UPAGES * NBPG)) {
						splx(s);
						return TRUE;
					} else if (pv->pv_va < UPT_MAX_ADDRESS) {
						splx(s);
						return FALSE;
					}
				}
			}
			if (!pv->pv_pmap) {
				printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va);
				continue;
			}
			pte = pmap_pte(pv->pv_pmap, pv->pv_va);
			if ((int) *pte & bit) {
				splx(s);
				return TRUE;
			}
		}
	}
	splx(s);
	return (FALSE);
}

/*
 * this routine is used to modify bits in ptes
 */
__inline void
pmap_changebit(pa, bit, setem)
	vm_offset_t pa;
	int bit;
	boolean_t setem;
{
	register pv_entry_t pv;
	register pt_entry_t *pte, npte;
	vm_offset_t va;
	int s;

	if (!pmap_is_managed(pa))
		return;

	pv = pa_to_pvh(pa);
	s = splhigh();

	/*
	 * Loop over all current mappings setting/clearing as apropos.  If
	 * setting RO do we need to clear the VAC?
	 */
	if (pv->pv_pmap != NULL) {
		for (; pv; pv = pv->pv_next) {
			va = pv->pv_va;

			/*
			 * don't write protect pager mappings
			 */
			if (!setem && (bit == PG_RW)) {
				if (va >= clean_sva && va < clean_eva)
					continue;
			}
			if (!pv->pv_pmap) {
				printf("Null pmap (cb) at va: 0x%lx\n", va);
				continue;
			}
			pte = pmap_pte(pv->pv_pmap, va);
			if (setem)
				(int) npte = (int) *pte | bit;
			else
				(int) npte = (int) *pte & ~bit;
			*pte = npte;
		}
	}
	splx(s);
	pmap_update();
}
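
/*
 * pmap_testbit() and pmap_changebit() above walk the PV list of a
 * physical page so the machine-independent layer can query or clear
 * PG_M/PG_U across every mapping of that page; pmap_is_modified() and
 * the other wrappers below are thin veneers over them.
 */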

/*
 *	pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
 */
void
pmap_page_protect(phys, prot)
	vm_offset_t phys;
	vm_prot_t prot;
{
	if ((prot & VM_PROT_WRITE) == 0) {
		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
			pmap_changebit(phys, PG_RW, FALSE);
		else
			pmap_remove_all(phys);
	}
}

vm_offset_t
pmap_phys_address(ppn)
	int ppn;
{
	return (i386_ptob(ppn));
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(vm_offset_t pa)
{
	return pmap_testbit((pa), PG_U);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_offset_t pa)
{
	return pmap_testbit((pa), PG_M);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_offset_t pa)
{
	pmap_changebit((pa), PG_M, FALSE);
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_offset_t pa)
{
	pmap_changebit((pa), PG_U, FALSE);
}

/*
 *	Routine:	pmap_copy_on_write
 *	Function:
 *		Remove write privileges from all
 *		physical maps for this physical page.
 */
void
pmap_copy_on_write(vm_offset_t pa)
{
	pmap_changebit((pa), PG_RW, FALSE);
}

/*
 * Miscellaneous support routines follow
 */

void
i386_protection_init()
{
	register int *kp, prot;

	kp = protection_codes;
	for (prot = 0; prot < 8; prot++) {
		switch (prot) {
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
			/*
			 * Read access is also 0. There isn't any execute bit,
			 * so just make it readable.
			 */
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
			*kp++ = 0;
			break;
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
			*kp++ = PG_RW;
			break;
		}
	}
}
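
/*
 * As an example of the table built by i386_protection_init() above:
 * protection_codes[VM_PROT_READ | VM_PROT_WRITE] ends up PG_RW, while
 * every combination without write ends up 0, since i386 PTEs have no
 * separate execute bit and read implies execute.
 */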

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.  The non-cacheable bits are set on each
 * mapped page.
 */
void *
pmap_mapdev(pa, size)
	vm_offset_t pa;
	vm_size_t size;
{
	vm_offset_t va, tmpva;
	pt_entry_t *pte;

	pa = trunc_page(pa);
	size = roundup(size, PAGE_SIZE);

	va = kmem_alloc_pageable(kernel_map, size);
	if (!va)
		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");

	for (tmpva = va; size > 0;) {
		pte = vtopte(tmpva);
		*pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N));
		size -= PAGE_SIZE;
		tmpva += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	pmap_update();

	return ((void *) va);
}

#ifdef DEBUG
/* print address space of pmap */
void
pads(pm)
	pmap_t pm;
{
	unsigned va, i, j;
	pt_entry_t *ptep;

	if (pm == kernel_pmap)
		return;
	for (i = 0; i < 1024; i++)
		if (pm->pm_pdir[i])
			for (j = 0; j < 1024; j++) {
				va = (i << PD_SHIFT) + (j << PG_SHIFT);
				if (pm == kernel_pmap && va < KERNBASE)
					continue;
				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
					continue;
				ptep = pmap_pte(pm, va);
				if (pmap_pte_v(ptep))
					printf("%x:%x ", va, *(int *) ptep);
			}

}

void
pmap_pvdump(pa)
	vm_offset_t pa;
{
	register pv_entry_t pv;

	printf("pa %x", pa);
	for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) {
#ifdef used_to_be
		printf(" -> pmap %x, va %x, flags %x",
		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
#endif
		printf(" -> pmap %x, va %x",
		    pv->pv_pmap, pv->pv_va);
		pads(pv->pv_pmap);
	}
	printf(" ");
}
#endif