1/* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department and William Jolitz of UUNET Technologies Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 42 * $FreeBSD: head/sys/i386/i386/pmap.c 86486 2001-11-17 01:56:04Z peter $ 43 */ 44 45/* 46 * Manages physical address maps. 47 * 48 * In addition to hardware address maps, this 49 * module is called upon to provide software-use-only 50 * maps which may or may not be stored in the same 51 * form as hardware maps. These pseudo-maps are 52 * used to store intermediate results from copy 53 * operations to and from address spaces. 54 * 55 * Since the information managed by this module is 56 * also stored by the logical address mapping module, 57 * this module may throw away valid virtual-to-physical 58 * mappings at almost any time. However, invalidations 59 * of virtual-to-physical mappings must be done as 60 * requested. 61 * 62 * In order to cope with hardware architectures which 63 * make virtual-to-physical map invalidates expensive, 64 * this module may delay invalidate or reduced protection 65 * operations until such time as they are actually 66 * necessary. This module is given full information as 67 * to which processors are currently using which maps, 68 * and to when physical maps must be made correct. 
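 * On the i386, for example, each pmap carries a pm_active mask of the
 * CPUs currently using it; the invalidation routines consult this mask
 * to choose between a local invlpg/invltlb and a cross-CPU shootdown
 * via smp_invltlb().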
69 */ 70 71#include "opt_disable_pse.h" 72#include "opt_pmap.h" 73#include "opt_msgbuf.h" 74#include "opt_kstack_pages.h" 75 76#include <sys/param.h> 77#include <sys/systm.h> 78#include <sys/kernel.h> 79#include <sys/lock.h> 80#include <sys/mman.h> 81#include <sys/msgbuf.h> 82#include <sys/mutex.h> 83#include <sys/proc.h> 84#include <sys/sx.h> 85#include <sys/user.h> 86#include <sys/vmmeter.h> 87#include <sys/sysctl.h> 88 89#include <vm/vm.h> 90#include <vm/vm_param.h> 91#include <vm/vm_kern.h> 92#include <vm/vm_page.h> 93#include <vm/vm_map.h> 94#include <vm/vm_object.h> 95#include <vm/vm_extern.h> 96#include <vm/vm_pageout.h> 97#include <vm/vm_pager.h> 98#include <vm/vm_zone.h> 99 100#include <machine/cputypes.h> 101#include <machine/md_var.h> 102#include <machine/specialreg.h> 103#if defined(SMP) || defined(APIC_IO) 104#include <machine/smp.h> 105#include <machine/apic.h> 106#include <machine/segments.h> 107#include <machine/tss.h> 108#include <machine/globaldata.h> 109#endif /* SMP || APIC_IO */ 110 111#define PMAP_KEEP_PDIRS 112#ifndef PMAP_SHPGPERPROC 113#define PMAP_SHPGPERPROC 200 114#endif 115 116#if defined(DIAGNOSTIC) 117#define PMAP_DIAGNOSTIC 118#endif 119 120#define MINPV 2048 121 122#if !defined(PMAP_DIAGNOSTIC) 123#define PMAP_INLINE __inline 124#else 125#define PMAP_INLINE 126#endif 127 128/* 129 * Get PDEs and PTEs for user/kernel address space 130 */ 131#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) 132#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 133 134#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) 135#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) 136#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) 137#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) 138#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) 139 140#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) 141#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 142 143/* 144 * Given a map and a machine independent protection code, 145 * convert to a vax protection code. 146 */ 147#define pte_prot(m, p) (protection_codes[p]) 148static int protection_codes[8]; 149 150static struct pmap kernel_pmap_store; 151pmap_t kernel_pmap; 152LIST_HEAD(pmaplist, pmap); 153struct pmaplist allpmaps; 154 155vm_offset_t avail_start; /* PA of first available physical page */ 156vm_offset_t avail_end; /* PA of last available physical page */ 157vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 158vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 159static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ 160static int pgeflag; /* PG_G or-in */ 161static int pseflag; /* PG_PS or-in */ 162 163static vm_object_t kptobj; 164 165static int nkpt; 166vm_offset_t kernel_vm_end; 167 168/* 169 * Data for the pv entry allocation mechanism 170 */ 171static vm_zone_t pvzone; 172static struct vm_zone pvzone_store; 173static struct vm_object pvzone_obj; 174static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 175static int pmap_pagedaemon_waken = 0; 176static struct pv_entry *pvinit; 177 178/* 179 * All those kernel PT submaps that BSD is so fond of 180 */ 181pt_entry_t *CMAP1 = 0; 182static pt_entry_t *CMAP2, *ptmmap; 183caddr_t CADDR1 = 0, ptvmmap = 0; 184static caddr_t CADDR2; 185static pt_entry_t *msgbufmap; 186struct msgbuf *msgbufp = 0; 187 188/* 189 * Crashdump maps. 
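 * A small VA window (crashdumpmap, MAXDUMPPGS pages) reserved at
 * bootstrap; pmap_kenter_temporary() maps arbitrary physical pages here
 * while a panic dump is being written.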
190 */ 191static pt_entry_t *pt_crashdumpmap; 192static caddr_t crashdumpmap; 193 194#ifdef SMP 195extern pt_entry_t *SMPpt; 196#endif 197static pt_entry_t *PMAP1 = 0; 198static pt_entry_t *PADDR1 = 0; 199 200static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv)); 201static unsigned * get_ptbase __P((pmap_t pmap)); 202static pv_entry_t get_pv_entry __P((void)); 203static void i386_protection_init __P((void)); 204static __inline void pmap_changebit __P((vm_page_t m, int bit, boolean_t setem)); 205 206static void pmap_remove_all __P((vm_page_t m)); 207static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, 208 vm_page_t m, vm_page_t mpte)); 209static int pmap_remove_pte __P((pmap_t pmap, unsigned *ptq, vm_offset_t sva)); 210static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); 211static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m, 212 vm_offset_t va)); 213static boolean_t pmap_testbit __P((vm_page_t m, int bit)); 214static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, 215 vm_page_t mpte, vm_page_t m)); 216 217static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); 218 219static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); 220static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex)); 221static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); 222static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex)); 223static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); 224static vm_offset_t pmap_kmem_choose(vm_offset_t addr); 225 226static unsigned pdir4mb; 227 228/* 229 * Routine: pmap_pte 230 * Function: 231 * Extract the page table entry associated 232 * with the given map/virtual_address pair. 233 */ 234 235PMAP_INLINE unsigned * 236pmap_pte(pmap, va) 237 register pmap_t pmap; 238 vm_offset_t va; 239{ 240 pd_entry_t *pdeaddr; 241 242 if (pmap) { 243 pdeaddr = pmap_pde(pmap, va); 244 if (*pdeaddr & PG_PS) 245 return pdeaddr; 246 if (*pdeaddr) { 247 return get_ptbase(pmap) + i386_btop(va); 248 } 249 } 250 return (0); 251} 252 253/* 254 * Move the kernel virtual free pointer to the next 255 * 4MB. This is used to help improve performance 256 * by using a large (4MB) page for much of the kernel 257 * (.text, .data, .bss) 258 */ 259static vm_offset_t 260pmap_kmem_choose(vm_offset_t addr) 261{ 262 vm_offset_t newaddr = addr; 263#ifndef DISABLE_PSE 264 if (cpu_feature & CPUID_PSE) { 265 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 266 } 267#endif 268 return newaddr; 269} 270 271/* 272 * Bootstrap the system enough to run with virtual memory. 273 * 274 * On the i386 this is called after mapping has already been enabled 275 * and just syncs the pmap module with what has already been done. 276 * [We can't call it easily with mapping off since the kernel is not 277 * mapped with PA == VA, hence we would have to relocate every address 278 * from the linked base (virtual) address "KERNBASE" to the actual 279 * (physical) address starting relative to 0] 280 */ 281void 282pmap_bootstrap(firstaddr, loadaddr) 283 vm_offset_t firstaddr; 284 vm_offset_t loadaddr; 285{ 286 vm_offset_t va; 287 pt_entry_t *pte; 288 int i; 289 290 avail_start = firstaddr; 291 292 /* 293 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too 294 * large. It should instead be correctly calculated in locore.s and 295 * not based on 'first' (which is a physical address, not a virtual 296 * address, for the start of unused physical memory). 
The kernel 297 * page tables are NOT double mapped and thus should not be included 298 * in this calculation. 299 */ 300 virtual_avail = (vm_offset_t) KERNBASE + firstaddr; 301 virtual_avail = pmap_kmem_choose(virtual_avail); 302 303 virtual_end = VM_MAX_KERNEL_ADDRESS; 304 305 /* 306 * Initialize protection array. 307 */ 308 i386_protection_init(); 309 310 /* 311 * The kernel's pmap is statically allocated so we don't have to use 312 * pmap_create, which is unlikely to work correctly at this part of 313 * the boot sequence (XXX and which no longer exists). 314 */ 315 kernel_pmap = &kernel_pmap_store; 316 317 kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); 318 kernel_pmap->pm_count = 1; 319 kernel_pmap->pm_active = -1; /* don't allow deactivation */ 320 TAILQ_INIT(&kernel_pmap->pm_pvlist); 321 LIST_INIT(&allpmaps); 322 LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); 323 nkpt = NKPT; 324 325 /* 326 * Reserve some special page table entries/VA space for temporary 327 * mapping of pages. 328 */ 329#define SYSMAP(c, p, v, n) \ 330 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 331 332 va = virtual_avail; 333 pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); 334 335 /* 336 * CMAP1/CMAP2 are used for zeroing and copying pages. 337 */ 338 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 339 SYSMAP(caddr_t, CMAP2, CADDR2, 1) 340 341 /* 342 * Crashdump maps. 343 */ 344 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); 345 346 /* 347 * ptvmmap is used for reading arbitrary physical pages via /dev/mem. 348 * XXX ptmmap is not used. 349 */ 350 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 351 352 /* 353 * msgbufp is used to map the system message buffer. 354 * XXX msgbufmap is not used. 355 */ 356 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 357 atop(round_page(MSGBUF_SIZE))) 358 359 /* 360 * ptemap is used for pmap_pte_quick 361 */ 362 SYSMAP(unsigned *, PMAP1, PADDR1, 1); 363 364 virtual_avail = va; 365 366 *CMAP1 = *CMAP2 = 0; 367 for (i = 0; i < NKPT; i++) 368 PTD[i] = 0; 369 370 pgeflag = 0; 371#if !defined(SMP) /* XXX - see also mp_machdep.c */ 372 if (cpu_feature & CPUID_PGE) { 373 pgeflag = PG_G; 374 } 375#endif 376 377/* 378 * Initialize the 4MB page size flag 379 */ 380 pseflag = 0; 381/* 382 * The 4MB page version of the initial 383 * kernel page mapping. 384 */ 385 pdir4mb = 0; 386 387#if !defined(DISABLE_PSE) 388 if (cpu_feature & CPUID_PSE) { 389 unsigned ptditmp; 390 /* 391 * Note that we have enabled PSE mode 392 */ 393 pseflag = PG_PS; 394 ptditmp = *(PTmap + i386_btop(KERNBASE)); 395 ptditmp &= ~(NBPDR - 1); 396 ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; 397 pdir4mb = ptditmp; 398 399#if !defined(SMP) 400 /* 401 * Enable the PSE mode. 402 */ 403 load_cr4(rcr4() | CR4_PSE); 404 405 /* 406 * We can do the mapping here for the single processor 407 * case. We simply ignore the old page table page from 408 * now on. 409 */ 410 /* 411 * For SMP, we still need 4K pages to bootstrap APs, 412 * PSE will be enabled as soon as all APs are up. 413 */ 414 PTD[KPTDI] = (pd_entry_t) ptditmp; 415 kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; 416 invltlb(); 417#endif 418 } 419#endif 420 421#ifdef SMP 422 if (cpu_apic_address == 0) 423 panic("pmap_bootstrap: no local apic! 
(non-SMP hardware?)"); 424 425 /* local apic is mapped on last page */ 426 SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | 427 (cpu_apic_address & PG_FRAME)); 428#endif 429 430 invltlb(); 431} 432 433#ifdef SMP 434/* 435 * Set 4mb pdir for mp startup 436 */ 437void 438pmap_set_opt(void) 439{ 440 if (pseflag && (cpu_feature & CPUID_PSE)) { 441 load_cr4(rcr4() | CR4_PSE); 442 if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */ 443 kernel_pmap->pm_pdir[KPTDI] = 444 PTD[KPTDI] = (pd_entry_t)pdir4mb; 445 cpu_invltlb(); 446 } 447 } 448} 449#endif 450 451/* 452 * Initialize the pmap module. 453 * Called by vm_init, to initialize any structures that the pmap 454 * system needs to map virtual memory. 455 * pmap_init has been enhanced to support in a fairly consistant 456 * way, discontiguous physical memory. 457 */ 458void 459pmap_init(phys_start, phys_end) 460 vm_offset_t phys_start, phys_end; 461{ 462 int i; 463 int initial_pvs; 464 465 /* 466 * object for kernel page table pages 467 */ 468 kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE); 469 470 /* 471 * Allocate memory for random pmap data structures. Includes the 472 * pv_head_table. 473 */ 474 475 for(i = 0; i < vm_page_array_size; i++) { 476 vm_page_t m; 477 478 m = &vm_page_array[i]; 479 TAILQ_INIT(&m->md.pv_list); 480 m->md.pv_list_count = 0; 481 } 482 483 /* 484 * init the pv free list 485 */ 486 initial_pvs = vm_page_array_size; 487 if (initial_pvs < MINPV) 488 initial_pvs = MINPV; 489 pvzone = &pvzone_store; 490 pvinit = (struct pv_entry *) kmem_alloc(kernel_map, 491 initial_pvs * sizeof (struct pv_entry)); 492 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, 493 vm_page_array_size); 494 495 /* 496 * Now it is safe to enable pv_table recording. 497 */ 498 pmap_initialized = TRUE; 499} 500 501/* 502 * Initialize the address space (zone) for the pv_entries. Set a 503 * high water mark so that the system can recover from excessive 504 * numbers of pv entries. 505 */ 506void 507pmap_init2() 508{ 509 int shpgperproc = PMAP_SHPGPERPROC; 510 511 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 512 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 513 pv_entry_high_water = 9 * (pv_entry_max / 10); 514 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); 515} 516 517 518/*************************************************** 519 * Low level helper routines..... 520 ***************************************************/ 521 522#if defined(PMAP_DIAGNOSTIC) 523 524/* 525 * This code checks for non-writeable/modified pages. 526 * This should be an invalid condition. 527 */ 528static int 529pmap_nw_modified(pt_entry_t ptea) 530{ 531 int pte; 532 533 pte = (int) ptea; 534 535 if ((pte & (PG_M|PG_RW)) == PG_M) 536 return 1; 537 else 538 return 0; 539} 540#endif 541 542 543/* 544 * this routine defines the region(s) of memory that should 545 * not be tested for the modified bit. 
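 * Only addresses outside the kernel's clean submap
 * [kmi.clean_sva, kmi.clean_eva) are tracked; that range covers buffer
 * cache and pager mappings whose dirty state is presumably managed by
 * the buffer layer rather than via the vm_page dirty bits.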
546 */ 547static PMAP_INLINE int 548pmap_track_modified(vm_offset_t va) 549{ 550 if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) 551 return 1; 552 else 553 return 0; 554} 555 556static PMAP_INLINE void 557invltlb_1pg(vm_offset_t va) 558{ 559#ifdef I386_CPU 560 invltlb(); 561#else 562 invlpg(va); 563#endif 564} 565 566static __inline void 567pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 568{ 569#if defined(SMP) 570 if (pmap->pm_active & (1 << PCPU_GET(cpuid))) 571 cpu_invlpg((void *)va); 572 if (pmap->pm_active & PCPU_GET(other_cpus)) 573 smp_invltlb(); 574#else 575 if (pmap->pm_active) 576 invltlb_1pg(va); 577#endif 578} 579 580static __inline void 581pmap_invalidate_all(pmap_t pmap) 582{ 583#if defined(SMP) 584 if (pmap->pm_active & (1 << PCPU_GET(cpuid))) 585 cpu_invltlb(); 586 if (pmap->pm_active & PCPU_GET(other_cpus)) 587 smp_invltlb(); 588#else 589 if (pmap->pm_active) 590 invltlb(); 591#endif 592} 593 594/* 595 * Return an address which is the base of the Virtual mapping of 596 * all the PTEs for the given pmap. Note this doesn't say that 597 * all the PTEs will be present or that the pages there are valid. 598 * The PTEs are made available by the recursive mapping trick. 599 * It will map in the alternate PTE space if needed. 600 */ 601static pt_entry_t * 602get_ptbase(pmap) 603 pmap_t pmap; 604{ 605 unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 606 607 /* are we current address space or kernel? */ 608 if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { 609 return PTmap; 610 } 611 /* otherwise, we are alternate address space */ 612 if (frame != (((unsigned) APTDpde) & PG_FRAME)) { 613 APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); 614#if defined(SMP) 615 /* The page directory is not shared between CPUs */ 616 cpu_invltlb(); 617#else 618 invltlb(); 619#endif 620 } 621 return APTmap; 622} 623 624/* 625 * Super fast pmap_pte routine best used when scanning 626 * the pv lists. This eliminates many coarse-grained 627 * invltlb calls. Note that many of the pv list 628 * scans are across different pmaps. It is very wasteful 629 * to do an entire invltlb for checking a single mapping. 630 */ 631 632static pt_entry_t * 633pmap_pte_quick(pmap, va) 634 register pmap_t pmap; 635 vm_offset_t va; 636{ 637 pd_entry_t pde, newpf; 638 if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) { 639 pd_entry_t frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; 640 unsigned index = i386_btop(va); 641 /* are we current address space or kernel? */ 642 if ((pmap == kernel_pmap) || 643 (frame == (((unsigned) PTDpde) & PG_FRAME))) { 644 return PTmap + index; 645 } 646 newpf = pde & PG_FRAME; 647 if ( ((*PMAP1) & PG_FRAME) != newpf) { 648 *PMAP1 = newpf | PG_RW | PG_V; 649 invltlb_1pg((vm_offset_t) PADDR1); 650 } 651 return PADDR1 + (index & (NPTEPG - 1)); 652 } 653 return (0); 654} 655 656/* 657 * Routine: pmap_extract 658 * Function: 659 * Extract the physical page address associated 660 * with the given map/virtual_address pair. 
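 * A 4MB (PG_PS) mapping is resolved by combining the superpage frame
 * with the low 22 bits of the va; for a 4K mapping the result is
 * (*pte & PG_FRAME) | (va & PAGE_MASK), with the pte reached through
 * the recursive map at get_ptbase(pmap) + i386_btop(va).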
661 */ 662vm_offset_t 663pmap_extract(pmap, va) 664 register pmap_t pmap; 665 vm_offset_t va; 666{ 667 vm_offset_t rtval; /* XXX FIXME */ 668 vm_offset_t pdirindex; 669 pdirindex = va >> PDRSHIFT; 670 if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) { 671 pt_entry_t *pte; 672 if ((rtval & PG_PS) != 0) { 673 rtval &= ~(NBPDR - 1); 674 rtval |= va & (NBPDR - 1); 675 return rtval; 676 } 677 pte = get_ptbase(pmap) + i386_btop(va); 678 rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); 679 return rtval; 680 } 681 return 0; 682 683} 684 685/*************************************************** 686 * Low level mapping routines..... 687 ***************************************************/ 688 689/* 690 * add a wired page to the kva 691 * note that in order for the mapping to take effect -- you 692 * should do a invltlb after doing the pmap_kenter... 693 */ 694PMAP_INLINE void 695pmap_kenter(vm_offset_t va, vm_offset_t pa) 696{ 697 pt_entry_t *pte; 698 pt_entry_t npte, opte; 699 700 npte = pa | PG_RW | PG_V | pgeflag; 701 pte = vtopte(va); 702 opte = *pte; 703 *pte = npte; 704 /*if (opte)*/ 705 invltlb_1pg(va); /* XXX what about SMP? */ 706} 707 708/* 709 * remove a page from the kernel pagetables 710 */ 711PMAP_INLINE void 712pmap_kremove(vm_offset_t va) 713{ 714 register pt_entry_t *pte; 715 716 pte = vtopte(va); 717 *pte = 0; 718 invltlb_1pg(va); /* XXX what about SMP? */ 719} 720 721/* 722 * Used to map a range of physical addresses into kernel 723 * virtual address space. 724 * 725 * The value passed in '*virt' is a suggested virtual address for 726 * the mapping. Architectures which can support a direct-mapped 727 * physical to virtual region can return the appropriate address 728 * within that region, leaving '*virt' unchanged. Other 729 * architectures should map the pages starting at '*virt' and 730 * update '*virt' with the first usable address after the mapped 731 * region. 732 */ 733vm_offset_t 734pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) 735{ 736 vm_offset_t sva = *virt; 737 vm_offset_t va = sva; 738 while (start < end) { 739 pmap_kenter(va, start); 740 va += PAGE_SIZE; 741 start += PAGE_SIZE; 742 } 743 *virt = va; 744 return (sva); 745} 746 747 748/* 749 * Add a list of wired pages to the kva 750 * this routine is only used for temporary 751 * kernel mappings that do not need to have 752 * page modification or references recorded. 753 * Note that old mappings are simply written 754 * over. The page *must* be wired. 755 */ 756void 757pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 758{ 759 vm_offset_t end_va; 760 761 end_va = va + count * PAGE_SIZE; 762 763 while (va < end_va) { 764 pt_entry_t *pte; 765 766 pte = vtopte(va); 767 *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; 768#ifdef SMP 769 cpu_invlpg((void *)va); 770#else 771 invltlb_1pg(va); 772#endif 773 va += PAGE_SIZE; 774 m++; 775 } 776#ifdef SMP 777 smp_invltlb(); 778#endif 779} 780 781/* 782 * this routine jerks page mappings from the 783 * kernel -- it is meant only for temporary mappings. 
784 */ 785void 786pmap_qremove(vm_offset_t va, int count) 787{ 788 vm_offset_t end_va; 789 790 end_va = va + count*PAGE_SIZE; 791 792 while (va < end_va) { 793 pt_entry_t *pte; 794 795 pte = vtopte(va); 796 *pte = 0; 797#ifdef SMP 798 cpu_invlpg((void *)va); 799#else 800 invltlb_1pg(va); 801#endif 802 va += PAGE_SIZE; 803 } 804#ifdef SMP 805 smp_invltlb(); 806#endif 807} 808 809static vm_page_t 810pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 811{ 812 vm_page_t m; 813retry: 814 m = vm_page_lookup(object, pindex); 815 if (m && vm_page_sleep_busy(m, FALSE, "pplookp")) 816 goto retry; 817 return m; 818} 819 820/* 821 * Create the Uarea stack for a new process. 822 * This routine directly affects the fork perf for a process. 823 */ 824void 825pmap_new_proc(struct proc *p) 826{ 827#ifdef I386_CPU 828 int updateneeded = 0; 829#endif 830 int i; 831 vm_object_t upobj; 832 vm_offset_t up; 833 vm_page_t m; 834 pt_entry_t *ptek, oldpte; 835 836 /* 837 * allocate object for the upages 838 */ 839 upobj = p->p_upages_obj; 840 if (upobj == NULL) { 841 upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES); 842 p->p_upages_obj = upobj; 843 } 844 845 /* get a kernel virtual address for the U area for this thread */ 846 up = (vm_offset_t)p->p_uarea; 847 if (up == 0) { 848 up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE); 849 if (up == 0) 850 panic("pmap_new_proc: upage allocation failed"); 851 p->p_uarea = (struct user *)up; 852 } 853 854 ptek = vtopte(up); 855 856 for (i = 0; i < UAREA_PAGES; i++) { 857 /* 858 * Get a kernel stack page 859 */ 860 m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 861 862 /* 863 * Wire the page 864 */ 865 m->wire_count++; 866 cnt.v_wire_count++; 867 868 oldpte = *(ptek + i); 869 /* 870 * Enter the page into the kernel address space. 871 */ 872 *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; 873 if (oldpte) { 874#ifdef I386_CPU 875 updateneeded = 1; 876#else 877 invlpg(up + i * PAGE_SIZE); 878#endif 879 } 880 881 vm_page_wakeup(m); 882 vm_page_flag_clear(m, PG_ZERO); 883 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 884 m->valid = VM_PAGE_BITS_ALL; 885 } 886#ifdef I386_CPU 887 if (updateneeded) 888 invltlb(); 889#endif 890} 891 892/* 893 * Dispose the U-Area for a process that has exited. 894 * This routine directly impacts the exit perf of a process. 895 */ 896void 897pmap_dispose_proc(p) 898 struct proc *p; 899{ 900 int i; 901 vm_object_t upobj; 902 vm_offset_t up; 903 vm_page_t m; 904 pt_entry_t *ptek, oldpte; 905 906 upobj = p->p_upages_obj; 907 up = (vm_offset_t)p->p_uarea; 908 ptek = vtopte(up); 909 for (i = 0; i < UAREA_PAGES; i++) { 910 m = vm_page_lookup(upobj, i); 911 if (m == NULL) 912 panic("pmap_dispose_proc: upage already missing?"); 913 vm_page_busy(m); 914 oldpte = *(ptek + i); 915 *(ptek + i) = 0; 916#ifndef I386_CPU 917 invlpg(up + i * PAGE_SIZE); 918#endif 919 vm_page_unwire(m, 0); 920 vm_page_free(m); 921 } 922#ifdef I386_CPU 923 invltlb(); 924#endif 925} 926 927/* 928 * Allow the U_AREA for a process to be prejudicially paged out. 
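 * The pages are dirtied and unwired so the pagedaemon may reclaim them,
 * and their kernel mappings are removed with pmap_kremove().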
929 */ 930void 931pmap_swapout_proc(p) 932 struct proc *p; 933{ 934 int i; 935 vm_object_t upobj; 936 vm_offset_t up; 937 vm_page_t m; 938 939 upobj = p->p_upages_obj; 940 up = (vm_offset_t)p->p_uarea; 941 for (i = 0; i < UAREA_PAGES; i++) { 942 m = vm_page_lookup(upobj, i); 943 if (m == NULL) 944 panic("pmap_swapout_proc: upage already missing?"); 945 vm_page_dirty(m); 946 vm_page_unwire(m, 0); 947 pmap_kremove(up + i * PAGE_SIZE); 948 } 949} 950 951/* 952 * Bring the U-Area for a specified process back in. 953 */ 954void 955pmap_swapin_proc(p) 956 struct proc *p; 957{ 958 int i, rv; 959 vm_object_t upobj; 960 vm_offset_t up; 961 vm_page_t m; 962 963 upobj = p->p_upages_obj; 964 up = (vm_offset_t)p->p_uarea; 965 for (i = 0; i < UAREA_PAGES; i++) { 966 m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 967 pmap_kenter(up + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); 968 if (m->valid != VM_PAGE_BITS_ALL) { 969 rv = vm_pager_get_pages(upobj, &m, 1, 0); 970 if (rv != VM_PAGER_OK) 971 panic("pmap_swapin_proc: cannot get upage for proc: %d\n", p->p_pid); 972 m = vm_page_lookup(upobj, i); 973 m->valid = VM_PAGE_BITS_ALL; 974 } 975 vm_page_wire(m); 976 vm_page_wakeup(m); 977 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 978 } 979} 980 981/* 982 * Create the kernel stack (including pcb for i386) for a new thread. 983 * This routine directly affects the fork perf for a process and 984 * create performance for a thread. 985 */ 986void 987pmap_new_thread(struct thread *td) 988{ 989#ifdef I386_CPU 990 int updateneeded = 0; 991#endif 992 int i; 993 vm_object_t ksobj; 994 vm_page_t m; 995 vm_offset_t ks; 996 pt_entry_t *ptek, oldpte; 997 998 /* 999 * allocate object for the kstack 1000 */ 1001 ksobj = td->td_kstack_obj; 1002 if (ksobj == NULL) { 1003 ksobj = vm_object_allocate(OBJT_DEFAULT, KSTACK_PAGES); 1004 td->td_kstack_obj = ksobj; 1005 } 1006 1007#ifdef KSTACK_GUARD 1008 /* get a kernel virtual address for the kstack for this thread */ 1009 ks = td->td_kstack; 1010 if (ks == 0) { 1011 ks = kmem_alloc_nofault(kernel_map, 1012 (KSTACK_PAGES + 1) * PAGE_SIZE); 1013 if (ks == 0) 1014 panic("pmap_new_thread: kstack allocation failed"); 1015 ks += PAGE_SIZE; 1016 td->td_kstack = ks; 1017 } 1018 1019 ptek = vtopte(ks - PAGE_SIZE); 1020 oldpte = *ptek; 1021 *ptek = 0; 1022 if (oldpte) { 1023#ifdef I386_CPU 1024 updateneeded = 1; 1025#else 1026 invlpg(ks - PAGE_SIZE); 1027#endif 1028 } 1029 ptek++; 1030#else 1031 /* get a kernel virtual address for the kstack for this thread */ 1032 ks = td->td_kstack; 1033 if (ks == 0) { 1034 ks = kmem_alloc_nofault(kernel_map, KSTACK_PAGES * PAGE_SIZE); 1035 if (ks == 0) 1036 panic("pmap_new_thread: kstack allocation failed"); 1037 td->td_kstack = ks; 1038 } 1039 ptek = vtopte(ks); 1040#endif 1041 for (i = 0; i < KSTACK_PAGES; i++) { 1042 /* 1043 * Get a kernel stack page 1044 */ 1045 m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 1046 1047 /* 1048 * Wire the page 1049 */ 1050 m->wire_count++; 1051 cnt.v_wire_count++; 1052 1053 oldpte = *(ptek + i); 1054 /* 1055 * Enter the page into the kernel address space. 
1056 */ 1057 *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; 1058 if (oldpte) { 1059#ifdef I386_CPU 1060 updateneeded = 1; 1061#else 1062 invlpg(ks + i * PAGE_SIZE); 1063#endif 1064 } 1065 1066 vm_page_wakeup(m); 1067 vm_page_flag_clear(m, PG_ZERO); 1068 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 1069 m->valid = VM_PAGE_BITS_ALL; 1070 } 1071#ifdef I386_CPU 1072 if (updateneeded) 1073 invltlb(); 1074#endif 1075} 1076 1077/* 1078 * Dispose the kernel stack for a thread that has exited. 1079 * This routine directly impacts the exit perf of a process and thread. 1080 */ 1081void 1082pmap_dispose_thread(td) 1083 struct thread *td; 1084{ 1085 int i; 1086 vm_object_t ksobj; 1087 vm_offset_t ks; 1088 vm_page_t m; 1089 pt_entry_t *ptek, oldpte; 1090 1091 ksobj = td->td_kstack_obj; 1092 ks = td->td_kstack; 1093 ptek = vtopte(ks); 1094 for (i = 0; i < KSTACK_PAGES; i++) { 1095 m = vm_page_lookup(ksobj, i); 1096 if (m == NULL) 1097 panic("pmap_dispose_thread: kstack already missing?"); 1098 vm_page_busy(m); 1099 oldpte = *(ptek + i); 1100 *(ptek + i) = 0; 1101#ifndef I386_CPU 1102 invlpg(ks + i * PAGE_SIZE); 1103#endif 1104 vm_page_unwire(m, 0); 1105 vm_page_free(m); 1106 } 1107#ifdef I386_CPU 1108 invltlb(); 1109#endif 1110} 1111 1112/* 1113 * Allow the Kernel stack for a thread to be prejudicially paged out. 1114 */ 1115void 1116pmap_swapout_thread(td) 1117 struct thread *td; 1118{ 1119 int i; 1120 vm_object_t ksobj; 1121 vm_offset_t ks; 1122 vm_page_t m; 1123 1124 ksobj = td->td_kstack_obj; 1125 ks = td->td_kstack; 1126 for (i = 0; i < KSTACK_PAGES; i++) { 1127 m = vm_page_lookup(ksobj, i); 1128 if (m == NULL) 1129 panic("pmap_swapout_thread: kstack already missing?"); 1130 vm_page_dirty(m); 1131 vm_page_unwire(m, 0); 1132 pmap_kremove(ks + i * PAGE_SIZE); 1133 } 1134} 1135 1136/* 1137 * Bring the kernel stack for a specified thread back in. 1138 */ 1139void 1140pmap_swapin_thread(td) 1141 struct thread *td; 1142{ 1143 int i, rv; 1144 vm_object_t ksobj; 1145 vm_offset_t ks; 1146 vm_page_t m; 1147 1148 ksobj = td->td_kstack_obj; 1149 ks = td->td_kstack; 1150 for (i = 0; i < KSTACK_PAGES; i++) { 1151 m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 1152 pmap_kenter(ks + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); 1153 if (m->valid != VM_PAGE_BITS_ALL) { 1154 rv = vm_pager_get_pages(ksobj, &m, 1, 0); 1155 if (rv != VM_PAGER_OK) 1156 panic("pmap_swapin_thread: cannot get kstack for proc: %d\n", td->td_proc->p_pid); 1157 m = vm_page_lookup(ksobj, i); 1158 m->valid = VM_PAGE_BITS_ALL; 1159 } 1160 vm_page_wire(m); 1161 vm_page_wakeup(m); 1162 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 1163 } 1164} 1165 1166/*************************************************** 1167 * Page table page management routines..... 1168 ***************************************************/ 1169 1170/* 1171 * This routine unholds page table pages, and if the hold count 1172 * drops to zero, then it decrements the wire count. 1173 */ 1174static int 1175_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 1176{ 1177 1178 while (vm_page_sleep_busy(m, FALSE, "pmuwpt")) 1179 ; 1180 1181 if (m->hold_count == 0) { 1182 vm_offset_t pteva; 1183 /* 1184 * unmap the page table page 1185 */ 1186 pmap->pm_pdir[m->pindex] = 0; 1187 --pmap->pm_stats.resident_count; 1188 if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == 1189 (PTDpde & PG_FRAME)) { 1190 /* 1191 * Do a invltlb to make the invalidated mapping 1192 * take effect immediately. 
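 * This is only needed when the pmap owning the page table page is the
 * currently installed address space (its PTDPTDI entry matches PTDpde),
 * since the stale translation lives in the recursive page table window
 * above VM_MAXUSER_ADDRESS.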
1193 */ 1194 pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); 1195 pmap_invalidate_page(pmap, pteva); 1196 } 1197 1198 if (pmap->pm_ptphint == m) 1199 pmap->pm_ptphint = NULL; 1200 1201 /* 1202 * If the page is finally unwired, simply free it. 1203 */ 1204 --m->wire_count; 1205 if (m->wire_count == 0) { 1206 1207 vm_page_flash(m); 1208 vm_page_busy(m); 1209 vm_page_free_zero(m); 1210 --cnt.v_wire_count; 1211 } 1212 return 1; 1213 } 1214 return 0; 1215} 1216 1217static PMAP_INLINE int 1218pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 1219{ 1220 vm_page_unhold(m); 1221 if (m->hold_count == 0) 1222 return _pmap_unwire_pte_hold(pmap, m); 1223 else 1224 return 0; 1225} 1226 1227/* 1228 * After removing a page table entry, this routine is used to 1229 * conditionally free the page, and manage the hold/wire counts. 1230 */ 1231static int 1232pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1233{ 1234 unsigned ptepindex; 1235 if (va >= VM_MAXUSER_ADDRESS) 1236 return 0; 1237 1238 if (mpte == NULL) { 1239 ptepindex = (va >> PDRSHIFT); 1240 if (pmap->pm_ptphint && 1241 (pmap->pm_ptphint->pindex == ptepindex)) { 1242 mpte = pmap->pm_ptphint; 1243 } else { 1244 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 1245 pmap->pm_ptphint = mpte; 1246 } 1247 } 1248 1249 return pmap_unwire_pte_hold(pmap, mpte); 1250} 1251 1252void 1253pmap_pinit0(pmap) 1254 struct pmap *pmap; 1255{ 1256 pmap->pm_pdir = 1257 (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); 1258 pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD); 1259 pmap->pm_count = 1; 1260 pmap->pm_ptphint = NULL; 1261 pmap->pm_active = 0; 1262 TAILQ_INIT(&pmap->pm_pvlist); 1263 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1264 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1265} 1266 1267/* 1268 * Initialize a preallocated and zeroed pmap structure, 1269 * such as one in a vmspace structure. 1270 */ 1271void 1272pmap_pinit(pmap) 1273 register struct pmap *pmap; 1274{ 1275 vm_page_t ptdpg; 1276 1277 /* 1278 * No need to allocate page table space yet but we do need a valid 1279 * page directory table. 1280 */ 1281 if (pmap->pm_pdir == NULL) 1282 pmap->pm_pdir = 1283 (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); 1284 1285 /* 1286 * allocate object for the ptes 1287 */ 1288 if (pmap->pm_pteobj == NULL) 1289 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1); 1290 1291 /* 1292 * allocate the page directory page 1293 */ 1294 ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI, 1295 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 1296 1297 ptdpg->wire_count = 1; 1298 ++cnt.v_wire_count; 1299 1300 1301 vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/ 1302 ptdpg->valid = VM_PAGE_BITS_ALL; 1303 1304 pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); 1305 if ((ptdpg->flags & PG_ZERO) == 0) 1306 bzero(pmap->pm_pdir, PAGE_SIZE); 1307 1308 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1309 /* Wire in kernel global address entries. 
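 * The nkpt kernel PDEs starting at KPTDI are copied from the live PTD
 * below; later additions made by pmap_growkernel() are propagated to
 * every pmap on the allpmaps list.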
*/ 1310 /* XXX copies current process, does not fill in MPPTDI */ 1311 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); 1312#ifdef SMP 1313 pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; 1314#endif 1315 1316 /* install self-referential address mapping entry */ 1317 pmap->pm_pdir[PTDPTDI] = 1318 VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M; 1319 1320 pmap->pm_count = 1; 1321 pmap->pm_active = 0; 1322 pmap->pm_ptphint = NULL; 1323 TAILQ_INIT(&pmap->pm_pvlist); 1324 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1325} 1326 1327/* 1328 * Wire in kernel global address entries. To avoid a race condition 1329 * between pmap initialization and pmap_growkernel, this procedure 1330 * should be called after the vmspace is attached to the process 1331 * but before this pmap is activated. 1332 */ 1333void 1334pmap_pinit2(pmap) 1335 struct pmap *pmap; 1336{ 1337 /* XXX: Remove this stub when no longer called */ 1338} 1339 1340static int 1341pmap_release_free_page(pmap_t pmap, vm_page_t p) 1342{ 1343 pd_entry_t *pde = pmap->pm_pdir; 1344 /* 1345 * This code optimizes the case of freeing non-busy 1346 * page-table pages. Those pages are zero now, and 1347 * might as well be placed directly into the zero queue. 1348 */ 1349 if (vm_page_sleep_busy(p, FALSE, "pmaprl")) 1350 return 0; 1351 1352 vm_page_busy(p); 1353 1354 /* 1355 * Remove the page table page from the processes address space. 1356 */ 1357 pde[p->pindex] = 0; 1358 pmap->pm_stats.resident_count--; 1359 1360 if (p->hold_count) { 1361 panic("pmap_release: freeing held page table page"); 1362 } 1363 /* 1364 * Page directory pages need to have the kernel 1365 * stuff cleared, so they can go into the zero queue also. 1366 */ 1367 if (p->pindex == PTDPTDI) { 1368 bzero(pde + KPTDI, nkpt * PTESIZE); 1369#ifdef SMP 1370 pde[MPPTDI] = 0; 1371#endif 1372 pde[APTDPTDI] = 0; 1373 pmap_kremove((vm_offset_t) pmap->pm_pdir); 1374 } 1375 1376 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) 1377 pmap->pm_ptphint = NULL; 1378 1379 p->wire_count--; 1380 cnt.v_wire_count--; 1381 vm_page_free_zero(p); 1382 return 1; 1383} 1384 1385/* 1386 * this routine is called if the page table page is not 1387 * mapped correctly. 1388 */ 1389static vm_page_t 1390_pmap_allocpte(pmap, ptepindex) 1391 pmap_t pmap; 1392 unsigned ptepindex; 1393{ 1394 vm_offset_t pteva, ptepa; /* XXXPA */ 1395 vm_page_t m; 1396 1397 /* 1398 * Find or fabricate a new pagetable page 1399 */ 1400 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1401 VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1402 1403 KASSERT(m->queue == PQ_NONE, 1404 ("_pmap_allocpte: %p->queue != PQ_NONE", m)); 1405 1406 if (m->wire_count == 0) 1407 cnt.v_wire_count++; 1408 m->wire_count++; 1409 1410 /* 1411 * Increment the hold count for the page table page 1412 * (denoting a new mapping.) 1413 */ 1414 m->hold_count++; 1415 1416 /* 1417 * Map the pagetable page into the process address space, if 1418 * it isn't already there. 1419 */ 1420 1421 pmap->pm_stats.resident_count++; 1422 1423 ptepa = VM_PAGE_TO_PHYS(m); 1424 pmap->pm_pdir[ptepindex] = 1425 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); 1426 1427 /* 1428 * Set the page table hint 1429 */ 1430 pmap->pm_ptphint = m; 1431 1432 /* 1433 * Try to use the new mapping, but if we cannot, then 1434 * do it with the routine that maps the page explicitly. 
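 * The recursive window can only be used to zero the new page table page
 * when this pmap is the currently installed address space; otherwise the
 * page is zeroed by physical address with pmap_zero_page().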
1435 */ 1436 if ((m->flags & PG_ZERO) == 0) { 1437 if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == 1438 (PTDpde & PG_FRAME)) { 1439 pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex); 1440 bzero((caddr_t) pteva, PAGE_SIZE); 1441 } else { 1442 pmap_zero_page(ptepa); 1443 } 1444 } 1445 1446 m->valid = VM_PAGE_BITS_ALL; 1447 vm_page_flag_clear(m, PG_ZERO); 1448 vm_page_flag_set(m, PG_MAPPED); 1449 vm_page_wakeup(m); 1450 1451 return m; 1452} 1453 1454static vm_page_t 1455pmap_allocpte(pmap_t pmap, vm_offset_t va) 1456{ 1457 unsigned ptepindex; 1458 pd_entry_t ptepa; 1459 vm_page_t m; 1460 1461 /* 1462 * Calculate pagetable page index 1463 */ 1464 ptepindex = va >> PDRSHIFT; 1465 1466 /* 1467 * Get the page directory entry 1468 */ 1469 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; 1470 1471 /* 1472 * This supports switching from a 4MB page to a 1473 * normal 4K page. 1474 */ 1475 if (ptepa & PG_PS) { 1476 pmap->pm_pdir[ptepindex] = 0; 1477 ptepa = 0; 1478 invltlb(); 1479 } 1480 1481 /* 1482 * If the page table page is mapped, we just increment the 1483 * hold count, and activate it. 1484 */ 1485 if (ptepa) { 1486 /* 1487 * In order to get the page table page, try the 1488 * hint first. 1489 */ 1490 if (pmap->pm_ptphint && 1491 (pmap->pm_ptphint->pindex == ptepindex)) { 1492 m = pmap->pm_ptphint; 1493 } else { 1494 m = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 1495 pmap->pm_ptphint = m; 1496 } 1497 m->hold_count++; 1498 return m; 1499 } 1500 /* 1501 * Here if the pte page isn't mapped, or if it has been deallocated. 1502 */ 1503 return _pmap_allocpte(pmap, ptepindex); 1504} 1505 1506 1507/*************************************************** 1508* Pmap allocation/deallocation routines. 1509 ***************************************************/ 1510 1511/* 1512 * Release any resources held by the given physical map. 1513 * Called when a pmap initialized by pmap_pinit is being released. 1514 * Should only be called if the map contains no valid mappings. 
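 * The page directory page itself (pindex PTDPTDI) is skipped during the
 * scan and released last, once every page table page has been freed.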
1515 */ 1516void 1517pmap_release(pmap_t pmap) 1518{ 1519 vm_page_t p,n,ptdpg; 1520 vm_object_t object = pmap->pm_pteobj; 1521 int curgeneration; 1522 1523#if defined(DIAGNOSTIC) 1524 if (object->ref_count != 1) 1525 panic("pmap_release: pteobj reference count != 1"); 1526#endif 1527 1528 ptdpg = NULL; 1529 LIST_REMOVE(pmap, pm_list); 1530retry: 1531 curgeneration = object->generation; 1532 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { 1533 n = TAILQ_NEXT(p, listq); 1534 if (p->pindex == PTDPTDI) { 1535 ptdpg = p; 1536 continue; 1537 } 1538 while (1) { 1539 if (!pmap_release_free_page(pmap, p) && 1540 (object->generation != curgeneration)) 1541 goto retry; 1542 } 1543 } 1544 1545 if (ptdpg && !pmap_release_free_page(pmap, ptdpg)) 1546 goto retry; 1547} 1548 1549static int 1550kvm_size(SYSCTL_HANDLER_ARGS) 1551{ 1552 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 1553 1554 return sysctl_handle_long(oidp, &ksize, 0, req); 1555} 1556SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1557 0, 0, kvm_size, "IU", "Size of KVM"); 1558 1559static int 1560kvm_free(SYSCTL_HANDLER_ARGS) 1561{ 1562 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1563 1564 return sysctl_handle_long(oidp, &kfree, 0, req); 1565} 1566SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1567 0, 0, kvm_free, "IU", "Amount of KVM free"); 1568 1569/* 1570 * grow the number of kernel page table entries, if needed 1571 */ 1572void 1573pmap_growkernel(vm_offset_t addr) 1574{ 1575 struct pmap *pmap; 1576 int s; 1577 vm_offset_t ptppaddr; 1578 vm_page_t nkpg; 1579 pd_entry_t newpdir; 1580 1581 s = splhigh(); 1582 if (kernel_vm_end == 0) { 1583 kernel_vm_end = KERNBASE; 1584 nkpt = 0; 1585 while (pdir_pde(PTD, kernel_vm_end)) { 1586 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1587 nkpt++; 1588 } 1589 } 1590 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1591 while (kernel_vm_end < addr) { 1592 if (pdir_pde(PTD, kernel_vm_end)) { 1593 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1594 continue; 1595 } 1596 1597 /* 1598 * This index is bogus, but out of the way 1599 */ 1600 nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM); 1601 if (!nkpg) 1602 panic("pmap_growkernel: no memory to grow kernel"); 1603 1604 nkpt++; 1605 1606 vm_page_wire(nkpg); 1607 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1608 pmap_zero_page(ptppaddr); 1609 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); 1610 pdir_pde(PTD, kernel_vm_end) = newpdir; 1611 1612 LIST_FOREACH(pmap, &allpmaps, pm_list) { 1613 *pmap_pde(pmap, kernel_vm_end) = newpdir; 1614 } 1615 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1616 } 1617 splx(s); 1618} 1619 1620/* 1621 * Retire the given physical map from service. 1622 * Should only be called if the map contains 1623 * no valid mappings. 1624 */ 1625void 1626pmap_destroy(pmap_t pmap) 1627{ 1628 int count; 1629 1630 if (pmap == NULL) 1631 return; 1632 1633 count = --pmap->pm_count; 1634 if (count == 0) { 1635 pmap_release(pmap); 1636 panic("destroying a pmap is not yet implemented"); 1637 } 1638} 1639 1640/* 1641 * Add a reference to the specified pmap. 1642 */ 1643void 1644pmap_reference(pmap_t pmap) 1645{ 1646 if (pmap != NULL) { 1647 pmap->pm_count++; 1648 } 1649} 1650 1651/*************************************************** 1652* page management routines. 
1653 ***************************************************/ 1654 1655/* 1656 * free the pv_entry back to the free list 1657 */ 1658static PMAP_INLINE void 1659free_pv_entry(pv_entry_t pv) 1660{ 1661 pv_entry_count--; 1662 zfree(pvzone, pv); 1663} 1664 1665/* 1666 * get a new pv_entry, allocating a block from the system 1667 * when needed. 1668 * the memory allocation is performed bypassing the malloc code 1669 * because of the possibility of allocations at interrupt time. 1670 */ 1671static pv_entry_t 1672get_pv_entry(void) 1673{ 1674 pv_entry_count++; 1675 if (pv_entry_high_water && 1676 (pv_entry_count > pv_entry_high_water) && 1677 (pmap_pagedaemon_waken == 0)) { 1678 pmap_pagedaemon_waken = 1; 1679 wakeup (&vm_pages_needed); 1680 } 1681 return zalloc(pvzone); 1682} 1683 1684/* 1685 * This routine is very drastic, but can save the system 1686 * in a pinch. 1687 */ 1688void 1689pmap_collect() 1690{ 1691 int i; 1692 vm_page_t m; 1693 static int warningdone = 0; 1694 1695 if (pmap_pagedaemon_waken == 0) 1696 return; 1697 1698 if (warningdone < 5) { 1699 printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); 1700 warningdone++; 1701 } 1702 1703 for(i = 0; i < vm_page_array_size; i++) { 1704 m = &vm_page_array[i]; 1705 if (m->wire_count || m->hold_count || m->busy || 1706 (m->flags & (PG_BUSY | PG_UNMANAGED))) 1707 continue; 1708 pmap_remove_all(m); 1709 } 1710 pmap_pagedaemon_waken = 0; 1711} 1712 1713 1714/* 1715 * If it is the first entry on the list, it is actually 1716 * in the header and we must copy the following entry up 1717 * to the header. Otherwise we must search the list for 1718 * the entry. In either case we free the now unused entry. 1719 */ 1720 1721static int 1722pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1723{ 1724 pv_entry_t pv; 1725 int rtval; 1726 int s; 1727 1728 s = splvm(); 1729 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1730 for (pv = TAILQ_FIRST(&m->md.pv_list); 1731 pv; 1732 pv = TAILQ_NEXT(pv, pv_list)) { 1733 if (pmap == pv->pv_pmap && va == pv->pv_va) 1734 break; 1735 } 1736 } else { 1737 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); 1738 pv; 1739 pv = TAILQ_NEXT(pv, pv_plist)) { 1740 if (va == pv->pv_va) 1741 break; 1742 } 1743 } 1744 1745 rtval = 0; 1746 if (pv) { 1747 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1748 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1749 m->md.pv_list_count--; 1750 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 1751 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1752 1753 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1754 free_pv_entry(pv); 1755 } 1756 1757 splx(s); 1758 return rtval; 1759} 1760 1761/* 1762 * Create a pv entry for page at pa for 1763 * (pmap, va). 
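 * Runs at splvm(), since pv lists may also be manipulated from
 * interrupt context.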
1764 */ 1765static void 1766pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) 1767{ 1768 1769 int s; 1770 pv_entry_t pv; 1771 1772 s = splvm(); 1773 pv = get_pv_entry(); 1774 pv->pv_va = va; 1775 pv->pv_pmap = pmap; 1776 pv->pv_ptem = mpte; 1777 1778 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1779 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1780 m->md.pv_list_count++; 1781 1782 splx(s); 1783} 1784 1785/* 1786 * pmap_remove_pte: do the things to unmap a page in a process 1787 */ 1788static int 1789pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) 1790{ 1791 pt_entry_t oldpte; 1792 vm_page_t m; 1793 1794 oldpte = atomic_readandclear_int(ptq); 1795 if (oldpte & PG_W) 1796 pmap->pm_stats.wired_count -= 1; 1797 /* 1798 * Machines that don't support invlpg, also don't support 1799 * PG_G. 1800 */ 1801 if (oldpte & PG_G) 1802 invlpg(va); 1803 pmap->pm_stats.resident_count -= 1; 1804 if (oldpte & PG_MANAGED) { 1805 m = PHYS_TO_VM_PAGE(oldpte); 1806 if (oldpte & PG_M) { 1807#if defined(PMAP_DIAGNOSTIC) 1808 if (pmap_nw_modified((pt_entry_t) oldpte)) { 1809 printf( 1810 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", 1811 va, oldpte); 1812 } 1813#endif 1814 if (pmap_track_modified(va)) 1815 vm_page_dirty(m); 1816 } 1817 if (oldpte & PG_A) 1818 vm_page_flag_set(m, PG_REFERENCED); 1819 return pmap_remove_entry(pmap, m, va); 1820 } else { 1821 return pmap_unuse_pt(pmap, va, NULL); 1822 } 1823 1824 return 0; 1825} 1826 1827/* 1828 * Remove a single page from a process address space 1829 */ 1830static void 1831pmap_remove_page(pmap_t pmap, vm_offset_t va) 1832{ 1833 register pt_entry_t *ptq; 1834 1835 /* 1836 * if there is no pte for this address, just skip it!!! 1837 */ 1838 if (*pmap_pde(pmap, va) == 0) { 1839 return; 1840 } 1841 1842 /* 1843 * get a local va for mappings for this pmap. 1844 */ 1845 ptq = get_ptbase(pmap) + i386_btop(va); 1846 if (*ptq) { 1847 (void) pmap_remove_pte(pmap, ptq, va); 1848 pmap_invalidate_page(pmap, va); 1849 } 1850 return; 1851} 1852 1853/* 1854 * Remove the given range of addresses from the specified map. 1855 * 1856 * It is assumed that the start and end are properly 1857 * rounded to the page size. 1858 */ 1859void 1860pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1861{ 1862 register pt_entry_t *ptbase; 1863 vm_offset_t pdnxt; 1864 pd_entry_t ptpaddr; 1865 vm_offset_t sindex, eindex; 1866 int anyvalid; 1867 1868 if (pmap == NULL) 1869 return; 1870 1871 if (pmap->pm_stats.resident_count == 0) 1872 return; 1873 1874 /* 1875 * special handling of removing one page. a very 1876 * common operation and easy to short circuit some 1877 * code. 1878 */ 1879 if ((sva + PAGE_SIZE == eva) && 1880 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 1881 pmap_remove_page(pmap, sva); 1882 return; 1883 } 1884 1885 anyvalid = 0; 1886 1887 /* 1888 * Get a local virtual address for the mappings that are being 1889 * worked with. 1890 */ 1891 ptbase = get_ptbase(pmap); 1892 1893 sindex = i386_btop(sva); 1894 eindex = i386_btop(eva); 1895 1896 for (; sindex < eindex; sindex = pdnxt) { 1897 unsigned pdirindex; 1898 1899 /* 1900 * Calculate index for next page table. 
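 * pdnxt is the pte index of the first page covered by the next page
 * table, i.e. sindex rounded up to the next NPTEPG boundary.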
1901 */ 1902 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); 1903 if (pmap->pm_stats.resident_count == 0) 1904 break; 1905 1906 pdirindex = sindex / NPDEPG; 1907 ptpaddr = pmap->pm_pdir[pdirindex]; 1908 if ((ptpaddr & PG_PS) != 0) { 1909 pmap->pm_pdir[pdirindex] = 0; 1910 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1911 anyvalid++; 1912 continue; 1913 } 1914 1915 /* 1916 * Weed out invalid mappings. Note: we assume that the page 1917 * directory table is always allocated, and in kernel virtual. 1918 */ 1919 if (ptpaddr == 0) 1920 continue; 1921 1922 /* 1923 * Limit our scan to either the end of the va represented 1924 * by the current page table page, or to the end of the 1925 * range being removed. 1926 */ 1927 if (pdnxt > eindex) { 1928 pdnxt = eindex; 1929 } 1930 1931 for ( ;sindex != pdnxt; sindex++) { 1932 vm_offset_t va; 1933 if (ptbase[sindex] == 0) { 1934 continue; 1935 } 1936 va = i386_ptob(sindex); 1937 1938 anyvalid++; 1939 if (pmap_remove_pte(pmap, 1940 ptbase + sindex, va)) 1941 break; 1942 } 1943 } 1944 1945 if (anyvalid) 1946 pmap_invalidate_all(pmap); 1947} 1948 1949/* 1950 * Routine: pmap_remove_all 1951 * Function: 1952 * Removes this physical page from 1953 * all physical maps in which it resides. 1954 * Reflects back modify bits to the pager. 1955 * 1956 * Notes: 1957 * Original versions of this routine were very 1958 * inefficient because they iteratively called 1959 * pmap_remove (slow...) 1960 */ 1961 1962static void 1963pmap_remove_all(vm_page_t m) 1964{ 1965 register pv_entry_t pv; 1966 pt_entry_t *pte, tpte; 1967 int s; 1968 1969#if defined(PMAP_DIAGNOSTIC) 1970 /* 1971 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 1972 * pages! 1973 */ 1974 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 1975 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); 1976 } 1977#endif 1978 1979 s = splvm(); 1980 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1981 pv->pv_pmap->pm_stats.resident_count--; 1982 1983 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 1984 1985 tpte = atomic_readandclear_int(pte); 1986 if (tpte & PG_W) 1987 pv->pv_pmap->pm_stats.wired_count--; 1988 1989 if (tpte & PG_A) 1990 vm_page_flag_set(m, PG_REFERENCED); 1991 1992 /* 1993 * Update the vm_page_t clean and reference bits. 1994 */ 1995 if (tpte & PG_M) { 1996#if defined(PMAP_DIAGNOSTIC) 1997 if (pmap_nw_modified((pt_entry_t) tpte)) { 1998 printf( 1999 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", 2000 pv->pv_va, tpte); 2001 } 2002#endif 2003 if (pmap_track_modified(pv->pv_va)) 2004 vm_page_dirty(m); 2005 } 2006 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 2007 2008 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2009 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2010 m->md.pv_list_count--; 2011 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2012 free_pv_entry(pv); 2013 } 2014 2015 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2016 2017 splx(s); 2018} 2019 2020/* 2021 * Set the physical protection on the 2022 * specified range of this map as requested. 
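 * A request that removes all access degrades to pmap_remove(); a request
 * that includes write access is a no-op here, write enabling presumably
 * being deferred to fault time.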
2023 */ 2024void 2025pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2026{ 2027 register pt_entry_t *ptbase; 2028 vm_offset_t pdnxt; 2029 pd_entry_t ptpaddr; 2030 vm_pindex_t sindex, eindex; 2031 int anychanged; 2032 2033 if (pmap == NULL) 2034 return; 2035 2036 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2037 pmap_remove(pmap, sva, eva); 2038 return; 2039 } 2040 2041 if (prot & VM_PROT_WRITE) 2042 return; 2043 2044 anychanged = 0; 2045 2046 ptbase = get_ptbase(pmap); 2047 2048 sindex = i386_btop(sva); 2049 eindex = i386_btop(eva); 2050 2051 for (; sindex < eindex; sindex = pdnxt) { 2052 2053 unsigned pdirindex; 2054 2055 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); 2056 2057 pdirindex = sindex / NPDEPG; 2058 ptpaddr = pmap->pm_pdir[pdirindex]; 2059 if ((ptpaddr & PG_PS) != 0) { 2060 pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); 2061 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2062 anychanged++; 2063 continue; 2064 } 2065 2066 /* 2067 * Weed out invalid mappings. Note: we assume that the page 2068 * directory table is always allocated, and in kernel virtual. 2069 */ 2070 if (ptpaddr == 0) 2071 continue; 2072 2073 if (pdnxt > eindex) { 2074 pdnxt = eindex; 2075 } 2076 2077 for (; sindex != pdnxt; sindex++) { 2078 2079 pt_entry_t pbits; 2080 vm_page_t m; 2081 2082 pbits = ptbase[sindex]; 2083 2084 if (pbits & PG_MANAGED) { 2085 m = NULL; 2086 if (pbits & PG_A) { 2087 m = PHYS_TO_VM_PAGE(pbits); 2088 vm_page_flag_set(m, PG_REFERENCED); 2089 pbits &= ~PG_A; 2090 } 2091 if (pbits & PG_M) { 2092 if (pmap_track_modified(i386_ptob(sindex))) { 2093 if (m == NULL) 2094 m = PHYS_TO_VM_PAGE(pbits); 2095 vm_page_dirty(m); 2096 pbits &= ~PG_M; 2097 } 2098 } 2099 } 2100 2101 pbits &= ~PG_RW; 2102 2103 if (pbits != ptbase[sindex]) { 2104 ptbase[sindex] = pbits; 2105 anychanged = 1; 2106 } 2107 } 2108 } 2109 if (anychanged) 2110 pmap_invalidate_all(pmap); 2111} 2112 2113/* 2114 * Insert the given physical page (p) at 2115 * the specified virtual address (v) in the 2116 * target physical map with the protection requested. 2117 * 2118 * If specified, the page will be wired down, meaning 2119 * that the related pte can not be reclaimed. 2120 * 2121 * NB: This is the only routine which MAY NOT lazy-evaluate 2122 * or lose information. That is, this routine must actually 2123 * insert this page into the given map NOW. 2124 */ 2125void 2126pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2127 boolean_t wired) 2128{ 2129 vm_offset_t pa; 2130 register pt_entry_t *pte; 2131 vm_offset_t opa; 2132 pt_entry_t origpte, newpte; 2133 vm_page_t mpte; 2134 2135 if (pmap == NULL) 2136 return; 2137 2138 va &= PG_FRAME; 2139#ifdef PMAP_DIAGNOSTIC 2140 if (va > VM_MAX_KERNEL_ADDRESS) 2141 panic("pmap_enter: toobig"); 2142 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) 2143 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); 2144#endif 2145 2146 mpte = NULL; 2147 /* 2148 * In the case that a page table page is not 2149 * resident, we are creating it here. 
2150 */ 2151 if (va < VM_MAXUSER_ADDRESS) { 2152 mpte = pmap_allocpte(pmap, va); 2153 } 2154#if 0 && defined(PMAP_DIAGNOSTIC) 2155 else { 2156 pd_entry_t *pdeaddr = pmap_pde(pmap, va); 2157 if (((origpte = *pdeaddr) & PG_V) == 0) { 2158 panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n", 2159 pmap->pm_pdir[PTDPTDI], origpte, va); 2160 } 2161 if (smp_active) { 2162 pdeaddr = (vm_offset_t *) IdlePTDS[PCPU_GET(cpuid)]; 2163 if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) { 2164 if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr)) 2165 printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr); 2166 printf("cpuid: %d, pdeaddr: 0x%x\n", PCPU_GET(cpuid), pdeaddr); 2167 panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n", 2168 pmap->pm_pdir[PTDPTDI], newpte, origpte, va); 2169 } 2170 } 2171 } 2172#endif 2173 2174 pte = pmap_pte(pmap, va); 2175 2176 /* 2177 * Page Directory table entry not valid, we need a new PT page 2178 */ 2179 if (pte == NULL) { 2180 panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n", 2181 (void *)pmap->pm_pdir[PTDPTDI], va); 2182 } 2183 2184 pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; 2185 origpte = *(vm_offset_t *)pte; 2186 opa = origpte & PG_FRAME; 2187 2188 if (origpte & PG_PS) 2189 panic("pmap_enter: attempted pmap_enter on 4MB page"); 2190 2191 /* 2192 * Mapping has not changed, must be protection or wiring change. 2193 */ 2194 if (origpte && (opa == pa)) { 2195 /* 2196 * Wiring change, just update stats. We don't worry about 2197 * wiring PT pages as they remain resident as long as there 2198 * are valid mappings in them. Hence, if a user page is wired, 2199 * the PT page will be also. 2200 */ 2201 if (wired && ((origpte & PG_W) == 0)) 2202 pmap->pm_stats.wired_count++; 2203 else if (!wired && (origpte & PG_W)) 2204 pmap->pm_stats.wired_count--; 2205 2206#if defined(PMAP_DIAGNOSTIC) 2207 if (pmap_nw_modified((pt_entry_t) origpte)) { 2208 printf( 2209 "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", 2210 va, origpte); 2211 } 2212#endif 2213 2214 /* 2215 * Remove extra pte reference 2216 */ 2217 if (mpte) 2218 mpte->hold_count--; 2219 2220 if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { 2221 if ((origpte & PG_RW) == 0) { 2222 *pte |= PG_RW; 2223#ifdef SMP 2224 cpu_invlpg((void *)va); 2225 if (pmap->pm_active & PCPU_GET(other_cpus)) 2226 smp_invltlb(); 2227#else 2228 invltlb_1pg(va); 2229#endif 2230 } 2231 return; 2232 } 2233 2234 /* 2235 * We might be turning off write access to the page, 2236 * so we go ahead and sense modify status. 2237 */ 2238 if (origpte & PG_MANAGED) { 2239 if ((origpte & PG_M) && pmap_track_modified(va)) { 2240 vm_page_t om; 2241 om = PHYS_TO_VM_PAGE(opa); 2242 vm_page_dirty(om); 2243 } 2244 pa |= PG_MANAGED; 2245 } 2246 goto validate; 2247 } 2248 /* 2249 * Mapping has changed, invalidate old range and fall through to 2250 * handle validating new mapping. 2251 */ 2252 if (opa) { 2253 int err; 2254 err = pmap_remove_pte(pmap, pte, va); 2255 if (err) 2256 panic("pmap_enter: pte vanished, va: 0x%x", va); 2257 } 2258 2259 /* 2260 * Enter on the PV list if part of our managed memory. Note that we 2261 * raise IPL while manipulating pv_table since pmap_enter can be 2262 * called at interrupt time. 
2263 */ 2264 if (pmap_initialized && 2265 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2266 pmap_insert_entry(pmap, va, mpte, m); 2267 pa |= PG_MANAGED; 2268 } 2269 2270 /* 2271 * Increment counters 2272 */ 2273 pmap->pm_stats.resident_count++; 2274 if (wired) 2275 pmap->pm_stats.wired_count++; 2276 2277validate: 2278 /* 2279 * Now validate mapping with desired protection/wiring. 2280 */ 2281 newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); 2282 2283 if (wired) 2284 newpte |= PG_W; 2285 if (va < VM_MAXUSER_ADDRESS) 2286 newpte |= PG_U; 2287 if (pmap == kernel_pmap) 2288 newpte |= pgeflag; 2289 2290 /* 2291 * if the mapping or permission bits are different, we need 2292 * to update the pte. 2293 */ 2294 if ((origpte & ~(PG_M|PG_A)) != newpte) { 2295 *pte = newpte | PG_A; 2296 /*if (origpte)*/ { 2297#ifdef SMP 2298 cpu_invlpg((void *)va); 2299 if (pmap->pm_active & PCPU_GET(other_cpus)) 2300 smp_invltlb(); 2301#else 2302 invltlb_1pg(va); 2303#endif 2304 } 2305 } 2306} 2307 2308/* 2309 * this code makes some *MAJOR* assumptions: 2310 * 1. Current pmap & pmap exists. 2311 * 2. Not wired. 2312 * 3. Read access. 2313 * 4. No page table pages. 2314 * 5. Tlbflush is deferred to calling procedure. 2315 * 6. Page IS managed. 2316 * but is *MUCH* faster than pmap_enter... 2317 */ 2318 2319static vm_page_t 2320pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) 2321{ 2322 pt_entry_t *pte; 2323 vm_offset_t pa; 2324 2325 /* 2326 * In the case that a page table page is not 2327 * resident, we are creating it here. 2328 */ 2329 if (va < VM_MAXUSER_ADDRESS) { 2330 unsigned ptepindex; 2331 pd_entry_t ptepa; 2332 2333 /* 2334 * Calculate pagetable page index 2335 */ 2336 ptepindex = va >> PDRSHIFT; 2337 if (mpte && (mpte->pindex == ptepindex)) { 2338 mpte->hold_count++; 2339 } else { 2340retry: 2341 /* 2342 * Get the page directory entry 2343 */ 2344 ptepa = pmap->pm_pdir[ptepindex]; 2345 2346 /* 2347 * If the page table page is mapped, we just increment 2348 * the hold count, and activate it. 2349 */ 2350 if (ptepa) { 2351 if (ptepa & PG_PS) 2352 panic("pmap_enter_quick: unexpected mapping into 4MB page"); 2353 if (pmap->pm_ptphint && 2354 (pmap->pm_ptphint->pindex == ptepindex)) { 2355 mpte = pmap->pm_ptphint; 2356 } else { 2357 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 2358 pmap->pm_ptphint = mpte; 2359 } 2360 if (mpte == NULL) 2361 goto retry; 2362 mpte->hold_count++; 2363 } else { 2364 mpte = _pmap_allocpte(pmap, ptepindex); 2365 } 2366 } 2367 } else { 2368 mpte = NULL; 2369 } 2370 2371 /* 2372 * This call to vtopte makes the assumption that we are 2373 * entering the page into the current pmap. In order to support 2374 * quick entry into any pmap, one would likely use pmap_pte_quick. 2375 * But that isn't as quick as vtopte. 2376 */ 2377 pte = vtopte(va); 2378 if (*pte) { 2379 if (mpte) 2380 pmap_unwire_pte_hold(pmap, mpte); 2381 return 0; 2382 } 2383 2384 /* 2385 * Enter on the PV list if part of our managed memory. Note that we 2386 * raise IPL while manipulating pv_table since pmap_enter can be 2387 * called at interrupt time. 
2388 */ 2389 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) 2390 pmap_insert_entry(pmap, va, mpte, m); 2391 2392 /* 2393 * Increment counters 2394 */ 2395 pmap->pm_stats.resident_count++; 2396 2397 pa = VM_PAGE_TO_PHYS(m); 2398 2399 /* 2400 * Now validate mapping with RO protection 2401 */ 2402 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2403 *pte = pa | PG_V | PG_U; 2404 else 2405 *pte = pa | PG_V | PG_U | PG_MANAGED; 2406 2407 return mpte; 2408} 2409 2410/* 2411 * Make a temporary mapping for a physical address. This is only intended 2412 * to be used for panic dumps. 2413 */ 2414void * 2415pmap_kenter_temporary(vm_offset_t pa, int i) 2416{ 2417 pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); 2418 return ((void *)crashdumpmap); 2419} 2420 2421#define MAX_INIT_PT (96) 2422/* 2423 * pmap_object_init_pt preloads the ptes for a given object 2424 * into the specified pmap. This eliminates the blast of soft 2425 * faults on process startup and immediately after an mmap. 2426 */ 2427void 2428pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2429 vm_object_t object, vm_pindex_t pindex, 2430 vm_size_t size, int limit) 2431{ 2432 vm_offset_t tmpidx; 2433 int psize; 2434 vm_page_t p, mpte; 2435 int objpgs; 2436 2437 if (pmap == NULL || object == NULL) 2438 return; 2439 2440 /* 2441 * This code maps large physical mmap regions into the 2442 * processor address space. Note that some shortcuts 2443 * are taken, but the code works. 2444 */ 2445 if (pseflag && (object->type == OBJT_DEVICE) && 2446 ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { 2447 int i; 2448 vm_page_t m[1]; 2449 unsigned int ptepindex; 2450 int npdes; 2451 pd_entry_t ptepa; 2452 2453 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) 2454 return; 2455 2456retry: 2457 p = vm_page_lookup(object, pindex); 2458 if (p && vm_page_sleep_busy(p, FALSE, "init4p")) 2459 goto retry; 2460 2461 if (p == NULL) { 2462 p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); 2463 if (p == NULL) 2464 return; 2465 m[0] = p; 2466 2467 if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { 2468 vm_page_free(p); 2469 return; 2470 } 2471 2472 p = vm_page_lookup(object, pindex); 2473 vm_page_wakeup(p); 2474 } 2475 2476 ptepa = VM_PAGE_TO_PHYS(p); 2477 if (ptepa & (NBPDR - 1)) { 2478 return; 2479 } 2480 2481 p->valid = VM_PAGE_BITS_ALL; 2482 2483 pmap->pm_stats.resident_count += size >> PAGE_SHIFT; 2484 npdes = size >> PDRSHIFT; 2485 for(i = 0; i < npdes; i++) { 2486 pmap->pm_pdir[ptepindex] = 2487 ptepa | PG_U | PG_RW | PG_V | PG_PS; 2488 ptepa += NBPDR; 2489 ptepindex += 1; 2490 } 2491 vm_page_flag_set(p, PG_MAPPED); 2492 invltlb(); 2493 return; 2494 } 2495 2496 psize = i386_btop(size); 2497 2498 if ((object->type != OBJT_VNODE) || 2499 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2500 (object->resident_page_count > MAX_INIT_PT))) { 2501 return; 2502 } 2503 2504 if (psize + pindex > object->size) { 2505 if (object->size < pindex) 2506 return; 2507 psize = object->size - pindex; 2508 } 2509 2510 mpte = NULL; 2511 /* 2512 * if we are processing a major portion of the object, then scan the 2513 * entire thing. 
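 * (That is, when the request covers more than a quarter of the object's
 * resident pages the object's memq list is walked; otherwise the pages are
 * looked up one index at a time below.)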
2514 */ 2515 if (psize > (object->resident_page_count >> 2)) { 2516 objpgs = psize; 2517 2518 for (p = TAILQ_FIRST(&object->memq); 2519 ((objpgs > 0) && (p != NULL)); 2520 p = TAILQ_NEXT(p, listq)) { 2521 2522 tmpidx = p->pindex; 2523 if (tmpidx < pindex) { 2524 continue; 2525 } 2526 tmpidx -= pindex; 2527 if (tmpidx >= psize) { 2528 continue; 2529 } 2530 /* 2531 * don't allow an madvise to blow away our really 2532 * free pages allocating pv entries. 2533 */ 2534 if ((limit & MAP_PREFAULT_MADVISE) && 2535 cnt.v_free_count < cnt.v_free_reserved) { 2536 break; 2537 } 2538 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2539 (p->busy == 0) && 2540 (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2541 if ((p->queue - p->pc) == PQ_CACHE) 2542 vm_page_deactivate(p); 2543 vm_page_busy(p); 2544 mpte = pmap_enter_quick(pmap, 2545 addr + i386_ptob(tmpidx), p, mpte); 2546 vm_page_flag_set(p, PG_MAPPED); 2547 vm_page_wakeup(p); 2548 } 2549 objpgs -= 1; 2550 } 2551 } else { 2552 /* 2553 * else lookup the pages one-by-one. 2554 */ 2555 for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { 2556 /* 2557 * don't allow an madvise to blow away our really 2558 * free pages allocating pv entries. 2559 */ 2560 if ((limit & MAP_PREFAULT_MADVISE) && 2561 cnt.v_free_count < cnt.v_free_reserved) { 2562 break; 2563 } 2564 p = vm_page_lookup(object, tmpidx + pindex); 2565 if (p && 2566 ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2567 (p->busy == 0) && 2568 (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2569 if ((p->queue - p->pc) == PQ_CACHE) 2570 vm_page_deactivate(p); 2571 vm_page_busy(p); 2572 mpte = pmap_enter_quick(pmap, 2573 addr + i386_ptob(tmpidx), p, mpte); 2574 vm_page_flag_set(p, PG_MAPPED); 2575 vm_page_wakeup(p); 2576 } 2577 } 2578 } 2579 return; 2580} 2581 2582/* 2583 * pmap_prefault provides a quick way of clustering 2584 * pagefaults into a processes address space. It is a "cousin" 2585 * of pmap_object_init_pt, except it runs at page fault time instead 2586 * of mmap time. 
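 * (It probes the offsets in pmap_prefault_pageorder[] below, up to PFBAK
 * pages behind and PFFOR pages ahead of the faulting address, and maps any
 * already-resident pages with pmap_enter_quick().)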
2587 */
2588#define PFBAK 4
2589#define PFFOR 4
2590#define PAGEORDER_SIZE (PFBAK+PFFOR)
2591
2592static int pmap_prefault_pageorder[] = {
2593 -PAGE_SIZE, PAGE_SIZE,
2594 -2 * PAGE_SIZE, 2 * PAGE_SIZE,
2595 -3 * PAGE_SIZE, 3 * PAGE_SIZE,
2596 -4 * PAGE_SIZE, 4 * PAGE_SIZE
2597};
2598
2599void
2600pmap_prefault(pmap, addra, entry)
2601 pmap_t pmap;
2602 vm_offset_t addra;
2603 vm_map_entry_t entry;
2604{
2605 int i;
2606 vm_offset_t starta;
2607 vm_offset_t addr;
2608 vm_pindex_t pindex;
2609 vm_page_t m, mpte;
2610 vm_object_t object;
2611
2612 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
2613 return;
2614
2615 object = entry->object.vm_object;
2616
2617 starta = addra - PFBAK * PAGE_SIZE;
2618 if (starta < entry->start) {
2619 starta = entry->start;
2620 } else if (starta > addra) {
2621 starta = 0;
2622 }
2623
2624 mpte = NULL;
2625 for (i = 0; i < PAGEORDER_SIZE; i++) {
2626 vm_object_t lobject;
2627 pt_entry_t *pte;
2628
2629 addr = addra + pmap_prefault_pageorder[i];
2630 if (addr > addra + (PFFOR * PAGE_SIZE))
2631 addr = 0;
2632
2633 if (addr < starta || addr >= entry->end)
2634 continue;
2635
2636 if ((*pmap_pde(pmap, addr)) == NULL)
2637 continue;
2638
2639 pte = vtopte(addr);
2640 if (*pte)
2641 continue;
2642
2643 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2644 lobject = object;
2645 for (m = vm_page_lookup(lobject, pindex);
2646 (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2647 lobject = lobject->backing_object) {
2648 if (lobject->backing_object_offset & PAGE_MASK)
2649 break;
2650 pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2651 m = vm_page_lookup(lobject->backing_object, pindex);
2652 }
2653
2654 /*
2655 * give up when a page is not in memory
2656 */
2657 if (m == NULL)
2658 break;
2659
2660 if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2661 (m->busy == 0) &&
2662 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2663
2664 if ((m->queue - m->pc) == PQ_CACHE) {
2665 vm_page_deactivate(m);
2666 }
2667 vm_page_busy(m);
2668 mpte = pmap_enter_quick(pmap, addr, m, mpte);
2669 vm_page_flag_set(m, PG_MAPPED);
2670 vm_page_wakeup(m);
2671 }
2672 }
2673}
2674
2675/*
2676 * Routine: pmap_change_wiring
2677 * Function: Change the wiring attribute for a map/virtual-address
2678 * pair.
2679 * In/out conditions:
2680 * The mapping must already exist in the pmap.
2681 */
2682void
2683pmap_change_wiring(pmap, va, wired)
2684 register pmap_t pmap;
2685 vm_offset_t va;
2686 boolean_t wired;
2687{
2688 register pt_entry_t *pte;
2689
2690 if (pmap == NULL)
2691 return;
2692
2693 pte = pmap_pte(pmap, va);
2694
2695 if (wired && !pmap_pte_w(pte))
2696 pmap->pm_stats.wired_count++;
2697 else if (!wired && pmap_pte_w(pte))
2698 pmap->pm_stats.wired_count--;
2699
2700 /*
2701 * Wiring is not a hardware characteristic so there is no need to
2702 * invalidate TLB.
2703 */
2704 pmap_pte_set_w(pte, wired);
2705}
2706
2707
2708
2709/*
2710 * Copy the range specified by src_addr/len
2711 * from the source map to the range dst_addr/len
2712 * in the destination map.
2713 *
2714 * This routine is only advisory and need not do anything.
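 * (In this implementation it copies managed mappings one page-table page at
 * a time; the usual caller is the fork path, which uses it to cut down the
 * child's initial soft faults.)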
2715 */ 2716 2717void 2718pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2719 vm_offset_t src_addr) 2720{ 2721 vm_offset_t addr; 2722 vm_offset_t end_addr = src_addr + len; 2723 vm_offset_t pdnxt; 2724 pd_entry_t src_frame, dst_frame; 2725 vm_page_t m; 2726 pd_entry_t saved_pde; 2727 2728 if (dst_addr != src_addr) 2729 return; 2730 2731 src_frame = src_pmap->pm_pdir[PTDPTDI] & PG_FRAME; 2732 if (src_frame != (PTDpde & PG_FRAME)) 2733 return; 2734 2735 dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; 2736 if (dst_frame != (APTDpde & PG_FRAME)) { 2737 APTDpde = dst_frame | PG_RW | PG_V; 2738#if defined(SMP) 2739 /* The page directory is not shared between CPUs */ 2740 cpu_invltlb(); 2741#else 2742 invltlb(); 2743#endif 2744 } 2745 saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V); 2746 for(addr = src_addr; addr < end_addr; addr = pdnxt) { 2747 pt_entry_t *src_pte, *dst_pte; 2748 vm_page_t dstmpte, srcmpte; 2749 pd_entry_t srcptepaddr; 2750 unsigned ptepindex; 2751 2752 if (addr >= UPT_MIN_ADDRESS) 2753 panic("pmap_copy: invalid to pmap_copy page tables\n"); 2754 2755 /* 2756 * Don't let optional prefaulting of pages make us go 2757 * way below the low water mark of free pages or way 2758 * above high water mark of used pv entries. 2759 */ 2760 if (cnt.v_free_count < cnt.v_free_reserved || 2761 pv_entry_count > pv_entry_high_water) 2762 break; 2763 2764 pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); 2765 ptepindex = addr >> PDRSHIFT; 2766 2767 srcptepaddr = src_pmap->pm_pdir[ptepindex]; 2768 if (srcptepaddr == 0) 2769 continue; 2770 2771 if (srcptepaddr & PG_PS) { 2772 if (dst_pmap->pm_pdir[ptepindex] == 0) { 2773 dst_pmap->pm_pdir[ptepindex] = srcptepaddr; 2774 dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; 2775 } 2776 continue; 2777 } 2778 2779 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); 2780 if ((srcmpte == NULL) || 2781 (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) 2782 continue; 2783 2784 if (pdnxt > end_addr) 2785 pdnxt = end_addr; 2786 2787 src_pte = vtopte(addr); 2788 dst_pte = avtopte(addr); 2789 while (addr < pdnxt) { 2790 pt_entry_t ptetemp; 2791 ptetemp = *src_pte; 2792 /* 2793 * we only virtual copy managed pages 2794 */ 2795 if ((ptetemp & PG_MANAGED) != 0) { 2796 /* 2797 * We have to check after allocpte for the 2798 * pte still being around... allocpte can 2799 * block. 2800 */ 2801 dstmpte = pmap_allocpte(dst_pmap, addr); 2802 if ((APTDpde & PG_FRAME) != 2803 (saved_pde & PG_FRAME)) { 2804 APTDpde = saved_pde; 2805printf ("IT HAPPENNED!"); 2806#if defined(SMP) 2807 cpu_invltlb(); 2808#else 2809 invltlb(); 2810#endif 2811 } 2812 if ((*dst_pte == 0) && (ptetemp = *src_pte)) { 2813 /* 2814 * Clear the modified and 2815 * accessed (referenced) bits 2816 * during the copy. 2817 */ 2818 m = PHYS_TO_VM_PAGE(ptetemp); 2819 *dst_pte = ptetemp & ~(PG_M | PG_A); 2820 dst_pmap->pm_stats.resident_count++; 2821 pmap_insert_entry(dst_pmap, addr, 2822 dstmpte, m); 2823 } else { 2824 pmap_unwire_pte_hold(dst_pmap, dstmpte); 2825 } 2826 if (dstmpte->hold_count >= srcmpte->hold_count) 2827 break; 2828 } 2829 addr += PAGE_SIZE; 2830 src_pte++; 2831 dst_pte++; 2832 } 2833 } 2834} 2835 2836/* 2837 * Routine: pmap_kernel 2838 * Function: 2839 * Returns the physical map handle for the kernel. 2840 */ 2841pmap_t 2842pmap_kernel() 2843{ 2844 return (kernel_pmap); 2845} 2846 2847/* 2848 * pmap_zero_page zeros the specified hardware page by mapping 2849 * the page into KVM and using bzero to clear its contents. 
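 * (The CMAP2/CADDR2 pte and va pair declared earlier in this file is
 * reserved for this temporary mapping; on 686-class CPUs i686_pagezero()
 * is used instead of a plain bzero().)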
2850 */ 2851void 2852pmap_zero_page(vm_offset_t phys) 2853{ 2854 2855 if (*CMAP2) 2856 panic("pmap_zero_page: CMAP2 busy"); 2857 2858 *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; 2859 invltlb_1pg((vm_offset_t)CADDR2); 2860 2861#if defined(I686_CPU) 2862 if (cpu_class == CPUCLASS_686) 2863 i686_pagezero(CADDR2); 2864 else 2865#endif 2866 bzero(CADDR2, PAGE_SIZE); 2867 *CMAP2 = 0; 2868} 2869 2870/* 2871 * pmap_zero_page_area zeros the specified hardware page by mapping 2872 * the page into KVM and using bzero to clear its contents. 2873 * 2874 * off and size may not cover an area beyond a single hardware page. 2875 */ 2876void 2877pmap_zero_page_area(vm_offset_t phys, int off, int size) 2878{ 2879 2880 if (*CMAP2) 2881 panic("pmap_zero_page: CMAP2 busy"); 2882 2883 *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; 2884 invltlb_1pg((vm_offset_t)CADDR2); 2885 2886#if defined(I686_CPU) 2887 if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) 2888 i686_pagezero(CADDR2); 2889 else 2890#endif 2891 bzero((char *)CADDR2 + off, size); 2892 *CMAP2 = 0; 2893} 2894 2895/* 2896 * pmap_copy_page copies the specified (machine independent) 2897 * page by mapping the page into virtual memory and using 2898 * bcopy to copy the page, one machine dependent page at a 2899 * time. 2900 */ 2901void 2902pmap_copy_page(vm_offset_t src, vm_offset_t dst) 2903{ 2904 2905 if (*CMAP1) 2906 panic("pmap_copy_page: CMAP1 busy"); 2907 if (*CMAP2) 2908 panic("pmap_copy_page: CMAP2 busy"); 2909 2910 *CMAP1 = PG_V | (src & PG_FRAME) | PG_A; 2911 *CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; 2912#ifdef I386_CPU 2913 invltlb(); 2914#else 2915 invlpg((u_int)CADDR1); 2916 invlpg((u_int)CADDR2); 2917#endif 2918 2919 bcopy(CADDR1, CADDR2, PAGE_SIZE); 2920 2921 *CMAP1 = 0; 2922 *CMAP2 = 0; 2923} 2924 2925 2926/* 2927 * Routine: pmap_pageable 2928 * Function: 2929 * Make the specified pages (by pmap, offset) 2930 * pageable (or not) as requested. 2931 * 2932 * A page which is not pageable may not take 2933 * a fault; therefore, its page table entry 2934 * must remain valid for the duration. 2935 * 2936 * This routine is merely advisory; pmap_enter 2937 * will specify that these pages are to be wired 2938 * down (or not) as appropriate. 2939 */ 2940void 2941pmap_pageable(pmap, sva, eva, pageable) 2942 pmap_t pmap; 2943 vm_offset_t sva, eva; 2944 boolean_t pageable; 2945{ 2946} 2947 2948/* 2949 * this routine returns true if a physical page resides 2950 * in the given pmap. 2951 */ 2952boolean_t 2953pmap_page_exists(pmap, m) 2954 pmap_t pmap; 2955 vm_page_t m; 2956{ 2957 register pv_entry_t pv; 2958 int s; 2959 2960 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2961 return FALSE; 2962 2963 s = splvm(); 2964 2965 /* 2966 * Not found, check current mappings returning immediately if found. 2967 */ 2968 for (pv = TAILQ_FIRST(&m->md.pv_list); 2969 pv; 2970 pv = TAILQ_NEXT(pv, pv_list)) { 2971 if (pv->pv_pmap == pmap) { 2972 splx(s); 2973 return TRUE; 2974 } 2975 } 2976 splx(s); 2977 return (FALSE); 2978} 2979 2980#define PMAP_REMOVE_PAGES_CURPROC_ONLY 2981/* 2982 * Remove all pages from specified address space 2983 * this aids process exit speeds. Also, this code 2984 * is special cased for current process only, but 2985 * can have the more generic (and slightly slower) 2986 * mode enabled. This is much faster than pmap_remove 2987 * in the case of running down an entire address space. 
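 * (The speed comes from walking the pmap's pv list rather than the page
 * tables, so the cost scales with the number of managed mappings actually
 * present; wired mappings are skipped.)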
2988 */ 2989void 2990pmap_remove_pages(pmap, sva, eva) 2991 pmap_t pmap; 2992 vm_offset_t sva, eva; 2993{ 2994 pt_entry_t *pte, tpte; 2995 vm_page_t m; 2996 pv_entry_t pv, npv; 2997 int s; 2998 2999#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 3000 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { 3001 printf("warning: pmap_remove_pages called with non-current pmap\n"); 3002 return; 3003 } 3004#endif 3005 3006 s = splvm(); 3007 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 3008 3009 if (pv->pv_va >= eva || pv->pv_va < sva) { 3010 npv = TAILQ_NEXT(pv, pv_plist); 3011 continue; 3012 } 3013 3014#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 3015 pte = vtopte(pv->pv_va); 3016#else 3017 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3018#endif 3019 tpte = *pte; 3020 3021 if (tpte == 0) { 3022 printf("TPTE at %p IS ZERO @ VA %08x\n", 3023 pte, pv->pv_va); 3024 panic("bad pte"); 3025 } 3026 3027/* 3028 * We cannot remove wired pages from a process' mapping at this time 3029 */ 3030 if (tpte & PG_W) { 3031 npv = TAILQ_NEXT(pv, pv_plist); 3032 continue; 3033 } 3034 3035 m = PHYS_TO_VM_PAGE(tpte); 3036 KASSERT(m->phys_addr == (tpte & PG_FRAME), 3037 ("vm_page_t %p phys_addr mismatch %08x %08x", 3038 m, m->phys_addr, tpte)); 3039 3040 KASSERT(m < &vm_page_array[vm_page_array_size], 3041 ("pmap_remove_pages: bad tpte %x", tpte)); 3042 3043 pv->pv_pmap->pm_stats.resident_count--; 3044 3045 *pte = 0; 3046 3047 /* 3048 * Update the vm_page_t clean and reference bits. 3049 */ 3050 if (tpte & PG_M) { 3051 vm_page_dirty(m); 3052 } 3053 3054 npv = TAILQ_NEXT(pv, pv_plist); 3055 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 3056 3057 m->md.pv_list_count--; 3058 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3059 if (TAILQ_FIRST(&m->md.pv_list) == NULL) { 3060 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 3061 } 3062 3063 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 3064 free_pv_entry(pv); 3065 } 3066 splx(s); 3067 pmap_invalidate_all(pmap); 3068} 3069 3070/* 3071 * pmap_testbit tests bits in pte's 3072 * note that the testbit/changebit routines are inline, 3073 * and a lot of things compile-time evaluate. 3074 */ 3075static boolean_t 3076pmap_testbit(m, bit) 3077 vm_page_t m; 3078 int bit; 3079{ 3080 pv_entry_t pv; 3081 pt_entry_t *pte; 3082 int s; 3083 3084 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3085 return FALSE; 3086 3087 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 3088 return FALSE; 3089 3090 s = splvm(); 3091 3092 for (pv = TAILQ_FIRST(&m->md.pv_list); 3093 pv; 3094 pv = TAILQ_NEXT(pv, pv_list)) { 3095 3096 /* 3097 * if the bit being tested is the modified bit, then 3098 * mark clean_map and ptes as never 3099 * modified. 
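 * (Concretely, virtual addresses rejected by pmap_track_modified(), such as
 * those in the kernel's clean submap, are skipped when PG_M or PG_A is
 * being tested.)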
3100 */ 3101 if (bit & (PG_A|PG_M)) { 3102 if (!pmap_track_modified(pv->pv_va)) 3103 continue; 3104 } 3105 3106#if defined(PMAP_DIAGNOSTIC) 3107 if (!pv->pv_pmap) { 3108 printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); 3109 continue; 3110 } 3111#endif 3112 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3113 if (*pte & bit) { 3114 splx(s); 3115 return TRUE; 3116 } 3117 } 3118 splx(s); 3119 return (FALSE); 3120} 3121 3122/* 3123 * this routine is used to modify bits in ptes 3124 */ 3125static __inline void 3126pmap_changebit(vm_page_t m, int bit, boolean_t setem) 3127{ 3128 register pv_entry_t pv; 3129 register pt_entry_t *pte; 3130 int s; 3131 3132 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3133 return; 3134 3135 s = splvm(); 3136 3137 /* 3138 * Loop over all current mappings setting/clearing as appropos If 3139 * setting RO do we need to clear the VAC? 3140 */ 3141 for (pv = TAILQ_FIRST(&m->md.pv_list); 3142 pv; 3143 pv = TAILQ_NEXT(pv, pv_list)) { 3144 3145 /* 3146 * don't write protect pager mappings 3147 */ 3148 if (!setem && (bit == PG_RW)) { 3149 if (!pmap_track_modified(pv->pv_va)) 3150 continue; 3151 } 3152 3153#if defined(PMAP_DIAGNOSTIC) 3154 if (!pv->pv_pmap) { 3155 printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); 3156 continue; 3157 } 3158#endif 3159 3160 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3161 3162 if (setem) { 3163 *pte |= bit; 3164 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3165 } else { 3166 pt_entry_t pbits = *pte; 3167 if (pbits & bit) { 3168 if (bit == PG_RW) { 3169 if (pbits & PG_M) { 3170 vm_page_dirty(m); 3171 } 3172 *pte = pbits & ~(PG_M|PG_RW); 3173 } else { 3174 *pte = pbits & ~bit; 3175 } 3176 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3177 } 3178 } 3179 } 3180 splx(s); 3181} 3182 3183/* 3184 * pmap_page_protect: 3185 * 3186 * Lower the permission for all mappings to a given page. 3187 */ 3188void 3189pmap_page_protect(vm_page_t m, vm_prot_t prot) 3190{ 3191 if ((prot & VM_PROT_WRITE) == 0) { 3192 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3193 pmap_changebit(m, PG_RW, FALSE); 3194 } else { 3195 pmap_remove_all(m); 3196 } 3197 } 3198} 3199 3200vm_offset_t 3201pmap_phys_address(ppn) 3202 int ppn; 3203{ 3204 return (i386_ptob(ppn)); 3205} 3206 3207/* 3208 * pmap_ts_referenced: 3209 * 3210 * Return the count of reference bits for a page, clearing all of them. 3211 */ 3212int 3213pmap_ts_referenced(vm_page_t m) 3214{ 3215 register pv_entry_t pv, pvf, pvn; 3216 pt_entry_t *pte; 3217 int s; 3218 int rtval = 0; 3219 3220 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3221 return (rtval); 3222 3223 s = splvm(); 3224 3225 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3226 3227 pvf = pv; 3228 3229 do { 3230 pvn = TAILQ_NEXT(pv, pv_list); 3231 3232 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3233 3234 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3235 3236 if (!pmap_track_modified(pv->pv_va)) 3237 continue; 3238 3239 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3240 3241 if (pte && (*pte & PG_A)) { 3242 *pte &= ~PG_A; 3243 3244 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3245 3246 rtval++; 3247 if (rtval > 4) { 3248 break; 3249 } 3250 } 3251 } while ((pv = pvn) != NULL && pv != pvf); 3252 } 3253 splx(s); 3254 3255 return (rtval); 3256} 3257 3258/* 3259 * pmap_is_modified: 3260 * 3261 * Return whether or not the specified physical page was modified 3262 * in any physical maps. 
3263 */ 3264boolean_t 3265pmap_is_modified(vm_page_t m) 3266{ 3267 return pmap_testbit(m, PG_M); 3268} 3269 3270/* 3271 * Clear the modify bits on the specified physical page. 3272 */ 3273void 3274pmap_clear_modify(vm_page_t m) 3275{ 3276 pmap_changebit(m, PG_M, FALSE); 3277} 3278 3279/* 3280 * pmap_clear_reference: 3281 * 3282 * Clear the reference bit on the specified physical page. 3283 */ 3284void 3285pmap_clear_reference(vm_page_t m) 3286{ 3287 pmap_changebit(m, PG_A, FALSE); 3288} 3289 3290/* 3291 * Miscellaneous support routines follow 3292 */ 3293 3294static void 3295i386_protection_init() 3296{ 3297 register int *kp, prot; 3298 3299 kp = protection_codes; 3300 for (prot = 0; prot < 8; prot++) { 3301 switch (prot) { 3302 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: 3303 /* 3304 * Read access is also 0. There isn't any execute bit, 3305 * so just make it readable. 3306 */ 3307 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: 3308 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: 3309 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: 3310 *kp++ = 0; 3311 break; 3312 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: 3313 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: 3314 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: 3315 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: 3316 *kp++ = PG_RW; 3317 break; 3318 } 3319 } 3320} 3321 3322/* 3323 * Map a set of physical memory pages into the kernel virtual 3324 * address space. Return a pointer to where it is mapped. This 3325 * routine is intended to be used for mapping device memory, 3326 * NOT real memory. 3327 */ 3328void * 3329pmap_mapdev(pa, size) 3330 vm_offset_t pa; 3331 vm_size_t size; 3332{ 3333 vm_offset_t va, tmpva, offset; 3334 pt_entry_t *pte; 3335 3336 offset = pa & PAGE_MASK; 3337 size = roundup(offset + size, PAGE_SIZE); 3338 3339 GIANT_REQUIRED; 3340 3341 va = kmem_alloc_pageable(kernel_map, size); 3342 if (!va) 3343 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 3344 3345 pa = pa & PG_FRAME; 3346 for (tmpva = va; size > 0;) { 3347 pte = vtopte(tmpva); 3348 *pte = pa | PG_RW | PG_V | pgeflag; 3349 size -= PAGE_SIZE; 3350 tmpva += PAGE_SIZE; 3351 pa += PAGE_SIZE; 3352 } 3353 invltlb(); 3354 3355 return ((void *)(va + offset)); 3356} 3357 3358void 3359pmap_unmapdev(va, size) 3360 vm_offset_t va; 3361 vm_size_t size; 3362{ 3363 vm_offset_t base, offset; 3364 3365 base = va & PG_FRAME; 3366 offset = va & PAGE_MASK; 3367 size = roundup(offset + size, PAGE_SIZE); 3368 kmem_free(kernel_map, base, size); 3369} 3370 3371/* 3372 * perform the pmap work for mincore 3373 */ 3374int 3375pmap_mincore(pmap, addr) 3376 pmap_t pmap; 3377 vm_offset_t addr; 3378{ 3379 3380 pt_entry_t *ptep, pte; 3381 vm_page_t m; 3382 int val = 0; 3383 3384 ptep = pmap_pte(pmap, addr); 3385 if (ptep == 0) { 3386 return 0; 3387 } 3388 3389 if ((pte = *ptep) != 0) { 3390 vm_offset_t pa; 3391 3392 val = MINCORE_INCORE; 3393 if ((pte & PG_MANAGED) == 0) 3394 return val; 3395 3396 pa = pte & PG_FRAME; 3397 3398 m = PHYS_TO_VM_PAGE(pa); 3399 3400 /* 3401 * Modified by us 3402 */ 3403 if (pte & PG_M) 3404 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3405 /* 3406 * Modified by someone 3407 */ 3408 else if (m->dirty || pmap_is_modified(m)) 3409 val |= MINCORE_MODIFIED_OTHER; 3410 /* 3411 * Referenced by us 3412 */ 3413 if (pte & PG_A) 3414 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3415 3416 /* 3417 * Referenced by someone 3418 */ 3419 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { 3420 val |= 
MINCORE_REFERENCED_OTHER; 3421 vm_page_flag_set(m, PG_REFERENCED); 3422 } 3423 } 3424 return val; 3425} 3426 3427void 3428pmap_activate(struct thread *td) 3429{ 3430 struct proc *p = td->td_proc; 3431 pmap_t pmap; 3432 u_int32_t cr3; 3433 3434 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3435#if defined(SMP) 3436 pmap->pm_active |= 1 << PCPU_GET(cpuid); 3437#else 3438 pmap->pm_active |= 1; 3439#endif 3440#if defined(SWTCH_OPTIM_STATS) 3441 tlb_flush_count++; 3442#endif 3443 cr3 = vtophys(pmap->pm_pdir); 3444 /* XXXKSE this is wrong. 3445 * pmap_activate is for the current thread on the current cpu 3446 */ 3447 if (p->p_flag & P_KSES) { 3448 /* Make sure all other cr3 entries are updated. */ 3449 /* what if they are running? XXXKSE (maybe abort them) */ 3450 FOREACH_THREAD_IN_PROC(p, td) { 3451 td->td_pcb->pcb_cr3 = cr3; 3452 } 3453 } else { 3454 td->td_pcb->pcb_cr3 = cr3; 3455 } 3456 load_cr3(cr3); 3457} 3458 3459vm_offset_t 3460pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3461{ 3462 3463 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3464 return addr; 3465 } 3466 3467 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 3468 return addr; 3469} 3470 3471 3472#if defined(PMAP_DEBUG) 3473pmap_pid_dump(int pid) 3474{ 3475 pmap_t pmap; 3476 struct proc *p; 3477 int npte = 0; 3478 int index; 3479 3480 sx_slock(&allproc_lock); 3481 LIST_FOREACH(p, &allproc, p_list) { 3482 if (p->p_pid != pid) 3483 continue; 3484 3485 if (p->p_vmspace) { 3486 int i,j; 3487 index = 0; 3488 pmap = vmspace_pmap(p->p_vmspace); 3489 for (i = 0; i < NPDEPG; i++) { 3490 pd_entry_t *pde; 3491 pt_entry_t *pte; 3492 vm_offset_t base = i << PDRSHIFT; 3493 3494 pde = &pmap->pm_pdir[i]; 3495 if (pde && pmap_pde_v(pde)) { 3496 for (j = 0; j < NPTEPG; j++) { 3497 vm_offset_t va = base + (j << PAGE_SHIFT); 3498 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 3499 if (index) { 3500 index = 0; 3501 printf("\n"); 3502 } 3503 sx_sunlock(&allproc_lock); 3504 return npte; 3505 } 3506 pte = pmap_pte_quick(pmap, va); 3507 if (pte && pmap_pte_v(pte)) { 3508 pt_entry_t pa; 3509 vm_page_t m; 3510 pa = *pte; 3511 m = PHYS_TO_VM_PAGE(pa); 3512 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 3513 va, pa, m->hold_count, m->wire_count, m->flags); 3514 npte++; 3515 index++; 3516 if (index >= 2) { 3517 index = 0; 3518 printf("\n"); 3519 } else { 3520 printf(" "); 3521 } 3522 } 3523 } 3524 } 3525 } 3526 } 3527 } 3528 sx_sunlock(&allproc_lock); 3529 return npte; 3530} 3531#endif 3532 3533#if defined(DEBUG) 3534 3535static void pads __P((pmap_t pm)); 3536void pmap_pvdump __P((vm_offset_t pa)); 3537 3538/* print address space of pmap*/ 3539static void 3540pads(pm) 3541 pmap_t pm; 3542{ 3543 int i, j; 3544 vm_offset_t va; 3545 pt_entry_t *ptep; 3546 3547 if (pm == kernel_pmap) 3548 return; 3549 for (i = 0; i < NPDEPG; i++) 3550 if (pm->pm_pdir[i]) 3551 for (j = 0; j < NPTEPG; j++) { 3552 va = (i << PDRSHIFT) + (j << PAGE_SHIFT); 3553 if (pm == kernel_pmap && va < KERNBASE) 3554 continue; 3555 if (pm != kernel_pmap && va > VM_MAXUSER_ADDRESS) 3556 continue; 3557 ptep = pmap_pte_quick(pm, va); 3558 if (pmap_pte_v(ptep)) 3559 printf("%x:%x ", va, *ptep); 3560 }; 3561 3562} 3563 3564void 3565pmap_pvdump(pa) 3566 vm_offset_t pa; 3567{ 3568 pv_entry_t pv; 3569 vm_page_t m; 3570 3571 printf("pa %x", pa); 3572 m = PHYS_TO_VM_PAGE(pa); 3573 for (pv = TAILQ_FIRST(&m->md.pv_list); 3574 pv; 3575 pv = TAILQ_NEXT(pv, pv_list)) { 3576 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3577 pads(pv->pv_pmap); 3578 } 
3579 printf(" "); 3580} 3581#endif 3582
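/*
 * Illustrative sketch only, not part of the original file: a hedged example
 * of how a caller might use pmap_mapdev()/pmap_unmapdev() above to reach a
 * memory-mapped device region.  The physical address, size and function
 * names below are hypothetical.
 */
#if 0
static void *
example_map_device_regs(void)
{
	/* Hypothetical physical address and size of a device register window. */
	vm_offset_t regs_pa = 0xfeb00000;
	vm_size_t regs_size = 2 * PAGE_SIZE;

	/*
	 * pmap_mapdev() rounds the request to whole pages, allocates kernel
	 * virtual address space and returns a KVA alias for the region.
	 */
	return (pmap_mapdev(regs_pa, regs_size));
}

static void
example_unmap_device_regs(void *va)
{
	/* Release the kernel virtual addresses; the device memory is untouched. */
	pmap_unmapdev((vm_offset_t)va, 2 * PAGE_SIZE);
}
#endif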