pmap.c revision 116304
1/* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department and William Jolitz of UUNET Technologies Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 42 */ 43 44#include <sys/cdefs.h> 45__FBSDID("$FreeBSD: head/sys/i386/i386/pmap.c 116304 2003-06-13 19:27:52Z alc $"); 46/*- 47 * Copyright (c) 2003 Networks Associates Technology, Inc. 48 * All rights reserved. 49 * 50 * This software was developed for the FreeBSD Project by Jake Burkholder, 51 * Safeport Network Services, and Network Associates Laboratories, the 52 * Security Research Division of Network Associates, Inc. under 53 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 54 * CHATS research program. 55 * 56 * Redistribution and use in source and binary forms, with or without 57 * modification, are permitted provided that the following conditions 58 * are met: 59 * 1. Redistributions of source code must retain the above copyright 60 * notice, this list of conditions and the following disclaimer. 61 * 2. Redistributions in binary form must reproduce the above copyright 62 * notice, this list of conditions and the following disclaimer in the 63 * documentation and/or other materials provided with the distribution. 64 * 65 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 75 * SUCH DAMAGE. 76 */ 77 78/* 79 * Manages physical address maps. 80 * 81 * In addition to hardware address maps, this 82 * module is called upon to provide software-use-only 83 * maps which may or may not be stored in the same 84 * form as hardware maps. These pseudo-maps are 85 * used to store intermediate results from copy 86 * operations to and from address spaces. 87 * 88 * Since the information managed by this module is 89 * also stored by the logical address mapping module, 90 * this module may throw away valid virtual-to-physical 91 * mappings at almost any time. However, invalidations 92 * of virtual-to-physical mappings must be done as 93 * requested. 94 * 95 * In order to cope with hardware architectures which 96 * make virtual-to-physical map invalidates expensive, 97 * this module may delay invalidate or reduced protection 98 * operations until such time as they are actually 99 * necessary. This module is given full information as 100 * to which processors are currently using which maps, 101 * and to when physical maps must be made correct. 102 */ 103 104#include "opt_pmap.h" 105#include "opt_msgbuf.h" 106#include "opt_kstack_pages.h" 107#include "opt_swtch.h" 108 109#include <sys/param.h> 110#include <sys/systm.h> 111#include <sys/kernel.h> 112#include <sys/lock.h> 113#include <sys/mman.h> 114#include <sys/msgbuf.h> 115#include <sys/mutex.h> 116#include <sys/proc.h> 117#include <sys/sx.h> 118#include <sys/user.h> 119#include <sys/vmmeter.h> 120#include <sys/sysctl.h> 121#ifdef SMP 122#include <sys/smp.h> 123#endif 124 125#include <vm/vm.h> 126#include <vm/vm_param.h> 127#include <vm/vm_kern.h> 128#include <vm/vm_page.h> 129#include <vm/vm_map.h> 130#include <vm/vm_object.h> 131#include <vm/vm_extern.h> 132#include <vm/vm_pageout.h> 133#include <vm/vm_pager.h> 134#include <vm/uma.h> 135 136#include <machine/cpu.h> 137#include <machine/cputypes.h> 138#include <machine/md_var.h> 139#include <machine/specialreg.h> 140#if defined(SMP) || defined(APIC_IO) 141#include <machine/smp.h> 142#include <machine/apic.h> 143#include <machine/segments.h> 144#include <machine/tss.h> 145#endif /* SMP || APIC_IO */ 146 147#define PMAP_KEEP_PDIRS 148#ifndef PMAP_SHPGPERPROC 149#define PMAP_SHPGPERPROC 200 150#endif 151 152#if defined(DIAGNOSTIC) 153#define PMAP_DIAGNOSTIC 154#endif 155 156#define MINPV 2048 157 158#if !defined(PMAP_DIAGNOSTIC) 159#define PMAP_INLINE __inline 160#else 161#define PMAP_INLINE 162#endif 163 164/* 165 * Get PDEs and PTEs for user/kernel address space 166 */ 167#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) 168#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 169 170#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) 171#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) 172#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) 173#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) 174#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) 175 176#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) 177#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 178 179/* 180 * Given a map and a machine independent protection code, 181 * convert to a vax protection code. 182 */ 183#define pte_prot(m, p) (protection_codes[p]) 184static int protection_codes[8]; 185 186struct pmap kernel_pmap_store; 187LIST_HEAD(pmaplist, pmap); 188static struct pmaplist allpmaps; 189static struct mtx allpmaps_lock; 190#if defined(SMP) && defined(LAZY_SWITCH) 191static struct mtx lazypmap_lock; 192#endif 193 194vm_paddr_t avail_start; /* PA of first available physical page */ 195vm_paddr_t avail_end; /* PA of last available physical page */ 196vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 197vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 198static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ 199static int pgeflag; /* PG_G or-in */ 200static int pseflag; /* PG_PS or-in */ 201 202static int nkpt; 203vm_offset_t kernel_vm_end; 204extern u_int32_t KERNend; 205 206#ifdef PAE 207static uma_zone_t pdptzone; 208#endif 209 210/* 211 * Data for the pv entry allocation mechanism 212 */ 213static uma_zone_t pvzone; 214static struct vm_object pvzone_obj; 215static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 216int pmap_pagedaemon_waken; 217 218/* 219 * All those kernel PT submaps that BSD is so fond of 220 */ 221pt_entry_t *CMAP1 = 0; 222static pt_entry_t *CMAP2, *CMAP3, *ptmmap; 223caddr_t CADDR1 = 0, ptvmmap = 0; 224static caddr_t CADDR2, CADDR3; 225static struct mtx CMAPCADDR12_lock; 226static pt_entry_t *msgbufmap; 227struct msgbuf *msgbufp = 0; 228 229/* 230 * Crashdump maps. 231 */ 232static pt_entry_t *pt_crashdumpmap; 233static caddr_t crashdumpmap; 234 235#ifdef SMP 236extern pt_entry_t *SMPpt; 237#endif 238static pt_entry_t *PMAP1 = 0; 239static pt_entry_t *PADDR1 = 0; 240 241static PMAP_INLINE void free_pv_entry(pv_entry_t pv); 242static pv_entry_t get_pv_entry(void); 243static void i386_protection_init(void); 244static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); 245 246static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, 247 vm_page_t m, vm_page_t mpte); 248static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); 249static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); 250static int pmap_remove_entry(struct pmap *pmap, vm_page_t m, 251 vm_offset_t va); 252static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, 253 vm_page_t mpte, vm_page_t m); 254 255static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va); 256 257static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex); 258static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); 259static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); 260static vm_offset_t pmap_kmem_choose(vm_offset_t addr); 261static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); 262#ifdef PAE 263static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); 264#endif 265 266static pd_entry_t pdir4mb; 267 268CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); 269CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); 270 271/* 272 * Move the kernel virtual free pointer to the next 273 * 4MB. This is used to help improve performance 274 * by using a large (4MB) page for much of the kernel 275 * (.text, .data, .bss) 276 */ 277static vm_offset_t 278pmap_kmem_choose(vm_offset_t addr) 279{ 280 vm_offset_t newaddr = addr; 281 282#ifdef I686_CPU_not /* Problem seems to have gone away */ 283 /* Deal with un-resolved Pentium4 issues */ 284 if (cpu_class == CPUCLASS_686 && 285 strcmp(cpu_vendor, "GenuineIntel") == 0 && 286 (cpu_id & 0xf00) == 0xf00) 287 return newaddr; 288#endif 289#ifndef DISABLE_PSE 290 if (cpu_feature & CPUID_PSE) 291 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 292#endif 293 return newaddr; 294} 295 296/* 297 * Bootstrap the system enough to run with virtual memory. 298 * 299 * On the i386 this is called after mapping has already been enabled 300 * and just syncs the pmap module with what has already been done. 301 * [We can't call it easily with mapping off since the kernel is not 302 * mapped with PA == VA, hence we would have to relocate every address 303 * from the linked base (virtual) address "KERNBASE" to the actual 304 * (physical) address starting relative to 0] 305 */ 306void 307pmap_bootstrap(firstaddr, loadaddr) 308 vm_paddr_t firstaddr; 309 vm_paddr_t loadaddr; 310{ 311 vm_offset_t va; 312 pt_entry_t *pte; 313 int i; 314 315 avail_start = firstaddr; 316 317 /* 318 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too 319 * large. It should instead be correctly calculated in locore.s and 320 * not based on 'first' (which is a physical address, not a virtual 321 * address, for the start of unused physical memory). The kernel 322 * page tables are NOT double mapped and thus should not be included 323 * in this calculation. 324 */ 325 virtual_avail = (vm_offset_t) KERNBASE + firstaddr; 326 virtual_avail = pmap_kmem_choose(virtual_avail); 327 328 virtual_end = VM_MAX_KERNEL_ADDRESS; 329 330 /* 331 * Initialize protection array. 332 */ 333 i386_protection_init(); 334 335 /* 336 * Initialize the kernel pmap (which is statically allocated). 337 */ 338 kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); 339#ifdef PAE 340 kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); 341#endif 342 kernel_pmap->pm_active = -1; /* don't allow deactivation */ 343 TAILQ_INIT(&kernel_pmap->pm_pvlist); 344 LIST_INIT(&allpmaps); 345#if defined(SMP) && defined(LAZY_SWITCH) 346 mtx_init(&lazypmap_lock, "lazypmap", NULL, MTX_SPIN); 347#endif 348 mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); 349 mtx_lock_spin(&allpmaps_lock); 350 LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); 351 mtx_unlock_spin(&allpmaps_lock); 352 nkpt = NKPT; 353 354 /* 355 * Reserve some special page table entries/VA space for temporary 356 * mapping of pages. 357 */ 358#define SYSMAP(c, p, v, n) \ 359 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 360 361 va = virtual_avail; 362 pte = vtopte(va); 363 364 /* 365 * CMAP1/CMAP2 are used for zeroing and copying pages. 366 * CMAP3 is used for the idle process page zeroing. 367 */ 368 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 369 SYSMAP(caddr_t, CMAP2, CADDR2, 1) 370 SYSMAP(caddr_t, CMAP3, CADDR3, 1) 371 372 mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF); 373 374 /* 375 * Crashdump maps. 376 */ 377 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); 378 379 /* 380 * ptvmmap is used for reading arbitrary physical pages via /dev/mem. 381 * XXX ptmmap is not used. 382 */ 383 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 384 385 /* 386 * msgbufp is used to map the system message buffer. 387 * XXX msgbufmap is not used. 388 */ 389 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 390 atop(round_page(MSGBUF_SIZE))) 391 392 /* 393 * ptemap is used for pmap_pte_quick 394 */ 395 SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); 396 397 virtual_avail = va; 398 399 *CMAP1 = *CMAP2 = 0; 400 for (i = 0; i < NKPT; i++) 401 PTD[i] = 0; 402 403 pgeflag = 0; 404#ifndef DISABLE_PG_G 405 if (cpu_feature & CPUID_PGE) 406 pgeflag = PG_G; 407#endif 408#ifdef I686_CPU_not /* Problem seems to have gone away */ 409 /* Deal with un-resolved Pentium4 issues */ 410 if (cpu_class == CPUCLASS_686 && 411 strcmp(cpu_vendor, "GenuineIntel") == 0 && 412 (cpu_id & 0xf00) == 0xf00) { 413 printf("Warning: Pentium 4 cpu: PG_G disabled (global flag)\n"); 414 pgeflag = 0; 415 } 416#endif 417 418/* 419 * Initialize the 4MB page size flag 420 */ 421 pseflag = 0; 422/* 423 * The 4MB page version of the initial 424 * kernel page mapping. 425 */ 426 pdir4mb = 0; 427 428#ifndef DISABLE_PSE 429 if (cpu_feature & CPUID_PSE) 430 pseflag = PG_PS; 431#endif 432#ifdef I686_CPU_not /* Problem seems to have gone away */ 433 /* Deal with un-resolved Pentium4 issues */ 434 if (cpu_class == CPUCLASS_686 && 435 strcmp(cpu_vendor, "GenuineIntel") == 0 && 436 (cpu_id & 0xf00) == 0xf00) { 437 printf("Warning: Pentium 4 cpu: PG_PS disabled (4MB pages)\n"); 438 pseflag = 0; 439 } 440#endif 441#ifndef DISABLE_PSE 442 if (pseflag) { 443 pd_entry_t ptditmp; 444 /* 445 * Note that we have enabled PSE mode 446 */ 447 ptditmp = *(PTmap + i386_btop(KERNBASE)); 448 ptditmp &= ~(NBPDR - 1); 449 ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; 450 pdir4mb = ptditmp; 451 } 452#endif 453#ifndef SMP 454 /* 455 * Turn on PGE/PSE. SMP does this later on since the 456 * 4K page tables are required for AP boot (for now). 457 * XXX fixme. 458 */ 459 pmap_set_opt(); 460#endif 461#ifdef SMP 462 if (cpu_apic_address == 0) 463 panic("pmap_bootstrap: no local apic! (non-SMP hardware?)"); 464 465 /* local apic is mapped on last page */ 466 SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | 467 (cpu_apic_address & PG_FRAME)); 468#endif 469 invltlb(); 470} 471 472/* 473 * Enable 4MB page mode for MP startup. Turn on PG_G support. 474 * BSP will run this after all the AP's have started up. 475 */ 476void 477pmap_set_opt(void) 478{ 479 pt_entry_t *pte; 480 vm_offset_t va, endva; 481 482 if (pgeflag && (cpu_feature & CPUID_PGE)) { 483 load_cr4(rcr4() | CR4_PGE); 484 invltlb(); /* Insurance */ 485 } 486#ifndef DISABLE_PSE 487 if (pseflag && (cpu_feature & CPUID_PSE)) { 488 load_cr4(rcr4() | CR4_PSE); 489 invltlb(); /* Insurance */ 490 } 491#endif 492 if (PCPU_GET(cpuid) == 0) { 493#ifndef DISABLE_PSE 494 if (pdir4mb) { 495 kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; 496 invltlb(); /* Insurance */ 497 } 498#endif 499 if (pgeflag) { 500 /* Turn on PG_G for text, data, bss pages. */ 501 va = (vm_offset_t)btext; 502#ifndef DISABLE_PSE 503 if (pseflag && (cpu_feature & CPUID_PSE)) { 504 if (va < KERNBASE + (1 << PDRSHIFT)) 505 va = KERNBASE + (1 << PDRSHIFT); 506 } 507#endif 508 endva = KERNBASE + KERNend; 509 while (va < endva) { 510 pte = vtopte(va); 511 if (*pte) 512 *pte |= pgeflag; 513 va += PAGE_SIZE; 514 } 515 invltlb(); /* Insurance */ 516 } 517 /* 518 * We do not need to broadcast the invltlb here, because 519 * each AP does it the moment it is released from the boot 520 * lock. See ap_init(). 521 */ 522 } 523} 524 525static void * 526pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 527{ 528 *flags = UMA_SLAB_PRIV; 529 return (void *)kmem_alloc(kernel_map, bytes); 530} 531 532#ifdef PAE 533static void * 534pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 535{ 536 *flags = UMA_SLAB_PRIV; 537 return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0)); 538} 539#endif 540 541/* 542 * Initialize the pmap module. 543 * Called by vm_init, to initialize any structures that the pmap 544 * system needs to map virtual memory. 545 * pmap_init has been enhanced to support in a fairly consistant 546 * way, discontiguous physical memory. 547 */ 548void 549pmap_init(phys_start, phys_end) 550 vm_paddr_t phys_start, phys_end; 551{ 552 int i; 553 int initial_pvs; 554 555 /* 556 * Allocate memory for random pmap data structures. Includes the 557 * pv_head_table. 558 */ 559 560 for(i = 0; i < vm_page_array_size; i++) { 561 vm_page_t m; 562 563 m = &vm_page_array[i]; 564 TAILQ_INIT(&m->md.pv_list); 565 m->md.pv_list_count = 0; 566 } 567 568 /* 569 * init the pv free list 570 */ 571 initial_pvs = vm_page_array_size; 572 if (initial_pvs < MINPV) 573 initial_pvs = MINPV; 574 pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, 575 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); 576 uma_zone_set_allocf(pvzone, pmap_pv_allocf); 577 uma_prealloc(pvzone, initial_pvs); 578 579#ifdef PAE 580 pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, 581 NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0); 582 uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); 583#endif 584 585 /* 586 * Now it is safe to enable pv_table recording. 587 */ 588 pmap_initialized = TRUE; 589} 590 591/* 592 * Initialize the address space (zone) for the pv_entries. Set a 593 * high water mark so that the system can recover from excessive 594 * numbers of pv entries. 595 */ 596void 597pmap_init2() 598{ 599 int shpgperproc = PMAP_SHPGPERPROC; 600 601 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 602 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 603 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 604 pv_entry_high_water = 9 * (pv_entry_max / 10); 605 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); 606} 607 608 609/*************************************************** 610 * Low level helper routines..... 611 ***************************************************/ 612 613#if defined(PMAP_DIAGNOSTIC) 614 615/* 616 * This code checks for non-writeable/modified pages. 617 * This should be an invalid condition. 618 */ 619static int 620pmap_nw_modified(pt_entry_t ptea) 621{ 622 int pte; 623 624 pte = (int) ptea; 625 626 if ((pte & (PG_M|PG_RW)) == PG_M) 627 return 1; 628 else 629 return 0; 630} 631#endif 632 633 634/* 635 * this routine defines the region(s) of memory that should 636 * not be tested for the modified bit. 637 */ 638static PMAP_INLINE int 639pmap_track_modified(vm_offset_t va) 640{ 641 if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) 642 return 1; 643 else 644 return 0; 645} 646 647#ifdef I386_CPU 648/* 649 * i386 only has "invalidate everything" and no SMP to worry about. 650 */ 651PMAP_INLINE void 652pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 653{ 654 655 if (pmap == kernel_pmap || pmap->pm_active) 656 invltlb(); 657} 658 659PMAP_INLINE void 660pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 661{ 662 663 if (pmap == kernel_pmap || pmap->pm_active) 664 invltlb(); 665} 666 667PMAP_INLINE void 668pmap_invalidate_all(pmap_t pmap) 669{ 670 671 if (pmap == kernel_pmap || pmap->pm_active) 672 invltlb(); 673} 674#else /* !I386_CPU */ 675#ifdef SMP 676/* 677 * For SMP, these functions have to use the IPI mechanism for coherence. 678 */ 679void 680pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 681{ 682 u_int cpumask; 683 u_int other_cpus; 684 685 critical_enter(); 686 /* 687 * We need to disable interrupt preemption but MUST NOT have 688 * interrupts disabled here. 689 * XXX we may need to hold schedlock to get a coherent pm_active 690 */ 691 if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { 692 invlpg(va); 693 smp_invlpg(va); 694 } else { 695 cpumask = PCPU_GET(cpumask); 696 other_cpus = PCPU_GET(other_cpus); 697 if (pmap->pm_active & cpumask) 698 invlpg(va); 699 if (pmap->pm_active & other_cpus) 700 smp_masked_invlpg(pmap->pm_active & other_cpus, va); 701 } 702 critical_exit(); 703} 704 705void 706pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 707{ 708 u_int cpumask; 709 u_int other_cpus; 710 vm_offset_t addr; 711 712 critical_enter(); 713 /* 714 * We need to disable interrupt preemption but MUST NOT have 715 * interrupts disabled here. 716 * XXX we may need to hold schedlock to get a coherent pm_active 717 */ 718 if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { 719 for (addr = sva; addr < eva; addr += PAGE_SIZE) 720 invlpg(addr); 721 smp_invlpg_range(sva, eva); 722 } else { 723 cpumask = PCPU_GET(cpumask); 724 other_cpus = PCPU_GET(other_cpus); 725 if (pmap->pm_active & cpumask) 726 for (addr = sva; addr < eva; addr += PAGE_SIZE) 727 invlpg(addr); 728 if (pmap->pm_active & other_cpus) 729 smp_masked_invlpg_range(pmap->pm_active & other_cpus, 730 sva, eva); 731 } 732 critical_exit(); 733} 734 735void 736pmap_invalidate_all(pmap_t pmap) 737{ 738 u_int cpumask; 739 u_int other_cpus; 740 741#ifdef SWTCH_OPTIM_STATS 742 tlb_flush_count++; 743#endif 744 critical_enter(); 745 /* 746 * We need to disable interrupt preemption but MUST NOT have 747 * interrupts disabled here. 748 * XXX we may need to hold schedlock to get a coherent pm_active 749 */ 750 if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { 751 invltlb(); 752 smp_invltlb(); 753 } else { 754 cpumask = PCPU_GET(cpumask); 755 other_cpus = PCPU_GET(other_cpus); 756 if (pmap->pm_active & cpumask) 757 invltlb(); 758 if (pmap->pm_active & other_cpus) 759 smp_masked_invltlb(pmap->pm_active & other_cpus); 760 } 761 critical_exit(); 762} 763#else /* !SMP */ 764/* 765 * Normal, non-SMP, 486+ invalidation functions. 766 * We inline these within pmap.c for speed. 767 */ 768PMAP_INLINE void 769pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 770{ 771 772 if (pmap == kernel_pmap || pmap->pm_active) 773 invlpg(va); 774} 775 776PMAP_INLINE void 777pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 778{ 779 vm_offset_t addr; 780 781 if (pmap == kernel_pmap || pmap->pm_active) 782 for (addr = sva; addr < eva; addr += PAGE_SIZE) 783 invlpg(addr); 784} 785 786PMAP_INLINE void 787pmap_invalidate_all(pmap_t pmap) 788{ 789 790 if (pmap == kernel_pmap || pmap->pm_active) 791 invltlb(); 792} 793#endif /* !SMP */ 794#endif /* !I386_CPU */ 795 796/* 797 * Are we current address space or kernel? 798 */ 799static __inline int 800pmap_is_current(pmap_t pmap) 801{ 802 return (pmap == kernel_pmap || 803 (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); 804} 805 806/* 807 * Super fast pmap_pte routine best used when scanning 808 * the pv lists. This eliminates many coarse-grained 809 * invltlb calls. Note that many of the pv list 810 * scans are across different pmaps. It is very wasteful 811 * to do an entire invltlb for checking a single mapping. 812 */ 813pt_entry_t * 814pmap_pte_quick(pmap, va) 815 register pmap_t pmap; 816 vm_offset_t va; 817{ 818 pd_entry_t newpf; 819 pd_entry_t *pde; 820 821 pde = pmap_pde(pmap, va); 822 if (*pde & PG_PS) 823 return (pde); 824 if (*pde != 0) { 825 /* are we current address space or kernel? */ 826 if (pmap_is_current(pmap)) 827 return vtopte(va); 828 newpf = *pde & PG_FRAME; 829 if (((*PMAP1) & PG_FRAME) != newpf) { 830 *PMAP1 = newpf | PG_RW | PG_V; 831 pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1); 832 } 833 return PADDR1 + (i386_btop(va) & (NPTEPG - 1)); 834 } 835 return (0); 836} 837 838/* 839 * Routine: pmap_extract 840 * Function: 841 * Extract the physical page address associated 842 * with the given map/virtual_address pair. 843 */ 844vm_paddr_t 845pmap_extract(pmap, va) 846 register pmap_t pmap; 847 vm_offset_t va; 848{ 849 vm_paddr_t rtval; 850 pt_entry_t *pte; 851 pd_entry_t pde; 852 853 if (pmap == 0) 854 return 0; 855 pde = pmap->pm_pdir[va >> PDRSHIFT]; 856 if (pde != 0) { 857 if ((pde & PG_PS) != 0) { 858 rtval = (pde & ~PDRMASK) | (va & PDRMASK); 859 return rtval; 860 } 861 pte = pmap_pte_quick(pmap, va); 862 rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); 863 return rtval; 864 } 865 return 0; 866 867} 868 869/*************************************************** 870 * Low level mapping routines..... 871 ***************************************************/ 872 873/* 874 * Add a wired page to the kva. 875 * Note: not SMP coherent. 876 */ 877PMAP_INLINE void 878pmap_kenter(vm_offset_t va, vm_paddr_t pa) 879{ 880 pt_entry_t *pte; 881 882 pte = vtopte(va); 883 pte_store(pte, pa | PG_RW | PG_V | pgeflag); 884} 885 886/* 887 * Remove a page from the kernel pagetables. 888 * Note: not SMP coherent. 889 */ 890PMAP_INLINE void 891pmap_kremove(vm_offset_t va) 892{ 893 pt_entry_t *pte; 894 895 pte = vtopte(va); 896 pte_clear(pte); 897} 898 899/* 900 * Used to map a range of physical addresses into kernel 901 * virtual address space. 902 * 903 * The value passed in '*virt' is a suggested virtual address for 904 * the mapping. Architectures which can support a direct-mapped 905 * physical to virtual region can return the appropriate address 906 * within that region, leaving '*virt' unchanged. Other 907 * architectures should map the pages starting at '*virt' and 908 * update '*virt' with the first usable address after the mapped 909 * region. 910 */ 911vm_offset_t 912pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 913{ 914 vm_offset_t va, sva; 915 916 va = sva = *virt; 917 while (start < end) { 918 pmap_kenter(va, start); 919 va += PAGE_SIZE; 920 start += PAGE_SIZE; 921 } 922 pmap_invalidate_range(kernel_pmap, sva, va); 923 *virt = va; 924 return (sva); 925} 926 927 928/* 929 * Add a list of wired pages to the kva 930 * this routine is only used for temporary 931 * kernel mappings that do not need to have 932 * page modification or references recorded. 933 * Note that old mappings are simply written 934 * over. The page *must* be wired. 935 * Note: SMP coherent. Uses a ranged shootdown IPI. 936 */ 937void 938pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) 939{ 940 vm_offset_t va; 941 942 va = sva; 943 while (count-- > 0) { 944 pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); 945 va += PAGE_SIZE; 946 m++; 947 } 948 pmap_invalidate_range(kernel_pmap, sva, va); 949} 950 951/* 952 * This routine tears out page mappings from the 953 * kernel -- it is meant only for temporary mappings. 954 * Note: SMP coherent. Uses a ranged shootdown IPI. 955 */ 956void 957pmap_qremove(vm_offset_t sva, int count) 958{ 959 vm_offset_t va; 960 961 va = sva; 962 while (count-- > 0) { 963 pmap_kremove(va); 964 va += PAGE_SIZE; 965 } 966 pmap_invalidate_range(kernel_pmap, sva, va); 967} 968 969static vm_page_t 970pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 971{ 972 vm_page_t m; 973 974retry: 975 m = vm_page_lookup(object, pindex); 976 if (m != NULL) { 977 vm_page_lock_queues(); 978 if (vm_page_sleep_if_busy(m, FALSE, "pplookp")) 979 goto retry; 980 vm_page_unlock_queues(); 981 } 982 return m; 983} 984 985#ifndef KSTACK_MAX_PAGES 986#define KSTACK_MAX_PAGES 32 987#endif 988 989/* 990 * Create the kernel stack (including pcb for i386) for a new thread. 991 * This routine directly affects the fork perf for a process and 992 * create performance for a thread. 993 */ 994void 995pmap_new_thread(struct thread *td, int pages) 996{ 997 int i; 998 vm_page_t ma[KSTACK_MAX_PAGES]; 999 vm_object_t ksobj; 1000 vm_page_t m; 1001 vm_offset_t ks; 1002 1003 /* Bounds check */ 1004 if (pages <= 1) 1005 pages = KSTACK_PAGES; 1006 else if (pages > KSTACK_MAX_PAGES) 1007 pages = KSTACK_MAX_PAGES; 1008 1009 /* 1010 * allocate object for the kstack 1011 */ 1012 ksobj = vm_object_allocate(OBJT_DEFAULT, pages); 1013 td->td_kstack_obj = ksobj; 1014 1015 /* get a kernel virtual address for the kstack for this thread */ 1016#ifdef KSTACK_GUARD 1017 ks = kmem_alloc_nofault(kernel_map, (pages + 1) * PAGE_SIZE); 1018 if (ks == 0) 1019 panic("pmap_new_thread: kstack allocation failed"); 1020 if (*vtopte(ks) != 0) 1021 pmap_qremove(ks, 1); 1022 ks += PAGE_SIZE; 1023 td->td_kstack = ks; 1024#else 1025 /* get a kernel virtual address for the kstack for this thread */ 1026 ks = kmem_alloc_nofault(kernel_map, pages * PAGE_SIZE); 1027 if (ks == 0) 1028 panic("pmap_new_thread: kstack allocation failed"); 1029 td->td_kstack = ks; 1030#endif 1031 /* 1032 * Knowing the number of pages allocated is useful when you 1033 * want to deallocate them. 1034 */ 1035 td->td_kstack_pages = pages; 1036 1037 /* 1038 * For the length of the stack, link in a real page of ram for each 1039 * page of stack. 1040 */ 1041 VM_OBJECT_LOCK(ksobj); 1042 for (i = 0; i < pages; i++) { 1043 /* 1044 * Get a kernel stack page 1045 */ 1046 m = vm_page_grab(ksobj, i, 1047 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED); 1048 ma[i] = m; 1049 1050 vm_page_lock_queues(); 1051 vm_page_wakeup(m); 1052 vm_page_flag_clear(m, PG_ZERO); 1053 m->valid = VM_PAGE_BITS_ALL; 1054 vm_page_unlock_queues(); 1055 } 1056 VM_OBJECT_UNLOCK(ksobj); 1057 pmap_qenter(ks, ma, pages); 1058} 1059 1060/* 1061 * Dispose the kernel stack for a thread that has exited. 1062 * This routine directly impacts the exit perf of a process and thread. 1063 */ 1064void 1065pmap_dispose_thread(td) 1066 struct thread *td; 1067{ 1068 int i; 1069 int pages; 1070 vm_object_t ksobj; 1071 vm_offset_t ks; 1072 vm_page_t m; 1073 1074 pages = td->td_kstack_pages; 1075 ksobj = td->td_kstack_obj; 1076 ks = td->td_kstack; 1077 pmap_qremove(ks, pages); 1078 VM_OBJECT_LOCK(ksobj); 1079 for (i = 0; i < pages; i++) { 1080 m = vm_page_lookup(ksobj, i); 1081 if (m == NULL) 1082 panic("pmap_dispose_thread: kstack already missing?"); 1083 vm_page_lock_queues(); 1084 vm_page_busy(m); 1085 vm_page_unwire(m, 0); 1086 vm_page_free(m); 1087 vm_page_unlock_queues(); 1088 } 1089 VM_OBJECT_UNLOCK(ksobj); 1090 /* 1091 * Free the space that this stack was mapped to in the kernel 1092 * address map. 1093 */ 1094#ifdef KSTACK_GUARD 1095 kmem_free(kernel_map, ks - PAGE_SIZE, (pages + 1) * PAGE_SIZE); 1096#else 1097 kmem_free(kernel_map, ks, pages * PAGE_SIZE); 1098#endif 1099 vm_object_deallocate(ksobj); 1100} 1101 1102/* 1103 * Set up a variable sized alternate kstack. Though it may look MI, it may 1104 * need to be different on certain arches like ia64. 1105 */ 1106void 1107pmap_new_altkstack(struct thread *td, int pages) 1108{ 1109 /* shuffle the original stack */ 1110 td->td_altkstack_obj = td->td_kstack_obj; 1111 td->td_altkstack = td->td_kstack; 1112 td->td_altkstack_pages = td->td_kstack_pages; 1113 1114 pmap_new_thread(td, pages); 1115} 1116 1117void 1118pmap_dispose_altkstack(td) 1119 struct thread *td; 1120{ 1121 pmap_dispose_thread(td); 1122 1123 /* restore the original kstack */ 1124 td->td_kstack = td->td_altkstack; 1125 td->td_kstack_obj = td->td_altkstack_obj; 1126 td->td_kstack_pages = td->td_altkstack_pages; 1127 td->td_altkstack = 0; 1128 td->td_altkstack_obj = NULL; 1129 td->td_altkstack_pages = 0; 1130} 1131 1132/* 1133 * Allow the Kernel stack for a thread to be prejudicially paged out. 1134 */ 1135void 1136pmap_swapout_thread(td) 1137 struct thread *td; 1138{ 1139 int i; 1140 int pages; 1141 vm_object_t ksobj; 1142 vm_offset_t ks; 1143 vm_page_t m; 1144 1145 pages = td->td_kstack_pages; 1146 ksobj = td->td_kstack_obj; 1147 ks = td->td_kstack; 1148 pmap_qremove(ks, pages); 1149 VM_OBJECT_LOCK(ksobj); 1150 for (i = 0; i < pages; i++) { 1151 m = vm_page_lookup(ksobj, i); 1152 if (m == NULL) 1153 panic("pmap_swapout_thread: kstack already missing?"); 1154 vm_page_lock_queues(); 1155 vm_page_dirty(m); 1156 vm_page_unwire(m, 0); 1157 vm_page_unlock_queues(); 1158 } 1159 VM_OBJECT_UNLOCK(ksobj); 1160} 1161 1162/* 1163 * Bring the kernel stack for a specified thread back in. 1164 */ 1165void 1166pmap_swapin_thread(td) 1167 struct thread *td; 1168{ 1169 int i, rv; 1170 int pages; 1171 vm_page_t ma[KSTACK_MAX_PAGES]; 1172 vm_object_t ksobj; 1173 vm_offset_t ks; 1174 vm_page_t m; 1175 1176 pages = td->td_kstack_pages; 1177 ksobj = td->td_kstack_obj; 1178 ks = td->td_kstack; 1179 VM_OBJECT_LOCK(ksobj); 1180 for (i = 0; i < pages; i++) { 1181 m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 1182 if (m->valid != VM_PAGE_BITS_ALL) { 1183 rv = vm_pager_get_pages(ksobj, &m, 1, 0); 1184 if (rv != VM_PAGER_OK) 1185 panic("pmap_swapin_thread: cannot get kstack for proc: %d\n", td->td_proc->p_pid); 1186 m = vm_page_lookup(ksobj, i); 1187 m->valid = VM_PAGE_BITS_ALL; 1188 } 1189 ma[i] = m; 1190 vm_page_lock_queues(); 1191 vm_page_wire(m); 1192 vm_page_wakeup(m); 1193 vm_page_unlock_queues(); 1194 } 1195 VM_OBJECT_UNLOCK(ksobj); 1196 pmap_qenter(ks, ma, pages); 1197} 1198 1199/*************************************************** 1200 * Page table page management routines..... 1201 ***************************************************/ 1202 1203/* 1204 * This routine unholds page table pages, and if the hold count 1205 * drops to zero, then it decrements the wire count. 1206 */ 1207static int 1208_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 1209{ 1210 1211 while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt")) 1212 vm_page_lock_queues(); 1213 1214 if (m->hold_count == 0) { 1215 vm_offset_t pteva; 1216 /* 1217 * unmap the page table page 1218 */ 1219 pmap->pm_pdir[m->pindex] = 0; 1220 --pmap->pm_stats.resident_count; 1221 if (pmap_is_current(pmap)) { 1222 /* 1223 * Do an invltlb to make the invalidated mapping 1224 * take effect immediately. 1225 */ 1226 pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); 1227 pmap_invalidate_page(pmap, pteva); 1228 } 1229 1230 /* 1231 * If the page is finally unwired, simply free it. 1232 */ 1233 --m->wire_count; 1234 if (m->wire_count == 0) { 1235 vm_page_busy(m); 1236 vm_page_free_zero(m); 1237 atomic_subtract_int(&cnt.v_wire_count, 1); 1238 } 1239 return 1; 1240 } 1241 return 0; 1242} 1243 1244static PMAP_INLINE int 1245pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 1246{ 1247 vm_page_unhold(m); 1248 if (m->hold_count == 0) 1249 return _pmap_unwire_pte_hold(pmap, m); 1250 else 1251 return 0; 1252} 1253 1254/* 1255 * After removing a page table entry, this routine is used to 1256 * conditionally free the page, and manage the hold/wire counts. 1257 */ 1258static int 1259pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1260{ 1261 unsigned ptepindex; 1262 if (va >= VM_MAXUSER_ADDRESS) 1263 return 0; 1264 1265 if (mpte == NULL) { 1266 ptepindex = (va >> PDRSHIFT); 1267 if (pmap->pm_pteobj->root && 1268 (pmap->pm_pteobj->root->pindex == ptepindex)) { 1269 mpte = pmap->pm_pteobj->root; 1270 } else { 1271 while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL && 1272 vm_page_sleep_if_busy(mpte, FALSE, "pulook")) 1273 vm_page_lock_queues(); 1274 } 1275 } 1276 1277 return pmap_unwire_pte_hold(pmap, mpte); 1278} 1279 1280void 1281pmap_pinit0(pmap) 1282 struct pmap *pmap; 1283{ 1284 1285 pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); 1286#ifdef PAE 1287 pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); 1288#endif 1289 pmap->pm_active = 0; 1290 TAILQ_INIT(&pmap->pm_pvlist); 1291 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1292 mtx_lock_spin(&allpmaps_lock); 1293 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1294 mtx_unlock_spin(&allpmaps_lock); 1295} 1296 1297/* 1298 * Initialize a preallocated and zeroed pmap structure, 1299 * such as one in a vmspace structure. 1300 */ 1301void 1302pmap_pinit(pmap) 1303 register struct pmap *pmap; 1304{ 1305 vm_page_t ptdpg[NPGPTD]; 1306 vm_paddr_t pa; 1307 int i; 1308 1309 /* 1310 * No need to allocate page table space yet but we do need a valid 1311 * page directory table. 1312 */ 1313 if (pmap->pm_pdir == NULL) { 1314 pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, 1315 NBPTD); 1316#ifdef PAE 1317 pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); 1318 KASSERT(((vm_offset_t)pmap->pm_pdpt & 1319 ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, 1320 ("pmap_pinit: pdpt misaligned")); 1321 KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), 1322 ("pmap_pinit: pdpt above 4g")); 1323#endif 1324 } 1325 1326 /* 1327 * allocate object for the ptes 1328 */ 1329 if (pmap->pm_pteobj == NULL) 1330 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1331 NPGPTD); 1332 1333 /* 1334 * allocate the page directory page(s) 1335 */ 1336 for (i = 0; i < NPGPTD; i++) { 1337 ptdpg[i] = vm_page_grab(pmap->pm_pteobj, PTDPTDI + i, 1338 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | 1339 VM_ALLOC_ZERO); 1340 vm_page_lock_queues(); 1341 vm_page_flag_clear(ptdpg[i], PG_BUSY); 1342 ptdpg[i]->valid = VM_PAGE_BITS_ALL; 1343 vm_page_unlock_queues(); 1344 } 1345 1346 pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); 1347 1348 for (i = 0; i < NPGPTD; i++) { 1349 if ((ptdpg[i]->flags & PG_ZERO) == 0) 1350 bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); 1351 } 1352 1353 mtx_lock_spin(&allpmaps_lock); 1354 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1355 mtx_unlock_spin(&allpmaps_lock); 1356 /* Wire in kernel global address entries. */ 1357 /* XXX copies current process, does not fill in MPPTDI */ 1358 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); 1359#ifdef SMP 1360 pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; 1361#endif 1362 1363 /* install self-referential address mapping entry(s) */ 1364 for (i = 0; i < NPGPTD; i++) { 1365 pa = VM_PAGE_TO_PHYS(ptdpg[i]); 1366 pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; 1367#ifdef PAE 1368 pmap->pm_pdpt[i] = pa | PG_V; 1369#endif 1370 } 1371 1372 pmap->pm_active = 0; 1373 TAILQ_INIT(&pmap->pm_pvlist); 1374 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1375} 1376 1377/* 1378 * Wire in kernel global address entries. To avoid a race condition 1379 * between pmap initialization and pmap_growkernel, this procedure 1380 * should be called after the vmspace is attached to the process 1381 * but before this pmap is activated. 1382 */ 1383void 1384pmap_pinit2(pmap) 1385 struct pmap *pmap; 1386{ 1387 /* XXX: Remove this stub when no longer called */ 1388} 1389 1390/* 1391 * this routine is called if the page table page is not 1392 * mapped correctly. 1393 */ 1394static vm_page_t 1395_pmap_allocpte(pmap, ptepindex) 1396 pmap_t pmap; 1397 unsigned ptepindex; 1398{ 1399 vm_paddr_t ptepa; 1400 vm_offset_t pteva; 1401 vm_page_t m; 1402 1403 /* 1404 * Find or fabricate a new pagetable page 1405 */ 1406 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1407 VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1408 1409 KASSERT(m->queue == PQ_NONE, 1410 ("_pmap_allocpte: %p->queue != PQ_NONE", m)); 1411 1412 /* 1413 * Increment the hold count for the page table page 1414 * (denoting a new mapping.) 1415 */ 1416 m->hold_count++; 1417 1418 /* 1419 * Map the pagetable page into the process address space, if 1420 * it isn't already there. 1421 */ 1422 1423 pmap->pm_stats.resident_count++; 1424 1425 ptepa = VM_PAGE_TO_PHYS(m); 1426 pmap->pm_pdir[ptepindex] = 1427 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); 1428 1429 /* 1430 * Try to use the new mapping, but if we cannot, then 1431 * do it with the routine that maps the page explicitly. 1432 */ 1433 if ((m->flags & PG_ZERO) == 0) { 1434 if (pmap_is_current(pmap)) { 1435 pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex); 1436 bzero((caddr_t) pteva, PAGE_SIZE); 1437 } else { 1438 pmap_zero_page(m); 1439 } 1440 } 1441 vm_page_lock_queues(); 1442 m->valid = VM_PAGE_BITS_ALL; 1443 vm_page_flag_clear(m, PG_ZERO); 1444 vm_page_wakeup(m); 1445 vm_page_unlock_queues(); 1446 1447 return m; 1448} 1449 1450static vm_page_t 1451pmap_allocpte(pmap_t pmap, vm_offset_t va) 1452{ 1453 unsigned ptepindex; 1454 pd_entry_t ptepa; 1455 vm_page_t m; 1456 1457 /* 1458 * Calculate pagetable page index 1459 */ 1460 ptepindex = va >> PDRSHIFT; 1461 1462 /* 1463 * Get the page directory entry 1464 */ 1465 ptepa = pmap->pm_pdir[ptepindex]; 1466 1467 /* 1468 * This supports switching from a 4MB page to a 1469 * normal 4K page. 1470 */ 1471 if (ptepa & PG_PS) { 1472 pmap->pm_pdir[ptepindex] = 0; 1473 ptepa = 0; 1474 pmap_invalidate_all(kernel_pmap); 1475 } 1476 1477 /* 1478 * If the page table page is mapped, we just increment the 1479 * hold count, and activate it. 1480 */ 1481 if (ptepa) { 1482 /* 1483 * In order to get the page table page, try the 1484 * hint first. 1485 */ 1486 if (pmap->pm_pteobj->root && 1487 (pmap->pm_pteobj->root->pindex == ptepindex)) { 1488 m = pmap->pm_pteobj->root; 1489 } else { 1490 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1491 } 1492 m->hold_count++; 1493 return m; 1494 } 1495 /* 1496 * Here if the pte page isn't mapped, or if it has been deallocated. 1497 */ 1498 return _pmap_allocpte(pmap, ptepindex); 1499} 1500 1501 1502/*************************************************** 1503* Pmap allocation/deallocation routines. 1504 ***************************************************/ 1505 1506#ifdef LAZY_SWITCH 1507#ifdef SMP 1508/* 1509 * Deal with a SMP shootdown of other users of the pmap that we are 1510 * trying to dispose of. This can be a bit hairy. 1511 */ 1512static u_int *lazymask; 1513static u_int lazyptd; 1514static volatile u_int lazywait; 1515 1516void pmap_lazyfix_action(void); 1517 1518void 1519pmap_lazyfix_action(void) 1520{ 1521 u_int mymask = PCPU_GET(cpumask); 1522 1523 if (rcr3() == lazyptd) { 1524 load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1525#ifdef SWTCH_OPTIM_STATS 1526 atomic_add_int(&lazy_flush_smpfixup, 1); 1527 } else { 1528 if (*lazymask & mymask) 1529 lazy_flush_smpbadcr3++; 1530 else 1531 lazy_flush_smpmiss++; 1532#endif 1533 } 1534 atomic_clear_int(lazymask, mymask); 1535 atomic_store_rel_int(&lazywait, 1); 1536} 1537 1538static void 1539pmap_lazyfix_self(u_int mymask) 1540{ 1541 1542 if (rcr3() == lazyptd) { 1543 load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1544#ifdef SWTCH_OPTIM_STATS 1545 lazy_flush_fixup++; 1546 } else { 1547 if (*lazymask & mymask) 1548 lazy_flush_smpbadcr3++; 1549 else 1550 lazy_flush_smpmiss++; 1551#endif 1552 } 1553 atomic_clear_int(lazymask, mymask); 1554} 1555 1556 1557static void 1558pmap_lazyfix(pmap_t pmap) 1559{ 1560 u_int mymask = PCPU_GET(cpumask); 1561 u_int mask; 1562 register u_int spins; 1563 1564 while ((mask = pmap->pm_active) != 0) { 1565 spins = 50000000; 1566 mask = mask & -mask; /* Find least significant set bit */ 1567 mtx_lock_spin(&lazypmap_lock); 1568#ifdef PAE 1569 lazyptd = vtophys(pmap->pm_pdpt); 1570#else 1571 lazyptd = vtophys(pmap->pm_pdir); 1572#endif 1573 if (mask == mymask) { 1574 lazymask = &pmap->pm_active; 1575 pmap_lazyfix_self(mymask); 1576 } else { 1577 atomic_store_rel_int((u_int *)&lazymask, 1578 (u_int)&pmap->pm_active); 1579 atomic_store_rel_int(&lazywait, 0); 1580 ipi_selected(mask, IPI_LAZYPMAP); 1581 while (lazywait == 0) { 1582 ia32_pause(); 1583 if (--spins == 0) 1584 break; 1585 } 1586#ifdef SWTCH_OPTIM_STATS 1587 lazy_flush_smpipi++; 1588#endif 1589 } 1590 mtx_unlock_spin(&lazypmap_lock); 1591 if (spins == 0) 1592 printf("pmap_lazyfix: spun for 50000000\n"); 1593 } 1594} 1595 1596#else /* SMP */ 1597 1598/* 1599 * Cleaning up on uniprocessor is easy. For various reasons, we're 1600 * unlikely to have to even execute this code, including the fact 1601 * that the cleanup is deferred until the parent does a wait(2), which 1602 * means that another userland process has run. 1603 */ 1604static void 1605pmap_lazyfix(pmap_t pmap) 1606{ 1607 u_int cr3; 1608 1609 cr3 = vtophys(pmap->pm_pdir); 1610 if (cr3 == rcr3()) { 1611 load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1612 pmap->pm_active &= ~(PCPU_GET(cpumask)); 1613#ifdef SWTCH_OPTIM_STATS 1614 lazy_flush_fixup++; 1615#endif 1616 } 1617} 1618#endif /* SMP */ 1619#endif /* LAZY_SWITCH */ 1620 1621/* 1622 * Release any resources held by the given physical map. 1623 * Called when a pmap initialized by pmap_pinit is being released. 1624 * Should only be called if the map contains no valid mappings. 1625 */ 1626void 1627pmap_release(pmap_t pmap) 1628{ 1629 vm_object_t object; 1630 vm_page_t m; 1631 int i; 1632 1633 object = pmap->pm_pteobj; 1634 1635 KASSERT(object->ref_count == 1, 1636 ("pmap_release: pteobj reference count %d != 1", 1637 object->ref_count)); 1638 KASSERT(pmap->pm_stats.resident_count == 0, 1639 ("pmap_release: pmap resident count %ld != 0", 1640 pmap->pm_stats.resident_count)); 1641 1642#ifdef LAZY_SWITCH 1643 pmap_lazyfix(pmap); 1644#endif 1645 mtx_lock_spin(&allpmaps_lock); 1646 LIST_REMOVE(pmap, pm_list); 1647 mtx_unlock_spin(&allpmaps_lock); 1648 1649 bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * 1650 sizeof(*pmap->pm_pdir)); 1651#ifdef SMP 1652 pmap->pm_pdir[MPPTDI] = 0; 1653#endif 1654 1655 pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); 1656 1657 vm_page_lock_queues(); 1658 for (i = 0; i < NPGPTD; i++) { 1659 m = TAILQ_FIRST(&object->memq); 1660#ifdef PAE 1661 KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), 1662 ("pmap_release: got wrong ptd page")); 1663#endif 1664 m->wire_count--; 1665 atomic_subtract_int(&cnt.v_wire_count, 1); 1666 vm_page_busy(m); 1667 vm_page_free_zero(m); 1668 } 1669 KASSERT(TAILQ_EMPTY(&object->memq), 1670 ("pmap_release: leaking page table pages")); 1671 vm_page_unlock_queues(); 1672} 1673 1674static int 1675kvm_size(SYSCTL_HANDLER_ARGS) 1676{ 1677 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 1678 1679 return sysctl_handle_long(oidp, &ksize, 0, req); 1680} 1681SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1682 0, 0, kvm_size, "IU", "Size of KVM"); 1683 1684static int 1685kvm_free(SYSCTL_HANDLER_ARGS) 1686{ 1687 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1688 1689 return sysctl_handle_long(oidp, &kfree, 0, req); 1690} 1691SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1692 0, 0, kvm_free, "IU", "Amount of KVM free"); 1693 1694/* 1695 * grow the number of kernel page table entries, if needed 1696 */ 1697void 1698pmap_growkernel(vm_offset_t addr) 1699{ 1700 struct pmap *pmap; 1701 int s; 1702 vm_paddr_t ptppaddr; 1703 vm_page_t nkpg; 1704 pd_entry_t newpdir; 1705 pt_entry_t *pde; 1706 1707 s = splhigh(); 1708 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1709 if (kernel_vm_end == 0) { 1710 kernel_vm_end = KERNBASE; 1711 nkpt = 0; 1712 while (pdir_pde(PTD, kernel_vm_end)) { 1713 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1714 nkpt++; 1715 } 1716 } 1717 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1718 while (kernel_vm_end < addr) { 1719 if (pdir_pde(PTD, kernel_vm_end)) { 1720 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1721 continue; 1722 } 1723 1724 /* 1725 * This index is bogus, but out of the way 1726 */ 1727 nkpg = vm_page_alloc(NULL, nkpt, 1728 VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); 1729 if (!nkpg) 1730 panic("pmap_growkernel: no memory to grow kernel"); 1731 1732 nkpt++; 1733 1734 pmap_zero_page(nkpg); 1735 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1736 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); 1737 pdir_pde(PTD, kernel_vm_end) = newpdir; 1738 1739 mtx_lock_spin(&allpmaps_lock); 1740 LIST_FOREACH(pmap, &allpmaps, pm_list) { 1741 pde = pmap_pde(pmap, kernel_vm_end); 1742 pde_store(pde, newpdir); 1743 } 1744 mtx_unlock_spin(&allpmaps_lock); 1745 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1746 } 1747 splx(s); 1748} 1749 1750 1751/*************************************************** 1752 * page management routines. 1753 ***************************************************/ 1754 1755/* 1756 * free the pv_entry back to the free list 1757 */ 1758static PMAP_INLINE void 1759free_pv_entry(pv_entry_t pv) 1760{ 1761 pv_entry_count--; 1762 uma_zfree(pvzone, pv); 1763} 1764 1765/* 1766 * get a new pv_entry, allocating a block from the system 1767 * when needed. 1768 * the memory allocation is performed bypassing the malloc code 1769 * because of the possibility of allocations at interrupt time. 1770 */ 1771static pv_entry_t 1772get_pv_entry(void) 1773{ 1774 pv_entry_count++; 1775 if (pv_entry_high_water && 1776 (pv_entry_count > pv_entry_high_water) && 1777 (pmap_pagedaemon_waken == 0)) { 1778 pmap_pagedaemon_waken = 1; 1779 wakeup (&vm_pages_needed); 1780 } 1781 return uma_zalloc(pvzone, M_NOWAIT); 1782} 1783 1784/* 1785 * If it is the first entry on the list, it is actually 1786 * in the header and we must copy the following entry up 1787 * to the header. Otherwise we must search the list for 1788 * the entry. In either case we free the now unused entry. 1789 */ 1790 1791static int 1792pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1793{ 1794 pv_entry_t pv; 1795 int rtval; 1796 int s; 1797 1798 s = splvm(); 1799 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1800 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1801 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1802 if (pmap == pv->pv_pmap && va == pv->pv_va) 1803 break; 1804 } 1805 } else { 1806 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1807 if (va == pv->pv_va) 1808 break; 1809 } 1810 } 1811 1812 rtval = 0; 1813 if (pv) { 1814 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1815 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1816 m->md.pv_list_count--; 1817 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 1818 vm_page_flag_clear(m, PG_WRITEABLE); 1819 1820 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1821 free_pv_entry(pv); 1822 } 1823 1824 splx(s); 1825 return rtval; 1826} 1827 1828/* 1829 * Create a pv entry for page at pa for 1830 * (pmap, va). 1831 */ 1832static void 1833pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) 1834{ 1835 1836 int s; 1837 pv_entry_t pv; 1838 1839 s = splvm(); 1840 pv = get_pv_entry(); 1841 pv->pv_va = va; 1842 pv->pv_pmap = pmap; 1843 pv->pv_ptem = mpte; 1844 1845 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1846 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1847 m->md.pv_list_count++; 1848 1849 splx(s); 1850} 1851 1852/* 1853 * pmap_remove_pte: do the things to unmap a page in a process 1854 */ 1855static int 1856pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) 1857{ 1858 pt_entry_t oldpte; 1859 vm_page_t m; 1860 1861 oldpte = pte_load_clear(ptq); 1862 if (oldpte & PG_W) 1863 pmap->pm_stats.wired_count -= 1; 1864 /* 1865 * Machines that don't support invlpg, also don't support 1866 * PG_G. 1867 */ 1868 if (oldpte & PG_G) 1869 pmap_invalidate_page(kernel_pmap, va); 1870 pmap->pm_stats.resident_count -= 1; 1871 if (oldpte & PG_MANAGED) { 1872 m = PHYS_TO_VM_PAGE(oldpte); 1873 if (oldpte & PG_M) { 1874#if defined(PMAP_DIAGNOSTIC) 1875 if (pmap_nw_modified((pt_entry_t) oldpte)) { 1876 printf( 1877 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", 1878 va, oldpte); 1879 } 1880#endif 1881 if (pmap_track_modified(va)) 1882 vm_page_dirty(m); 1883 } 1884 if (oldpte & PG_A) 1885 vm_page_flag_set(m, PG_REFERENCED); 1886 return pmap_remove_entry(pmap, m, va); 1887 } else { 1888 return pmap_unuse_pt(pmap, va, NULL); 1889 } 1890 1891 return 0; 1892} 1893 1894/* 1895 * Remove a single page from a process address space 1896 */ 1897static void 1898pmap_remove_page(pmap_t pmap, vm_offset_t va) 1899{ 1900 pt_entry_t *pte; 1901 1902 if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) 1903 return; 1904 pmap_remove_pte(pmap, pte, va); 1905 pmap_invalidate_page(pmap, va); 1906} 1907 1908/* 1909 * Remove the given range of addresses from the specified map. 1910 * 1911 * It is assumed that the start and end are properly 1912 * rounded to the page size. 1913 */ 1914void 1915pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1916{ 1917 vm_offset_t pdnxt; 1918 pd_entry_t ptpaddr; 1919 pt_entry_t *pte; 1920 int anyvalid; 1921 1922 if (pmap == NULL) 1923 return; 1924 1925 if (pmap->pm_stats.resident_count == 0) 1926 return; 1927 1928 /* 1929 * special handling of removing one page. a very 1930 * common operation and easy to short circuit some 1931 * code. 1932 */ 1933 if ((sva + PAGE_SIZE == eva) && 1934 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 1935 pmap_remove_page(pmap, sva); 1936 return; 1937 } 1938 1939 anyvalid = 0; 1940 1941 for (; sva < eva; sva = pdnxt) { 1942 unsigned pdirindex; 1943 1944 /* 1945 * Calculate index for next page table. 1946 */ 1947 pdnxt = (sva + NBPDR) & ~PDRMASK; 1948 if (pmap->pm_stats.resident_count == 0) 1949 break; 1950 1951 pdirindex = sva >> PDRSHIFT; 1952 ptpaddr = pmap->pm_pdir[pdirindex]; 1953 1954 /* 1955 * Weed out invalid mappings. Note: we assume that the page 1956 * directory table is always allocated, and in kernel virtual. 1957 */ 1958 if (ptpaddr == 0) 1959 continue; 1960 1961 /* 1962 * Check for large page. 1963 */ 1964 if ((ptpaddr & PG_PS) != 0) { 1965 pmap->pm_pdir[pdirindex] = 0; 1966 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1967 anyvalid = 1; 1968 continue; 1969 } 1970 1971 /* 1972 * Limit our scan to either the end of the va represented 1973 * by the current page table page, or to the end of the 1974 * range being removed. 1975 */ 1976 if (pdnxt > eva) 1977 pdnxt = eva; 1978 1979 for (; sva != pdnxt; sva += PAGE_SIZE) { 1980 if ((pte = pmap_pte_quick(pmap, sva)) == NULL || 1981 *pte == 0) 1982 continue; 1983 anyvalid = 1; 1984 if (pmap_remove_pte(pmap, pte, sva)) 1985 break; 1986 } 1987 } 1988 1989 if (anyvalid) 1990 pmap_invalidate_all(pmap); 1991} 1992 1993/* 1994 * Routine: pmap_remove_all 1995 * Function: 1996 * Removes this physical page from 1997 * all physical maps in which it resides. 1998 * Reflects back modify bits to the pager. 1999 * 2000 * Notes: 2001 * Original versions of this routine were very 2002 * inefficient because they iteratively called 2003 * pmap_remove (slow...) 2004 */ 2005 2006void 2007pmap_remove_all(vm_page_t m) 2008{ 2009 register pv_entry_t pv; 2010 pt_entry_t *pte, tpte; 2011 int s; 2012 2013#if defined(PMAP_DIAGNOSTIC) 2014 /* 2015 * XXX This makes pmap_remove_all() illegal for non-managed pages! 2016 */ 2017 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2018 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", 2019 VM_PAGE_TO_PHYS(m)); 2020 } 2021#endif 2022 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2023 s = splvm(); 2024 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2025 pv->pv_pmap->pm_stats.resident_count--; 2026 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 2027 tpte = pte_load_clear(pte); 2028 if (tpte & PG_W) 2029 pv->pv_pmap->pm_stats.wired_count--; 2030 if (tpte & PG_A) 2031 vm_page_flag_set(m, PG_REFERENCED); 2032 2033 /* 2034 * Update the vm_page_t clean and reference bits. 2035 */ 2036 if (tpte & PG_M) { 2037#if defined(PMAP_DIAGNOSTIC) 2038 if (pmap_nw_modified((pt_entry_t) tpte)) { 2039 printf( 2040 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", 2041 pv->pv_va, tpte); 2042 } 2043#endif 2044 if (pmap_track_modified(pv->pv_va)) 2045 vm_page_dirty(m); 2046 } 2047 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 2048 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2049 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2050 m->md.pv_list_count--; 2051 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2052 free_pv_entry(pv); 2053 } 2054 vm_page_flag_clear(m, PG_WRITEABLE); 2055 splx(s); 2056} 2057 2058/* 2059 * Set the physical protection on the 2060 * specified range of this map as requested. 2061 */ 2062void 2063pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2064{ 2065 vm_offset_t pdnxt; 2066 pd_entry_t ptpaddr; 2067 int anychanged; 2068 2069 if (pmap == NULL) 2070 return; 2071 2072 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2073 pmap_remove(pmap, sva, eva); 2074 return; 2075 } 2076 2077 if (prot & VM_PROT_WRITE) 2078 return; 2079 2080 anychanged = 0; 2081 2082 for (; sva < eva; sva = pdnxt) { 2083 unsigned pdirindex; 2084 2085 pdnxt = (sva + NBPDR) & ~PDRMASK; 2086 2087 pdirindex = sva >> PDRSHIFT; 2088 ptpaddr = pmap->pm_pdir[pdirindex]; 2089 2090 /* 2091 * Weed out invalid mappings. Note: we assume that the page 2092 * directory table is always allocated, and in kernel virtual. 2093 */ 2094 if (ptpaddr == 0) 2095 continue; 2096 2097 /* 2098 * Check for large page. 2099 */ 2100 if ((ptpaddr & PG_PS) != 0) { 2101 pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); 2102 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2103 anychanged = 1; 2104 continue; 2105 } 2106 2107 if (pdnxt > eva) 2108 pdnxt = eva; 2109 2110 for (; sva != pdnxt; sva += PAGE_SIZE) { 2111 pt_entry_t pbits; 2112 pt_entry_t *pte; 2113 vm_page_t m; 2114 2115 if ((pte = pmap_pte_quick(pmap, sva)) == NULL) 2116 continue; 2117 pbits = *pte; 2118 if (pbits & PG_MANAGED) { 2119 m = NULL; 2120 if (pbits & PG_A) { 2121 m = PHYS_TO_VM_PAGE(pbits); 2122 vm_page_flag_set(m, PG_REFERENCED); 2123 pbits &= ~PG_A; 2124 } 2125 if ((pbits & PG_M) != 0 && 2126 pmap_track_modified(sva)) { 2127 if (m == NULL) 2128 m = PHYS_TO_VM_PAGE(pbits); 2129 vm_page_dirty(m); 2130 pbits &= ~PG_M; 2131 } 2132 } 2133 2134 pbits &= ~PG_RW; 2135 2136 if (pbits != *pte) { 2137 pte_store(pte, pbits); 2138 anychanged = 1; 2139 } 2140 } 2141 } 2142 if (anychanged) 2143 pmap_invalidate_all(pmap); 2144} 2145 2146/* 2147 * Insert the given physical page (p) at 2148 * the specified virtual address (v) in the 2149 * target physical map with the protection requested. 2150 * 2151 * If specified, the page will be wired down, meaning 2152 * that the related pte can not be reclaimed. 2153 * 2154 * NB: This is the only routine which MAY NOT lazy-evaluate 2155 * or lose information. That is, this routine must actually 2156 * insert this page into the given map NOW. 2157 */ 2158void 2159pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2160 boolean_t wired) 2161{ 2162 vm_paddr_t pa; 2163 register pt_entry_t *pte; 2164 vm_paddr_t opa; 2165 pt_entry_t origpte, newpte; 2166 vm_page_t mpte; 2167 2168 if (pmap == NULL) 2169 return; 2170 2171 va &= PG_FRAME; 2172#ifdef PMAP_DIAGNOSTIC 2173 if (va > VM_MAX_KERNEL_ADDRESS) 2174 panic("pmap_enter: toobig"); 2175 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) 2176 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); 2177#endif 2178 2179 mpte = NULL; 2180 /* 2181 * In the case that a page table page is not 2182 * resident, we are creating it here. 2183 */ 2184 if (va < VM_MAXUSER_ADDRESS) { 2185 mpte = pmap_allocpte(pmap, va); 2186 } 2187#if 0 && defined(PMAP_DIAGNOSTIC) 2188 else { 2189 pd_entry_t *pdeaddr = pmap_pde(pmap, va); 2190 origpte = *pdeaddr; 2191 if ((origpte & PG_V) == 0) { 2192 panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", 2193 pmap->pm_pdir[PTDPTDI], origpte, va); 2194 } 2195 } 2196#endif 2197 2198 pte = pmap_pte_quick(pmap, va); 2199 2200 /* 2201 * Page Directory table entry not valid, we need a new PT page 2202 */ 2203 if (pte == NULL) { 2204 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", 2205 (uintmax_t)pmap->pm_pdir[PTDPTDI], va); 2206 } 2207 2208 pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; 2209 origpte = *pte; 2210 opa = origpte & PG_FRAME; 2211 2212 if (origpte & PG_PS) 2213 panic("pmap_enter: attempted pmap_enter on 4MB page"); 2214 2215 /* 2216 * Mapping has not changed, must be protection or wiring change. 2217 */ 2218 if (origpte && (opa == pa)) { 2219 /* 2220 * Wiring change, just update stats. We don't worry about 2221 * wiring PT pages as they remain resident as long as there 2222 * are valid mappings in them. Hence, if a user page is wired, 2223 * the PT page will be also. 2224 */ 2225 if (wired && ((origpte & PG_W) == 0)) 2226 pmap->pm_stats.wired_count++; 2227 else if (!wired && (origpte & PG_W)) 2228 pmap->pm_stats.wired_count--; 2229 2230#if defined(PMAP_DIAGNOSTIC) 2231 if (pmap_nw_modified((pt_entry_t) origpte)) { 2232 printf( 2233 "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", 2234 va, origpte); 2235 } 2236#endif 2237 2238 /* 2239 * Remove extra pte reference 2240 */ 2241 if (mpte) 2242 mpte->hold_count--; 2243 2244 if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { 2245 if ((origpte & PG_RW) == 0) { 2246 pte_store(pte, origpte | PG_RW); 2247 pmap_invalidate_page(pmap, va); 2248 } 2249 return; 2250 } 2251 2252 /* 2253 * We might be turning off write access to the page, 2254 * so we go ahead and sense modify status. 2255 */ 2256 if (origpte & PG_MANAGED) { 2257 if ((origpte & PG_M) && pmap_track_modified(va)) { 2258 vm_page_t om; 2259 om = PHYS_TO_VM_PAGE(opa); 2260 vm_page_dirty(om); 2261 } 2262 pa |= PG_MANAGED; 2263 } 2264 goto validate; 2265 } 2266 /* 2267 * Mapping has changed, invalidate old range and fall through to 2268 * handle validating new mapping. 2269 */ 2270 if (opa) { 2271 int err; 2272 vm_page_lock_queues(); 2273 err = pmap_remove_pte(pmap, pte, va); 2274 vm_page_unlock_queues(); 2275 if (err) 2276 panic("pmap_enter: pte vanished, va: 0x%x", va); 2277 } 2278 2279 /* 2280 * Enter on the PV list if part of our managed memory. Note that we 2281 * raise IPL while manipulating pv_table since pmap_enter can be 2282 * called at interrupt time. 2283 */ 2284 if (pmap_initialized && 2285 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2286 pmap_insert_entry(pmap, va, mpte, m); 2287 pa |= PG_MANAGED; 2288 } 2289 2290 /* 2291 * Increment counters 2292 */ 2293 pmap->pm_stats.resident_count++; 2294 if (wired) 2295 pmap->pm_stats.wired_count++; 2296 2297validate: 2298 /* 2299 * Now validate mapping with desired protection/wiring. 2300 */ 2301 newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V); 2302 2303 if (wired) 2304 newpte |= PG_W; 2305 if (va < VM_MAXUSER_ADDRESS) 2306 newpte |= PG_U; 2307 if (pmap == kernel_pmap) 2308 newpte |= pgeflag; 2309 2310 /* 2311 * if the mapping or permission bits are different, we need 2312 * to update the pte. 2313 */ 2314 if ((origpte & ~(PG_M|PG_A)) != newpte) { 2315 pte_store(pte, newpte | PG_A); 2316 /*if (origpte)*/ { 2317 pmap_invalidate_page(pmap, va); 2318 } 2319 } 2320} 2321 2322/* 2323 * this code makes some *MAJOR* assumptions: 2324 * 1. Current pmap & pmap exists. 2325 * 2. Not wired. 2326 * 3. Read access. 2327 * 4. No page table pages. 2328 * 5. Tlbflush is deferred to calling procedure. 2329 * 6. Page IS managed. 2330 * but is *MUCH* faster than pmap_enter... 2331 */ 2332 2333static vm_page_t 2334pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) 2335{ 2336 pt_entry_t *pte; 2337 vm_paddr_t pa; 2338 2339 /* 2340 * In the case that a page table page is not 2341 * resident, we are creating it here. 2342 */ 2343 if (va < VM_MAXUSER_ADDRESS) { 2344 unsigned ptepindex; 2345 pd_entry_t ptepa; 2346 2347 /* 2348 * Calculate pagetable page index 2349 */ 2350 ptepindex = va >> PDRSHIFT; 2351 if (mpte && (mpte->pindex == ptepindex)) { 2352 mpte->hold_count++; 2353 } else { 2354retry: 2355 /* 2356 * Get the page directory entry 2357 */ 2358 ptepa = pmap->pm_pdir[ptepindex]; 2359 2360 /* 2361 * If the page table page is mapped, we just increment 2362 * the hold count, and activate it. 2363 */ 2364 if (ptepa) { 2365 if (ptepa & PG_PS) 2366 panic("pmap_enter_quick: unexpected mapping into 4MB page"); 2367 if (pmap->pm_pteobj->root && 2368 (pmap->pm_pteobj->root->pindex == ptepindex)) { 2369 mpte = pmap->pm_pteobj->root; 2370 } else { 2371 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 2372 } 2373 if (mpte == NULL) 2374 goto retry; 2375 mpte->hold_count++; 2376 } else { 2377 mpte = _pmap_allocpte(pmap, ptepindex); 2378 } 2379 } 2380 } else { 2381 mpte = NULL; 2382 } 2383 2384 /* 2385 * This call to vtopte makes the assumption that we are 2386 * entering the page into the current pmap. In order to support 2387 * quick entry into any pmap, one would likely use pmap_pte_quick. 2388 * But that isn't as quick as vtopte. 2389 */ 2390 pte = vtopte(va); 2391 if (*pte) { 2392 if (mpte != NULL) { 2393 vm_page_lock_queues(); 2394 pmap_unwire_pte_hold(pmap, mpte); 2395 vm_page_unlock_queues(); 2396 } 2397 return 0; 2398 } 2399 2400 /* 2401 * Enter on the PV list if part of our managed memory. Note that we 2402 * raise IPL while manipulating pv_table since pmap_enter can be 2403 * called at interrupt time. 2404 */ 2405 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) 2406 pmap_insert_entry(pmap, va, mpte, m); 2407 2408 /* 2409 * Increment counters 2410 */ 2411 pmap->pm_stats.resident_count++; 2412 2413 pa = VM_PAGE_TO_PHYS(m); 2414 2415 /* 2416 * Now validate mapping with RO protection 2417 */ 2418 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2419 pte_store(pte, pa | PG_V | PG_U); 2420 else 2421 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 2422 2423 return mpte; 2424} 2425 2426/* 2427 * Make a temporary mapping for a physical address. This is only intended 2428 * to be used for panic dumps. 2429 */ 2430void * 2431pmap_kenter_temporary(vm_offset_t pa, int i) 2432{ 2433 vm_offset_t va; 2434 2435 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 2436 pmap_kenter(va, pa); 2437#ifndef I386_CPU 2438 invlpg(va); 2439#else 2440 invltlb(); 2441#endif 2442 return ((void *)crashdumpmap); 2443} 2444 2445#define MAX_INIT_PT (96) 2446/* 2447 * pmap_object_init_pt preloads the ptes for a given object 2448 * into the specified pmap. This eliminates the blast of soft 2449 * faults on process startup and immediately after an mmap. 2450 */ 2451void 2452pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2453 vm_object_t object, vm_pindex_t pindex, 2454 vm_size_t size, int limit) 2455{ 2456 vm_offset_t tmpidx; 2457 int psize; 2458 vm_page_t p, mpte; 2459 2460 if (pmap == NULL || object == NULL) 2461 return; 2462 VM_OBJECT_LOCK(object); 2463 /* 2464 * This code maps large physical mmap regions into the 2465 * processor address space. Note that some shortcuts 2466 * are taken, but the code works. 2467 */ 2468 if (pseflag && (object->type == OBJT_DEVICE) && 2469 ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { 2470 int i; 2471 vm_page_t m[1]; 2472 unsigned int ptepindex; 2473 int npdes; 2474 pd_entry_t ptepa; 2475 2476 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) 2477 goto unlock_return; 2478retry: 2479 p = vm_page_lookup(object, pindex); 2480 if (p != NULL) { 2481 vm_page_lock_queues(); 2482 if (vm_page_sleep_if_busy(p, FALSE, "init4p")) 2483 goto retry; 2484 } else { 2485 p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); 2486 if (p == NULL) 2487 goto unlock_return; 2488 m[0] = p; 2489 2490 if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { 2491 vm_page_lock_queues(); 2492 vm_page_free(p); 2493 vm_page_unlock_queues(); 2494 goto unlock_return; 2495 } 2496 2497 p = vm_page_lookup(object, pindex); 2498 vm_page_lock_queues(); 2499 vm_page_wakeup(p); 2500 } 2501 vm_page_unlock_queues(); 2502 2503 ptepa = VM_PAGE_TO_PHYS(p); 2504 if (ptepa & (NBPDR - 1)) { 2505 goto unlock_return; 2506 } 2507 2508 p->valid = VM_PAGE_BITS_ALL; 2509 2510 pmap->pm_stats.resident_count += size >> PAGE_SHIFT; 2511 npdes = size >> PDRSHIFT; 2512 for(i = 0; i < npdes; i++) { 2513 pde_store(&pmap->pm_pdir[ptepindex], 2514 ptepa | PG_U | PG_RW | PG_V | PG_PS); 2515 ptepa += NBPDR; 2516 ptepindex += 1; 2517 } 2518 pmap_invalidate_all(kernel_pmap); 2519 goto unlock_return; 2520 } 2521 2522 psize = i386_btop(size); 2523 2524 if ((object->type != OBJT_VNODE) || 2525 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2526 (object->resident_page_count > MAX_INIT_PT))) { 2527 goto unlock_return; 2528 } 2529 2530 if (psize + pindex > object->size) { 2531 if (object->size < pindex) 2532 goto unlock_return; 2533 psize = object->size - pindex; 2534 } 2535 2536 mpte = NULL; 2537 2538 if ((p = TAILQ_FIRST(&object->memq)) != NULL) { 2539 if (p->pindex < pindex) { 2540 p = vm_page_splay(pindex, object->root); 2541 if ((object->root = p)->pindex < pindex) 2542 p = TAILQ_NEXT(p, listq); 2543 } 2544 } 2545 /* 2546 * Assert: the variable p is either (1) the page with the 2547 * least pindex greater than or equal to the parameter pindex 2548 * or (2) NULL. 2549 */ 2550 for (; 2551 p != NULL && (tmpidx = p->pindex - pindex) < psize; 2552 p = TAILQ_NEXT(p, listq)) { 2553 /* 2554 * don't allow an madvise to blow away our really 2555 * free pages allocating pv entries. 2556 */ 2557 if ((limit & MAP_PREFAULT_MADVISE) && 2558 cnt.v_free_count < cnt.v_free_reserved) { 2559 break; 2560 } 2561 vm_page_lock_queues(); 2562 if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL && 2563 (p->busy == 0) && 2564 (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2565 if ((p->queue - p->pc) == PQ_CACHE) 2566 vm_page_deactivate(p); 2567 vm_page_busy(p); 2568 vm_page_unlock_queues(); 2569 VM_OBJECT_UNLOCK(object); 2570 mpte = pmap_enter_quick(pmap, 2571 addr + i386_ptob(tmpidx), p, mpte); 2572 VM_OBJECT_LOCK(object); 2573 vm_page_lock_queues(); 2574 vm_page_wakeup(p); 2575 } 2576 vm_page_unlock_queues(); 2577 } 2578unlock_return: 2579 VM_OBJECT_UNLOCK(object); 2580} 2581 2582/* 2583 * pmap_prefault provides a quick way of clustering 2584 * pagefaults into a processes address space. It is a "cousin" 2585 * of pmap_object_init_pt, except it runs at page fault time instead 2586 * of mmap time. 2587 */ 2588#define PFBAK 4 2589#define PFFOR 4 2590#define PAGEORDER_SIZE (PFBAK+PFFOR) 2591 2592static int pmap_prefault_pageorder[] = { 2593 -1 * PAGE_SIZE, 1 * PAGE_SIZE, 2594 -2 * PAGE_SIZE, 2 * PAGE_SIZE, 2595 -3 * PAGE_SIZE, 3 * PAGE_SIZE, 2596 -4 * PAGE_SIZE, 4 * PAGE_SIZE 2597}; 2598 2599void 2600pmap_prefault(pmap, addra, entry) 2601 pmap_t pmap; 2602 vm_offset_t addra; 2603 vm_map_entry_t entry; 2604{ 2605 int i; 2606 vm_offset_t starta; 2607 vm_offset_t addr; 2608 vm_pindex_t pindex; 2609 vm_page_t m, mpte; 2610 vm_object_t object; 2611 2612 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) 2613 return; 2614 2615 object = entry->object.vm_object; 2616 2617 starta = addra - PFBAK * PAGE_SIZE; 2618 if (starta < entry->start) { 2619 starta = entry->start; 2620 } else if (starta > addra) { 2621 starta = 0; 2622 } 2623 2624 mpte = NULL; 2625 for (i = 0; i < PAGEORDER_SIZE; i++) { 2626 vm_object_t lobject; 2627 pt_entry_t *pte; 2628 2629 addr = addra + pmap_prefault_pageorder[i]; 2630 if (addr > addra + (PFFOR * PAGE_SIZE)) 2631 addr = 0; 2632 2633 if (addr < starta || addr >= entry->end) 2634 continue; 2635 2636 if ((*pmap_pde(pmap, addr)) == 0) 2637 continue; 2638 2639 pte = vtopte(addr); 2640 if (*pte) 2641 continue; 2642 2643 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; 2644 lobject = object; 2645 for (m = vm_page_lookup(lobject, pindex); 2646 (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); 2647 lobject = lobject->backing_object) { 2648 if (lobject->backing_object_offset & PAGE_MASK) 2649 break; 2650 pindex += (lobject->backing_object_offset >> PAGE_SHIFT); 2651 m = vm_page_lookup(lobject->backing_object, pindex); 2652 } 2653 2654 /* 2655 * give-up when a page is not in memory 2656 */ 2657 if (m == NULL) 2658 break; 2659 vm_page_lock_queues(); 2660 if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2661 (m->busy == 0) && 2662 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { 2663 2664 if ((m->queue - m->pc) == PQ_CACHE) { 2665 vm_page_deactivate(m); 2666 } 2667 vm_page_busy(m); 2668 vm_page_unlock_queues(); 2669 mpte = pmap_enter_quick(pmap, addr, m, mpte); 2670 vm_page_lock_queues(); 2671 vm_page_wakeup(m); 2672 } 2673 vm_page_unlock_queues(); 2674 } 2675} 2676 2677/* 2678 * Routine: pmap_change_wiring 2679 * Function: Change the wiring attribute for a map/virtual-address 2680 * pair. 2681 * In/out conditions: 2682 * The mapping must already exist in the pmap. 2683 */ 2684void 2685pmap_change_wiring(pmap, va, wired) 2686 register pmap_t pmap; 2687 vm_offset_t va; 2688 boolean_t wired; 2689{ 2690 register pt_entry_t *pte; 2691 2692 if (pmap == NULL) 2693 return; 2694 2695 pte = pmap_pte_quick(pmap, va); 2696 2697 if (wired && !pmap_pte_w(pte)) 2698 pmap->pm_stats.wired_count++; 2699 else if (!wired && pmap_pte_w(pte)) 2700 pmap->pm_stats.wired_count--; 2701 2702 /* 2703 * Wiring is not a hardware characteristic so there is no need to 2704 * invalidate TLB. 2705 */ 2706 pmap_pte_set_w(pte, wired); 2707} 2708 2709 2710 2711/* 2712 * Copy the range specified by src_addr/len 2713 * from the source map to the range dst_addr/len 2714 * in the destination map. 2715 * 2716 * This routine is only advisory and need not do anything. 2717 */ 2718 2719void 2720pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2721 vm_offset_t src_addr) 2722{ 2723 vm_offset_t addr; 2724 vm_offset_t end_addr = src_addr + len; 2725 vm_offset_t pdnxt; 2726 vm_page_t m; 2727 2728 if (dst_addr != src_addr) 2729 return; 2730 2731 if (!pmap_is_current(src_pmap)) 2732 return; 2733 2734 for (addr = src_addr; addr < end_addr; addr = pdnxt) { 2735 pt_entry_t *src_pte, *dst_pte; 2736 vm_page_t dstmpte, srcmpte; 2737 pd_entry_t srcptepaddr; 2738 unsigned ptepindex; 2739 2740 if (addr >= UPT_MIN_ADDRESS) 2741 panic("pmap_copy: invalid to pmap_copy page tables\n"); 2742 2743 /* 2744 * Don't let optional prefaulting of pages make us go 2745 * way below the low water mark of free pages or way 2746 * above high water mark of used pv entries. 2747 */ 2748 if (cnt.v_free_count < cnt.v_free_reserved || 2749 pv_entry_count > pv_entry_high_water) 2750 break; 2751 2752 pdnxt = (addr + NBPDR) & ~PDRMASK; 2753 ptepindex = addr >> PDRSHIFT; 2754 2755 srcptepaddr = src_pmap->pm_pdir[ptepindex]; 2756 if (srcptepaddr == 0) 2757 continue; 2758 2759 if (srcptepaddr & PG_PS) { 2760 if (dst_pmap->pm_pdir[ptepindex] == 0) { 2761 dst_pmap->pm_pdir[ptepindex] = srcptepaddr; 2762 dst_pmap->pm_stats.resident_count += 2763 NBPDR / PAGE_SIZE; 2764 } 2765 continue; 2766 } 2767 2768 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); 2769 if ((srcmpte == NULL) || 2770 (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) 2771 continue; 2772 2773 if (pdnxt > end_addr) 2774 pdnxt = end_addr; 2775 2776 src_pte = vtopte(addr); 2777 while (addr < pdnxt) { 2778 pt_entry_t ptetemp; 2779 ptetemp = *src_pte; 2780 /* 2781 * we only virtual copy managed pages 2782 */ 2783 if ((ptetemp & PG_MANAGED) != 0) { 2784 /* 2785 * We have to check after allocpte for the 2786 * pte still being around... allocpte can 2787 * block. 2788 */ 2789 dstmpte = pmap_allocpte(dst_pmap, addr); 2790 dst_pte = pmap_pte_quick(dst_pmap, addr); 2791 if ((*dst_pte == 0) && (ptetemp = *src_pte)) { 2792 /* 2793 * Clear the modified and 2794 * accessed (referenced) bits 2795 * during the copy. 2796 */ 2797 m = PHYS_TO_VM_PAGE(ptetemp); 2798 *dst_pte = ptetemp & ~(PG_M | PG_A); 2799 dst_pmap->pm_stats.resident_count++; 2800 pmap_insert_entry(dst_pmap, addr, 2801 dstmpte, m); 2802 } else { 2803 vm_page_lock_queues(); 2804 pmap_unwire_pte_hold(dst_pmap, dstmpte); 2805 vm_page_unlock_queues(); 2806 } 2807 if (dstmpte->hold_count >= srcmpte->hold_count) 2808 break; 2809 } 2810 addr += PAGE_SIZE; 2811 src_pte++; 2812 } 2813 } 2814} 2815 2816#ifdef SMP 2817 2818/* 2819 * pmap_zpi_switchin*() 2820 * 2821 * These functions allow us to avoid doing IPIs alltogether in certain 2822 * temporary page-mapping situations (page zeroing). Instead to deal 2823 * with being preempted and moved onto a different cpu we invalidate 2824 * the page when the scheduler switches us in. This does not occur 2825 * very often so we remain relatively optimal with very little effort. 2826 */ 2827static void 2828pmap_zpi_switchin12(void) 2829{ 2830 invlpg((u_int)CADDR1); 2831 invlpg((u_int)CADDR2); 2832} 2833 2834static void 2835pmap_zpi_switchin2(void) 2836{ 2837 invlpg((u_int)CADDR2); 2838} 2839 2840static void 2841pmap_zpi_switchin3(void) 2842{ 2843 invlpg((u_int)CADDR3); 2844} 2845 2846#endif 2847 2848/* 2849 * pmap_zero_page zeros the specified hardware page by mapping 2850 * the page into KVM and using bzero to clear its contents. 2851 */ 2852void 2853pmap_zero_page(vm_page_t m) 2854{ 2855 2856 mtx_lock(&CMAPCADDR12_lock); 2857 if (*CMAP2) 2858 panic("pmap_zero_page: CMAP2 busy"); 2859 *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; 2860#ifdef I386_CPU 2861 invltlb(); 2862#else 2863#ifdef SMP 2864 curthread->td_switchin = pmap_zpi_switchin2; 2865#endif 2866 invlpg((u_int)CADDR2); 2867#endif 2868#if defined(I686_CPU) 2869 if (cpu_class == CPUCLASS_686) 2870 i686_pagezero(CADDR2); 2871 else 2872#endif 2873 bzero(CADDR2, PAGE_SIZE); 2874#ifdef SMP 2875 curthread->td_switchin = NULL; 2876#endif 2877 *CMAP2 = 0; 2878 mtx_unlock(&CMAPCADDR12_lock); 2879} 2880 2881/* 2882 * pmap_zero_page_area zeros the specified hardware page by mapping 2883 * the page into KVM and using bzero to clear its contents. 2884 * 2885 * off and size may not cover an area beyond a single hardware page. 2886 */ 2887void 2888pmap_zero_page_area(vm_page_t m, int off, int size) 2889{ 2890 2891 mtx_lock(&CMAPCADDR12_lock); 2892 if (*CMAP2) 2893 panic("pmap_zero_page: CMAP2 busy"); 2894 *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; 2895#ifdef I386_CPU 2896 invltlb(); 2897#else 2898#ifdef SMP 2899 curthread->td_switchin = pmap_zpi_switchin2; 2900#endif 2901 invlpg((u_int)CADDR2); 2902#endif 2903#if defined(I686_CPU) 2904 if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) 2905 i686_pagezero(CADDR2); 2906 else 2907#endif 2908 bzero((char *)CADDR2 + off, size); 2909#ifdef SMP 2910 curthread->td_switchin = NULL; 2911#endif 2912 *CMAP2 = 0; 2913 mtx_unlock(&CMAPCADDR12_lock); 2914} 2915 2916/* 2917 * pmap_zero_page_idle zeros the specified hardware page by mapping 2918 * the page into KVM and using bzero to clear its contents. This 2919 * is intended to be called from the vm_pagezero process only and 2920 * outside of Giant. 2921 */ 2922void 2923pmap_zero_page_idle(vm_page_t m) 2924{ 2925 2926 if (*CMAP3) 2927 panic("pmap_zero_page: CMAP3 busy"); 2928 *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; 2929#ifdef I386_CPU 2930 invltlb(); 2931#else 2932#ifdef SMP 2933 curthread->td_switchin = pmap_zpi_switchin3; 2934#endif 2935 invlpg((u_int)CADDR3); 2936#endif 2937#if defined(I686_CPU) 2938 if (cpu_class == CPUCLASS_686) 2939 i686_pagezero(CADDR3); 2940 else 2941#endif 2942 bzero(CADDR3, PAGE_SIZE); 2943#ifdef SMP 2944 curthread->td_switchin = NULL; 2945#endif 2946 *CMAP3 = 0; 2947} 2948 2949/* 2950 * pmap_copy_page copies the specified (machine independent) 2951 * page by mapping the page into virtual memory and using 2952 * bcopy to copy the page, one machine dependent page at a 2953 * time. 2954 */ 2955void 2956pmap_copy_page(vm_page_t src, vm_page_t dst) 2957{ 2958 2959 mtx_lock(&CMAPCADDR12_lock); 2960 if (*CMAP1) 2961 panic("pmap_copy_page: CMAP1 busy"); 2962 if (*CMAP2) 2963 panic("pmap_copy_page: CMAP2 busy"); 2964 *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A; 2965 *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M; 2966#ifdef I386_CPU 2967 invltlb(); 2968#else 2969#ifdef SMP 2970 curthread->td_switchin = pmap_zpi_switchin12; 2971#endif 2972 invlpg((u_int)CADDR1); 2973 invlpg((u_int)CADDR2); 2974#endif 2975 bcopy(CADDR1, CADDR2, PAGE_SIZE); 2976#ifdef SMP 2977 curthread->td_switchin = NULL; 2978#endif 2979 *CMAP1 = 0; 2980 *CMAP2 = 0; 2981 mtx_unlock(&CMAPCADDR12_lock); 2982} 2983 2984/* 2985 * Returns true if the pmap's pv is one of the first 2986 * 16 pvs linked to from this page. This count may 2987 * be changed upwards or downwards in the future; it 2988 * is only necessary that true be returned for a small 2989 * subset of pmaps for proper page aging. 2990 */ 2991boolean_t 2992pmap_page_exists_quick(pmap, m) 2993 pmap_t pmap; 2994 vm_page_t m; 2995{ 2996 pv_entry_t pv; 2997 int loops = 0; 2998 int s; 2999 3000 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3001 return FALSE; 3002 3003 s = splvm(); 3004 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3005 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3006 if (pv->pv_pmap == pmap) { 3007 splx(s); 3008 return TRUE; 3009 } 3010 loops++; 3011 if (loops >= 16) 3012 break; 3013 } 3014 splx(s); 3015 return (FALSE); 3016} 3017 3018#define PMAP_REMOVE_PAGES_CURPROC_ONLY 3019/* 3020 * Remove all pages from specified address space 3021 * this aids process exit speeds. Also, this code 3022 * is special cased for current process only, but 3023 * can have the more generic (and slightly slower) 3024 * mode enabled. This is much faster than pmap_remove 3025 * in the case of running down an entire address space. 3026 */ 3027void 3028pmap_remove_pages(pmap, sva, eva) 3029 pmap_t pmap; 3030 vm_offset_t sva, eva; 3031{ 3032 pt_entry_t *pte, tpte; 3033 vm_page_t m; 3034 pv_entry_t pv, npv; 3035 int s; 3036 3037#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 3038 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { 3039 printf("warning: pmap_remove_pages called with non-current pmap\n"); 3040 return; 3041 } 3042#endif 3043 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3044 s = splvm(); 3045 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 3046 3047 if (pv->pv_va >= eva || pv->pv_va < sva) { 3048 npv = TAILQ_NEXT(pv, pv_plist); 3049 continue; 3050 } 3051 3052#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 3053 pte = vtopte(pv->pv_va); 3054#else 3055 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3056#endif 3057 tpte = *pte; 3058 3059 if (tpte == 0) { 3060 printf("TPTE at %p IS ZERO @ VA %08x\n", 3061 pte, pv->pv_va); 3062 panic("bad pte"); 3063 } 3064 3065/* 3066 * We cannot remove wired pages from a process' mapping at this time 3067 */ 3068 if (tpte & PG_W) { 3069 npv = TAILQ_NEXT(pv, pv_plist); 3070 continue; 3071 } 3072 3073 m = PHYS_TO_VM_PAGE(tpte); 3074 KASSERT(m->phys_addr == (tpte & PG_FRAME), 3075 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 3076 m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); 3077 3078 KASSERT(m < &vm_page_array[vm_page_array_size], 3079 ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); 3080 3081 pv->pv_pmap->pm_stats.resident_count--; 3082 3083 pte_clear(pte); 3084 3085 /* 3086 * Update the vm_page_t clean and reference bits. 3087 */ 3088 if (tpte & PG_M) { 3089 vm_page_dirty(m); 3090 } 3091 3092 npv = TAILQ_NEXT(pv, pv_plist); 3093 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 3094 3095 m->md.pv_list_count--; 3096 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3097 if (TAILQ_FIRST(&m->md.pv_list) == NULL) { 3098 vm_page_flag_clear(m, PG_WRITEABLE); 3099 } 3100 3101 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 3102 free_pv_entry(pv); 3103 } 3104 splx(s); 3105 pmap_invalidate_all(pmap); 3106} 3107 3108/* 3109 * pmap_is_modified: 3110 * 3111 * Return whether or not the specified physical page was modified 3112 * in any physical maps. 3113 */ 3114boolean_t 3115pmap_is_modified(vm_page_t m) 3116{ 3117 pv_entry_t pv; 3118 pt_entry_t *pte; 3119 int s; 3120 3121 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3122 return FALSE; 3123 3124 s = splvm(); 3125 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3126 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3127 /* 3128 * if the bit being tested is the modified bit, then 3129 * mark clean_map and ptes as never 3130 * modified. 3131 */ 3132 if (!pmap_track_modified(pv->pv_va)) 3133 continue; 3134#if defined(PMAP_DIAGNOSTIC) 3135 if (!pv->pv_pmap) { 3136 printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); 3137 continue; 3138 } 3139#endif 3140 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3141 if (*pte & PG_M) { 3142 splx(s); 3143 return TRUE; 3144 } 3145 } 3146 splx(s); 3147 return (FALSE); 3148} 3149 3150/* 3151 * this routine is used to modify bits in ptes 3152 */ 3153static __inline void 3154pmap_changebit(vm_page_t m, int bit, boolean_t setem) 3155{ 3156 register pv_entry_t pv; 3157 register pt_entry_t *pte; 3158 int s; 3159 3160 if (!pmap_initialized || (m->flags & PG_FICTITIOUS) || 3161 (!setem && bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) 3162 return; 3163 3164 s = splvm(); 3165 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3166 /* 3167 * Loop over all current mappings setting/clearing as appropos If 3168 * setting RO do we need to clear the VAC? 3169 */ 3170 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3171 /* 3172 * don't write protect pager mappings 3173 */ 3174 if (!setem && (bit == PG_RW)) { 3175 if (!pmap_track_modified(pv->pv_va)) 3176 continue; 3177 } 3178 3179#if defined(PMAP_DIAGNOSTIC) 3180 if (!pv->pv_pmap) { 3181 printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); 3182 continue; 3183 } 3184#endif 3185 3186 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3187 3188 if (setem) { 3189 *pte |= bit; 3190 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3191 } else { 3192 pt_entry_t pbits = *pte; 3193 if (pbits & bit) { 3194 if (bit == PG_RW) { 3195 if (pbits & PG_M) { 3196 vm_page_dirty(m); 3197 } 3198 pte_store(pte, pbits & ~(PG_M|PG_RW)); 3199 } else { 3200 pte_store(pte, pbits & ~bit); 3201 } 3202 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3203 } 3204 } 3205 } 3206 if (!setem && bit == PG_RW) 3207 vm_page_flag_clear(m, PG_WRITEABLE); 3208 splx(s); 3209} 3210 3211/* 3212 * pmap_page_protect: 3213 * 3214 * Lower the permission for all mappings to a given page. 3215 */ 3216void 3217pmap_page_protect(vm_page_t m, vm_prot_t prot) 3218{ 3219 if ((prot & VM_PROT_WRITE) == 0) { 3220 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3221 pmap_changebit(m, PG_RW, FALSE); 3222 } else { 3223 pmap_remove_all(m); 3224 } 3225 } 3226} 3227 3228/* 3229 * pmap_ts_referenced: 3230 * 3231 * Return a count of reference bits for a page, clearing those bits. 3232 * It is not necessary for every reference bit to be cleared, but it 3233 * is necessary that 0 only be returned when there are truly no 3234 * reference bits set. 3235 * 3236 * XXX: The exact number of bits to check and clear is a matter that 3237 * should be tested and standardized at some point in the future for 3238 * optimal aging of shared pages. 3239 */ 3240int 3241pmap_ts_referenced(vm_page_t m) 3242{ 3243 register pv_entry_t pv, pvf, pvn; 3244 pt_entry_t *pte; 3245 pt_entry_t v; 3246 int s; 3247 int rtval = 0; 3248 3249 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3250 return (rtval); 3251 3252 s = splvm(); 3253 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3254 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3255 3256 pvf = pv; 3257 3258 do { 3259 pvn = TAILQ_NEXT(pv, pv_list); 3260 3261 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3262 3263 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3264 3265 if (!pmap_track_modified(pv->pv_va)) 3266 continue; 3267 3268 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); 3269 3270 if (pte && ((v = pte_load(pte)) & PG_A) != 0) { 3271 pte_store(pte, v & ~PG_A); 3272 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 3273 3274 rtval++; 3275 if (rtval > 4) { 3276 break; 3277 } 3278 } 3279 } while ((pv = pvn) != NULL && pv != pvf); 3280 } 3281 splx(s); 3282 3283 return (rtval); 3284} 3285 3286/* 3287 * Clear the modify bits on the specified physical page. 3288 */ 3289void 3290pmap_clear_modify(vm_page_t m) 3291{ 3292 pmap_changebit(m, PG_M, FALSE); 3293} 3294 3295/* 3296 * pmap_clear_reference: 3297 * 3298 * Clear the reference bit on the specified physical page. 3299 */ 3300void 3301pmap_clear_reference(vm_page_t m) 3302{ 3303 pmap_changebit(m, PG_A, FALSE); 3304} 3305 3306/* 3307 * Miscellaneous support routines follow 3308 */ 3309 3310static void 3311i386_protection_init() 3312{ 3313 register int *kp, prot; 3314 3315 kp = protection_codes; 3316 for (prot = 0; prot < 8; prot++) { 3317 switch (prot) { 3318 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: 3319 /* 3320 * Read access is also 0. There isn't any execute bit, 3321 * so just make it readable. 3322 */ 3323 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: 3324 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: 3325 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: 3326 *kp++ = 0; 3327 break; 3328 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: 3329 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: 3330 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: 3331 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: 3332 *kp++ = PG_RW; 3333 break; 3334 } 3335 } 3336} 3337 3338/* 3339 * Map a set of physical memory pages into the kernel virtual 3340 * address space. Return a pointer to where it is mapped. This 3341 * routine is intended to be used for mapping device memory, 3342 * NOT real memory. 3343 */ 3344void * 3345pmap_mapdev(pa, size) 3346 vm_paddr_t pa; 3347 vm_size_t size; 3348{ 3349 vm_offset_t va, tmpva, offset; 3350 3351 offset = pa & PAGE_MASK; 3352 size = roundup(offset + size, PAGE_SIZE); 3353 3354 GIANT_REQUIRED; 3355 3356 va = kmem_alloc_pageable(kernel_map, size); 3357 if (!va) 3358 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 3359 3360 pa = pa & PG_FRAME; 3361 for (tmpva = va; size > 0; ) { 3362 pmap_kenter(tmpva, pa); 3363 size -= PAGE_SIZE; 3364 tmpva += PAGE_SIZE; 3365 pa += PAGE_SIZE; 3366 } 3367 pmap_invalidate_range(kernel_pmap, va, tmpva); 3368 return ((void *)(va + offset)); 3369} 3370 3371void 3372pmap_unmapdev(va, size) 3373 vm_offset_t va; 3374 vm_size_t size; 3375{ 3376 vm_offset_t base, offset, tmpva; 3377 pt_entry_t *pte; 3378 3379 base = va & PG_FRAME; 3380 offset = va & PAGE_MASK; 3381 size = roundup(offset + size, PAGE_SIZE); 3382 for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { 3383 pte = vtopte(tmpva); 3384 pte_clear(pte); 3385 } 3386 pmap_invalidate_range(kernel_pmap, va, tmpva); 3387 kmem_free(kernel_map, base, size); 3388} 3389 3390/* 3391 * perform the pmap work for mincore 3392 */ 3393int 3394pmap_mincore(pmap, addr) 3395 pmap_t pmap; 3396 vm_offset_t addr; 3397{ 3398 pt_entry_t *ptep, pte; 3399 vm_page_t m; 3400 int val = 0; 3401 3402 ptep = pmap_pte_quick(pmap, addr); 3403 if (ptep == 0) { 3404 return 0; 3405 } 3406 3407 if ((pte = *ptep) != 0) { 3408 vm_paddr_t pa; 3409 3410 val = MINCORE_INCORE; 3411 if ((pte & PG_MANAGED) == 0) 3412 return val; 3413 3414 pa = pte & PG_FRAME; 3415 3416 m = PHYS_TO_VM_PAGE(pa); 3417 3418 /* 3419 * Modified by us 3420 */ 3421 if (pte & PG_M) 3422 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3423 else { 3424 /* 3425 * Modified by someone else 3426 */ 3427 vm_page_lock_queues(); 3428 if (m->dirty || pmap_is_modified(m)) 3429 val |= MINCORE_MODIFIED_OTHER; 3430 vm_page_unlock_queues(); 3431 } 3432 /* 3433 * Referenced by us 3434 */ 3435 if (pte & PG_A) 3436 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3437 else { 3438 /* 3439 * Referenced by someone else 3440 */ 3441 vm_page_lock_queues(); 3442 if ((m->flags & PG_REFERENCED) || 3443 pmap_ts_referenced(m)) { 3444 val |= MINCORE_REFERENCED_OTHER; 3445 vm_page_flag_set(m, PG_REFERENCED); 3446 } 3447 vm_page_unlock_queues(); 3448 } 3449 } 3450 return val; 3451} 3452 3453void 3454pmap_activate(struct thread *td) 3455{ 3456 struct proc *p = td->td_proc; 3457 pmap_t pmap; 3458 u_int32_t cr3; 3459 3460 critical_enter(); 3461 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3462#if defined(SMP) 3463 atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); 3464#else 3465 pmap->pm_active |= 1; 3466#endif 3467#ifdef PAE 3468 cr3 = vtophys(pmap->pm_pdpt); 3469#else 3470 cr3 = vtophys(pmap->pm_pdir); 3471#endif 3472 /* XXXKSE this is wrong. 3473 * pmap_activate is for the current thread on the current cpu 3474 */ 3475 if (p->p_flag & P_THREADED) { 3476 /* Make sure all other cr3 entries are updated. */ 3477 /* what if they are running? XXXKSE (maybe abort them) */ 3478 FOREACH_THREAD_IN_PROC(p, td) { 3479 td->td_pcb->pcb_cr3 = cr3; 3480 } 3481 } else { 3482 td->td_pcb->pcb_cr3 = cr3; 3483 } 3484 load_cr3(cr3); 3485#ifdef SWTCH_OPTIM_STATS 3486 tlb_flush_count++; 3487#endif 3488 critical_exit(); 3489} 3490 3491vm_offset_t 3492pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3493{ 3494 3495 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3496 return addr; 3497 } 3498 3499 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 3500 return addr; 3501} 3502 3503 3504#if defined(PMAP_DEBUG) 3505pmap_pid_dump(int pid) 3506{ 3507 pmap_t pmap; 3508 struct proc *p; 3509 int npte = 0; 3510 int index; 3511 3512 sx_slock(&allproc_lock); 3513 LIST_FOREACH(p, &allproc, p_list) { 3514 if (p->p_pid != pid) 3515 continue; 3516 3517 if (p->p_vmspace) { 3518 int i,j; 3519 index = 0; 3520 pmap = vmspace_pmap(p->p_vmspace); 3521 for (i = 0; i < NPDEPTD; i++) { 3522 pd_entry_t *pde; 3523 pt_entry_t *pte; 3524 vm_offset_t base = i << PDRSHIFT; 3525 3526 pde = &pmap->pm_pdir[i]; 3527 if (pde && pmap_pde_v(pde)) { 3528 for (j = 0; j < NPTEPG; j++) { 3529 vm_offset_t va = base + (j << PAGE_SHIFT); 3530 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 3531 if (index) { 3532 index = 0; 3533 printf("\n"); 3534 } 3535 sx_sunlock(&allproc_lock); 3536 return npte; 3537 } 3538 pte = pmap_pte_quick(pmap, va); 3539 if (pte && pmap_pte_v(pte)) { 3540 pt_entry_t pa; 3541 vm_page_t m; 3542 pa = *pte; 3543 m = PHYS_TO_VM_PAGE(pa); 3544 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 3545 va, pa, m->hold_count, m->wire_count, m->flags); 3546 npte++; 3547 index++; 3548 if (index >= 2) { 3549 index = 0; 3550 printf("\n"); 3551 } else { 3552 printf(" "); 3553 } 3554 } 3555 } 3556 } 3557 } 3558 } 3559 } 3560 sx_sunlock(&allproc_lock); 3561 return npte; 3562} 3563#endif 3564 3565#if defined(DEBUG) 3566 3567static void pads(pmap_t pm); 3568void pmap_pvdump(vm_offset_t pa); 3569 3570/* print address space of pmap*/ 3571static void 3572pads(pm) 3573 pmap_t pm; 3574{ 3575 int i, j; 3576 vm_paddr_t va; 3577 pt_entry_t *ptep; 3578 3579 if (pm == kernel_pmap) 3580 return; 3581 for (i = 0; i < NPDEPTD; i++) 3582 if (pm->pm_pdir[i]) 3583 for (j = 0; j < NPTEPG; j++) { 3584 va = (i << PDRSHIFT) + (j << PAGE_SHIFT); 3585 if (pm == kernel_pmap && va < KERNBASE) 3586 continue; 3587 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 3588 continue; 3589 ptep = pmap_pte_quick(pm, va); 3590 if (pmap_pte_v(ptep)) 3591 printf("%x:%x ", va, *ptep); 3592 }; 3593 3594} 3595 3596void 3597pmap_pvdump(pa) 3598 vm_paddr_t pa; 3599{ 3600 pv_entry_t pv; 3601 vm_page_t m; 3602 3603 printf("pa %x", pa); 3604 m = PHYS_TO_VM_PAGE(pa); 3605 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3606 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3607 pads(pv->pv_pmap); 3608 } 3609 printf(" "); 3610} 3611#endif 3612