pmap.c revision 241287
1/* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department and William Jolitz of UUNET Technologies Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 38 * from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps 39 * JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish 40 */ 41 42/* 43 * Manages physical address maps. 44 * 45 * Since the information managed by this module is 46 * also stored by the logical address mapping module, 47 * this module may throw away valid virtual-to-physical 48 * mappings at almost any time. However, invalidations 49 * of virtual-to-physical mappings must be done as 50 * requested. 51 * 52 * In order to cope with hardware architectures which 53 * make virtual-to-physical map invalidates expensive, 54 * this module may delay invalidate or reduced protection 55 * operations until such time as they are actually 56 * necessary. This module is given full information as 57 * to which processors are currently using which maps, 58 * and to when physical maps must be made correct. 59 */ 60 61#include <sys/cdefs.h> 62__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 241287 2012-10-06 19:33:52Z alc $"); 63 64#include "opt_ddb.h" 65#include "opt_pmap.h" 66 67#include <sys/param.h> 68#include <sys/systm.h> 69#include <sys/lock.h> 70#include <sys/mman.h> 71#include <sys/msgbuf.h> 72#include <sys/mutex.h> 73#include <sys/pcpu.h> 74#include <sys/proc.h> 75#include <sys/rwlock.h> 76#include <sys/sched.h> 77#ifdef SMP 78#include <sys/smp.h> 79#else 80#include <sys/cpuset.h> 81#endif 82#include <sys/sysctl.h> 83#include <sys/vmmeter.h> 84 85#ifdef DDB 86#include <ddb/ddb.h> 87#endif 88 89#include <vm/vm.h> 90#include <vm/vm_param.h> 91#include <vm/vm_kern.h> 92#include <vm/vm_page.h> 93#include <vm/vm_map.h> 94#include <vm/vm_object.h> 95#include <vm/vm_extern.h> 96#include <vm/vm_pageout.h> 97#include <vm/vm_pager.h> 98#include <vm/uma.h> 99 100#include <machine/cache.h> 101#include <machine/md_var.h> 102#include <machine/tlb.h> 103 104#undef PMAP_DEBUG 105 106#if !defined(DIAGNOSTIC) 107#define PMAP_INLINE __inline 108#else 109#define PMAP_INLINE 110#endif 111 112#ifdef PV_STATS 113#define PV_STAT(x) do { x ; } while (0) 114#else 115#define PV_STAT(x) do { } while (0) 116#endif 117 118/* 119 * Get PDEs and PTEs for user/kernel address space 120 */ 121#define pmap_seg_index(v) (((v) >> SEGSHIFT) & (NPDEPG - 1)) 122#define pmap_pde_index(v) (((v) >> PDRSHIFT) & (NPDEPG - 1)) 123#define pmap_pte_index(v) (((v) >> PAGE_SHIFT) & (NPTEPG - 1)) 124#define pmap_pde_pindex(v) ((v) >> PDRSHIFT) 125 126#ifdef __mips_n64 127#define NUPDE (NPDEPG * NPDEPG) 128#define NUSERPGTBLS (NUPDE + NPDEPG) 129#else 130#define NUPDE (NPDEPG) 131#define NUSERPGTBLS (NUPDE) 132#endif 133 134#define is_kernel_pmap(x) ((x) == kernel_pmap) 135 136struct pmap kernel_pmap_store; 137pd_entry_t *kernel_segmap; 138 139vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 140vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 141 142static int nkpt; 143unsigned pmap_max_asid; /* max ASID supported by the system */ 144 145#define PMAP_ASID_RESERVED 0 146 147vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; 148 149static void pmap_asid_alloc(pmap_t pmap); 150 151/* 152 * Isolate the global pv list lock from data and other locks to prevent false 153 * sharing within the cache. 154 */ 155static struct { 156 struct rwlock lock; 157 char padding[CACHE_LINE_SIZE - sizeof(struct rwlock)]; 158} pvh_global __aligned(CACHE_LINE_SIZE); 159 160#define pvh_global_lock pvh_global.lock 161 162/* 163 * Data for the pv entry allocation mechanism 164 */ 165static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 166static int pv_entry_count; 167 168static void free_pv_chunk(struct pv_chunk *pc); 169static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 170static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); 171static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap); 172static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 173static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 174 vm_offset_t va); 175static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 176 vm_page_t m, vm_prot_t prot, vm_page_t mpte); 177static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va, 178 pd_entry_t pde); 179static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); 180static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); 181static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, 182 vm_offset_t va, vm_page_t m); 183static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte); 184static void pmap_invalidate_all(pmap_t pmap); 185static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va); 186static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m); 187 188static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); 189static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); 190static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t); 191static pt_entry_t init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot); 192 193static void pmap_invalidate_page_action(void *arg); 194static void pmap_invalidate_range_action(void *arg); 195static void pmap_update_page_action(void *arg); 196 197#ifndef __mips_n64 198/* 199 * This structure is for high memory (memory above 512Meg in 32 bit) support. 200 * The highmem area does not have a KSEG0 mapping, and we need a mechanism to 201 * do temporary per-CPU mappings for pmap_zero_page, pmap_copy_page etc. 202 * 203 * At bootup, we reserve 2 virtual pages per CPU for mapping highmem pages. To 204 * access a highmem physical address on a CPU, we map the physical address to 205 * the reserved virtual address for the CPU in the kernel pagetable. This is 206 * done with interrupts disabled(although a spinlock and sched_pin would be 207 * sufficient). 208 */ 209struct local_sysmaps { 210 vm_offset_t base; 211 uint32_t saved_intr; 212 uint16_t valid1, valid2; 213}; 214static struct local_sysmaps sysmap_lmem[MAXCPU]; 215 216static __inline void 217pmap_alloc_lmem_map(void) 218{ 219 int i; 220 221 for (i = 0; i < MAXCPU; i++) { 222 sysmap_lmem[i].base = virtual_avail; 223 virtual_avail += PAGE_SIZE * 2; 224 sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0; 225 } 226} 227 228static __inline vm_offset_t 229pmap_lmem_map1(vm_paddr_t phys) 230{ 231 struct local_sysmaps *sysm; 232 pt_entry_t *pte, npte; 233 vm_offset_t va; 234 uint32_t intr; 235 int cpu; 236 237 intr = intr_disable(); 238 cpu = PCPU_GET(cpuid); 239 sysm = &sysmap_lmem[cpu]; 240 sysm->saved_intr = intr; 241 va = sysm->base; 242 npte = TLBLO_PA_TO_PFN(phys) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G; 243 pte = pmap_pte(kernel_pmap, va); 244 *pte = npte; 245 sysm->valid1 = 1; 246 return (va); 247} 248 249static __inline vm_offset_t 250pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2) 251{ 252 struct local_sysmaps *sysm; 253 pt_entry_t *pte, npte; 254 vm_offset_t va1, va2; 255 uint32_t intr; 256 int cpu; 257 258 intr = intr_disable(); 259 cpu = PCPU_GET(cpuid); 260 sysm = &sysmap_lmem[cpu]; 261 sysm->saved_intr = intr; 262 va1 = sysm->base; 263 va2 = sysm->base + PAGE_SIZE; 264 npte = TLBLO_PA_TO_PFN(phys1) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G; 265 pte = pmap_pte(kernel_pmap, va1); 266 *pte = npte; 267 npte = TLBLO_PA_TO_PFN(phys2) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G; 268 pte = pmap_pte(kernel_pmap, va2); 269 *pte = npte; 270 sysm->valid1 = 1; 271 sysm->valid2 = 1; 272 return (va1); 273} 274 275static __inline void 276pmap_lmem_unmap(void) 277{ 278 struct local_sysmaps *sysm; 279 pt_entry_t *pte; 280 int cpu; 281 282 cpu = PCPU_GET(cpuid); 283 sysm = &sysmap_lmem[cpu]; 284 pte = pmap_pte(kernel_pmap, sysm->base); 285 *pte = PTE_G; 286 tlb_invalidate_address(kernel_pmap, sysm->base); 287 sysm->valid1 = 0; 288 if (sysm->valid2) { 289 pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE); 290 *pte = PTE_G; 291 tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE); 292 sysm->valid2 = 0; 293 } 294 intr_restore(sysm->saved_intr); 295} 296#else /* __mips_n64 */ 297 298static __inline void 299pmap_alloc_lmem_map(void) 300{ 301} 302 303static __inline vm_offset_t 304pmap_lmem_map1(vm_paddr_t phys) 305{ 306 307 return (0); 308} 309 310static __inline vm_offset_t 311pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2) 312{ 313 314 return (0); 315} 316 317static __inline vm_offset_t 318pmap_lmem_unmap(void) 319{ 320 321 return (0); 322} 323#endif /* !__mips_n64 */ 324 325/* 326 * Page table entry lookup routines. 327 */ 328static __inline pd_entry_t * 329pmap_segmap(pmap_t pmap, vm_offset_t va) 330{ 331 332 return (&pmap->pm_segtab[pmap_seg_index(va)]); 333} 334 335#ifdef __mips_n64 336static __inline pd_entry_t * 337pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va) 338{ 339 pd_entry_t *pde; 340 341 pde = (pd_entry_t *)*pdpe; 342 return (&pde[pmap_pde_index(va)]); 343} 344 345static __inline pd_entry_t * 346pmap_pde(pmap_t pmap, vm_offset_t va) 347{ 348 pd_entry_t *pdpe; 349 350 pdpe = pmap_segmap(pmap, va); 351 if (*pdpe == NULL) 352 return (NULL); 353 354 return (pmap_pdpe_to_pde(pdpe, va)); 355} 356#else 357static __inline pd_entry_t * 358pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va) 359{ 360 361 return (pdpe); 362} 363 364static __inline 365pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va) 366{ 367 368 return (pmap_segmap(pmap, va)); 369} 370#endif 371 372static __inline pt_entry_t * 373pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 374{ 375 pt_entry_t *pte; 376 377 pte = (pt_entry_t *)*pde; 378 return (&pte[pmap_pte_index(va)]); 379} 380 381pt_entry_t * 382pmap_pte(pmap_t pmap, vm_offset_t va) 383{ 384 pd_entry_t *pde; 385 386 pde = pmap_pde(pmap, va); 387 if (pde == NULL || *pde == NULL) 388 return (NULL); 389 390 return (pmap_pde_to_pte(pde, va)); 391} 392 393vm_offset_t 394pmap_steal_memory(vm_size_t size) 395{ 396 vm_paddr_t bank_size, pa; 397 vm_offset_t va; 398 399 size = round_page(size); 400 bank_size = phys_avail[1] - phys_avail[0]; 401 while (size > bank_size) { 402 int i; 403 404 for (i = 0; phys_avail[i + 2]; i += 2) { 405 phys_avail[i] = phys_avail[i + 2]; 406 phys_avail[i + 1] = phys_avail[i + 3]; 407 } 408 phys_avail[i] = 0; 409 phys_avail[i + 1] = 0; 410 if (!phys_avail[0]) 411 panic("pmap_steal_memory: out of memory"); 412 bank_size = phys_avail[1] - phys_avail[0]; 413 } 414 415 pa = phys_avail[0]; 416 phys_avail[0] += size; 417 if (MIPS_DIRECT_MAPPABLE(pa) == 0) 418 panic("Out of memory below 512Meg?"); 419 va = MIPS_PHYS_TO_DIRECT(pa); 420 bzero((caddr_t)va, size); 421 return (va); 422} 423 424/* 425 * Bootstrap the system enough to run with virtual memory. This 426 * assumes that the phys_avail array has been initialized. 427 */ 428static void 429pmap_create_kernel_pagetable(void) 430{ 431 int i, j; 432 vm_offset_t ptaddr; 433 pt_entry_t *pte; 434#ifdef __mips_n64 435 pd_entry_t *pde; 436 vm_offset_t pdaddr; 437 int npt, npde; 438#endif 439 440 /* 441 * Allocate segment table for the kernel 442 */ 443 kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE); 444 445 /* 446 * Allocate second level page tables for the kernel 447 */ 448#ifdef __mips_n64 449 npde = howmany(NKPT, NPDEPG); 450 pdaddr = pmap_steal_memory(PAGE_SIZE * npde); 451#endif 452 nkpt = NKPT; 453 ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt); 454 455 /* 456 * The R[4-7]?00 stores only one copy of the Global bit in the 457 * translation lookaside buffer for each 2 page entry. Thus invalid 458 * entrys must have the Global bit set so when Entry LO and Entry HI 459 * G bits are anded together they will produce a global bit to store 460 * in the tlb. 461 */ 462 for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++) 463 *pte = PTE_G; 464 465#ifdef __mips_n64 466 for (i = 0, npt = nkpt; npt > 0; i++) { 467 kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE); 468 pde = (pd_entry_t *)kernel_segmap[i]; 469 470 for (j = 0; j < NPDEPG && npt > 0; j++, npt--) 471 pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE); 472 } 473#else 474 for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++) 475 kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE)); 476#endif 477 478 PMAP_LOCK_INIT(kernel_pmap); 479 kernel_pmap->pm_segtab = kernel_segmap; 480 CPU_FILL(&kernel_pmap->pm_active); 481 TAILQ_INIT(&kernel_pmap->pm_pvchunk); 482 kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED; 483 kernel_pmap->pm_asid[0].gen = 0; 484 kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE; 485} 486 487void 488pmap_bootstrap(void) 489{ 490 int i; 491 int need_local_mappings = 0; 492 493 /* Sort. */ 494again: 495 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 496 /* 497 * Keep the memory aligned on page boundary. 498 */ 499 phys_avail[i] = round_page(phys_avail[i]); 500 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 501 502 if (i < 2) 503 continue; 504 if (phys_avail[i - 2] > phys_avail[i]) { 505 vm_paddr_t ptemp[2]; 506 507 ptemp[0] = phys_avail[i + 0]; 508 ptemp[1] = phys_avail[i + 1]; 509 510 phys_avail[i + 0] = phys_avail[i - 2]; 511 phys_avail[i + 1] = phys_avail[i - 1]; 512 513 phys_avail[i - 2] = ptemp[0]; 514 phys_avail[i - 1] = ptemp[1]; 515 goto again; 516 } 517 } 518 519 /* 520 * In 32 bit, we may have memory which cannot be mapped directly. 521 * This memory will need temporary mapping before it can be 522 * accessed. 523 */ 524 if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1] - 1)) 525 need_local_mappings = 1; 526 527 /* 528 * Copy the phys_avail[] array before we start stealing memory from it. 529 */ 530 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 531 physmem_desc[i] = phys_avail[i]; 532 physmem_desc[i + 1] = phys_avail[i + 1]; 533 } 534 535 Maxmem = atop(phys_avail[i - 1]); 536 537 if (bootverbose) { 538 printf("Physical memory chunk(s):\n"); 539 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 540 vm_paddr_t size; 541 542 size = phys_avail[i + 1] - phys_avail[i]; 543 printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n", 544 (uintmax_t) phys_avail[i], 545 (uintmax_t) phys_avail[i + 1] - 1, 546 (uintmax_t) size, (uintmax_t) size / PAGE_SIZE); 547 } 548 printf("Maxmem is 0x%0jx\n", ptoa((uintmax_t)Maxmem)); 549 } 550 /* 551 * Steal the message buffer from the beginning of memory. 552 */ 553 msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize); 554 msgbufinit(msgbufp, msgbufsize); 555 556 /* 557 * Steal thread0 kstack. 558 */ 559 kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT); 560 561 virtual_avail = VM_MIN_KERNEL_ADDRESS; 562 virtual_end = VM_MAX_KERNEL_ADDRESS; 563 564#ifdef SMP 565 /* 566 * Steal some virtual address space to map the pcpu area. 567 */ 568 virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2); 569 pcpup = (struct pcpu *)virtual_avail; 570 virtual_avail += PAGE_SIZE * 2; 571 572 /* 573 * Initialize the wired TLB entry mapping the pcpu region for 574 * the BSP at 'pcpup'. Up until this point we were operating 575 * with the 'pcpup' for the BSP pointing to a virtual address 576 * in KSEG0 so there was no need for a TLB mapping. 577 */ 578 mips_pcpu_tlb_init(PCPU_ADDR(0)); 579 580 if (bootverbose) 581 printf("pcpu is available at virtual address %p.\n", pcpup); 582#endif 583 584 if (need_local_mappings) 585 pmap_alloc_lmem_map(); 586 pmap_create_kernel_pagetable(); 587 pmap_max_asid = VMNUM_PIDS; 588 mips_wr_entryhi(0); 589 mips_wr_pagemask(0); 590 591 /* 592 * Initialize the global pv list lock. 593 */ 594 rw_init(&pvh_global_lock, "pmap pv global"); 595} 596 597/* 598 * Initialize a vm_page's machine-dependent fields. 599 */ 600void 601pmap_page_init(vm_page_t m) 602{ 603 604 TAILQ_INIT(&m->md.pv_list); 605 m->md.pv_flags = 0; 606} 607 608/* 609 * Initialize the pmap module. 610 * Called by vm_init, to initialize any structures that the pmap 611 * system needs to map virtual memory. 612 */ 613void 614pmap_init(void) 615{ 616} 617 618/*************************************************** 619 * Low level helper routines..... 620 ***************************************************/ 621 622#ifdef SMP 623static __inline void 624pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) 625{ 626 int cpuid, cpu, self; 627 cpuset_t active_cpus; 628 629 sched_pin(); 630 if (is_kernel_pmap(pmap)) { 631 smp_rendezvous(NULL, fn, NULL, arg); 632 goto out; 633 } 634 /* Force ASID update on inactive CPUs */ 635 CPU_FOREACH(cpu) { 636 if (!CPU_ISSET(cpu, &pmap->pm_active)) 637 pmap->pm_asid[cpu].gen = 0; 638 } 639 cpuid = PCPU_GET(cpuid); 640 /* 641 * XXX: barrier/locking for active? 642 * 643 * Take a snapshot of active here, any further changes are ignored. 644 * tlb update/invalidate should be harmless on inactive CPUs 645 */ 646 active_cpus = pmap->pm_active; 647 self = CPU_ISSET(cpuid, &active_cpus); 648 CPU_CLR(cpuid, &active_cpus); 649 /* Optimize for the case where this cpu is the only active one */ 650 if (CPU_EMPTY(&active_cpus)) { 651 if (self) 652 fn(arg); 653 } else { 654 if (self) 655 CPU_SET(cpuid, &active_cpus); 656 smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg); 657 } 658out: 659 sched_unpin(); 660} 661#else /* !SMP */ 662static __inline void 663pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) 664{ 665 int cpuid; 666 667 if (is_kernel_pmap(pmap)) { 668 fn(arg); 669 return; 670 } 671 cpuid = PCPU_GET(cpuid); 672 if (!CPU_ISSET(cpuid, &pmap->pm_active)) 673 pmap->pm_asid[cpuid].gen = 0; 674 else 675 fn(arg); 676} 677#endif /* SMP */ 678 679static void 680pmap_invalidate_all(pmap_t pmap) 681{ 682 683 pmap_call_on_active_cpus(pmap, 684 (void (*)(void *))tlb_invalidate_all_user, pmap); 685} 686 687struct pmap_invalidate_page_arg { 688 pmap_t pmap; 689 vm_offset_t va; 690}; 691 692static void 693pmap_invalidate_page_action(void *arg) 694{ 695 struct pmap_invalidate_page_arg *p = arg; 696 697 tlb_invalidate_address(p->pmap, p->va); 698} 699 700static void 701pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 702{ 703 struct pmap_invalidate_page_arg arg; 704 705 arg.pmap = pmap; 706 arg.va = va; 707 pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &arg); 708} 709 710struct pmap_invalidate_range_arg { 711 pmap_t pmap; 712 vm_offset_t sva; 713 vm_offset_t eva; 714}; 715 716static void 717pmap_invalidate_range_action(void *arg) 718{ 719 struct pmap_invalidate_range_arg *p = arg; 720 721 tlb_invalidate_range(p->pmap, p->sva, p->eva); 722} 723 724static void 725pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 726{ 727 struct pmap_invalidate_range_arg arg; 728 729 arg.pmap = pmap; 730 arg.sva = sva; 731 arg.eva = eva; 732 pmap_call_on_active_cpus(pmap, pmap_invalidate_range_action, &arg); 733} 734 735struct pmap_update_page_arg { 736 pmap_t pmap; 737 vm_offset_t va; 738 pt_entry_t pte; 739}; 740 741static void 742pmap_update_page_action(void *arg) 743{ 744 struct pmap_update_page_arg *p = arg; 745 746 tlb_update(p->pmap, p->va, p->pte); 747} 748 749static void 750pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) 751{ 752 struct pmap_update_page_arg arg; 753 754 arg.pmap = pmap; 755 arg.va = va; 756 arg.pte = pte; 757 pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg); 758} 759 760/* 761 * Routine: pmap_extract 762 * Function: 763 * Extract the physical page address associated 764 * with the given map/virtual_address pair. 765 */ 766vm_paddr_t 767pmap_extract(pmap_t pmap, vm_offset_t va) 768{ 769 pt_entry_t *pte; 770 vm_offset_t retval = 0; 771 772 PMAP_LOCK(pmap); 773 pte = pmap_pte(pmap, va); 774 if (pte) { 775 retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK); 776 } 777 PMAP_UNLOCK(pmap); 778 return (retval); 779} 780 781/* 782 * Routine: pmap_extract_and_hold 783 * Function: 784 * Atomically extract and hold the physical page 785 * with the given pmap and virtual address pair 786 * if that mapping permits the given protection. 787 */ 788vm_page_t 789pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 790{ 791 pt_entry_t pte, *ptep; 792 vm_paddr_t pa, pte_pa; 793 vm_page_t m; 794 795 m = NULL; 796 pa = 0; 797 PMAP_LOCK(pmap); 798retry: 799 ptep = pmap_pte(pmap, va); 800 if (ptep != NULL) { 801 pte = *ptep; 802 if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) || 803 (prot & VM_PROT_WRITE) == 0)) { 804 pte_pa = TLBLO_PTE_TO_PA(pte); 805 if (vm_page_pa_tryrelock(pmap, pte_pa, &pa)) 806 goto retry; 807 m = PHYS_TO_VM_PAGE(pte_pa); 808 vm_page_hold(m); 809 } 810 } 811 PA_UNLOCK_COND(pa); 812 PMAP_UNLOCK(pmap); 813 return (m); 814} 815 816/*************************************************** 817 * Low level mapping routines..... 818 ***************************************************/ 819 820/* 821 * add a wired page to the kva 822 */ 823void 824pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr) 825{ 826 pt_entry_t *pte; 827 pt_entry_t opte, npte; 828 829#ifdef PMAP_DEBUG 830 printf("pmap_kenter: va: %p -> pa: %p\n", (void *)va, (void *)pa); 831#endif 832 833 pte = pmap_pte(kernel_pmap, va); 834 opte = *pte; 835 npte = TLBLO_PA_TO_PFN(pa) | attr | PTE_D | PTE_V | PTE_G; 836 *pte = npte; 837 if (pte_test(&opte, PTE_V) && opte != npte) 838 pmap_update_page(kernel_pmap, va, npte); 839} 840 841void 842pmap_kenter(vm_offset_t va, vm_paddr_t pa) 843{ 844 845 KASSERT(is_cacheable_mem(pa), 846 ("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa)); 847 848 pmap_kenter_attr(va, pa, PTE_C_CACHE); 849} 850 851/* 852 * remove a page from the kernel pagetables 853 */ 854 /* PMAP_INLINE */ void 855pmap_kremove(vm_offset_t va) 856{ 857 pt_entry_t *pte; 858 859 /* 860 * Write back all caches from the page being destroyed 861 */ 862 mips_dcache_wbinv_range_index(va, PAGE_SIZE); 863 864 pte = pmap_pte(kernel_pmap, va); 865 *pte = PTE_G; 866 pmap_invalidate_page(kernel_pmap, va); 867} 868 869/* 870 * Used to map a range of physical addresses into kernel 871 * virtual address space. 872 * 873 * The value passed in '*virt' is a suggested virtual address for 874 * the mapping. Architectures which can support a direct-mapped 875 * physical to virtual region can return the appropriate address 876 * within that region, leaving '*virt' unchanged. Other 877 * architectures should map the pages starting at '*virt' and 878 * update '*virt' with the first usable address after the mapped 879 * region. 880 * 881 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 882 */ 883vm_offset_t 884pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 885{ 886 vm_offset_t va, sva; 887 888 if (MIPS_DIRECT_MAPPABLE(end - 1)) 889 return (MIPS_PHYS_TO_DIRECT(start)); 890 891 va = sva = *virt; 892 while (start < end) { 893 pmap_kenter(va, start); 894 va += PAGE_SIZE; 895 start += PAGE_SIZE; 896 } 897 *virt = va; 898 return (sva); 899} 900 901/* 902 * Add a list of wired pages to the kva 903 * this routine is only used for temporary 904 * kernel mappings that do not need to have 905 * page modification or references recorded. 906 * Note that old mappings are simply written 907 * over. The page *must* be wired. 908 */ 909void 910pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 911{ 912 int i; 913 vm_offset_t origva = va; 914 915 for (i = 0; i < count; i++) { 916 pmap_flush_pvcache(m[i]); 917 pmap_kenter(va, VM_PAGE_TO_PHYS(m[i])); 918 va += PAGE_SIZE; 919 } 920 921 mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count); 922} 923 924/* 925 * this routine jerks page mappings from the 926 * kernel -- it is meant only for temporary mappings. 927 */ 928void 929pmap_qremove(vm_offset_t va, int count) 930{ 931 pt_entry_t *pte; 932 vm_offset_t origva; 933 934 if (count < 1) 935 return; 936 mips_dcache_wbinv_range_index(va, PAGE_SIZE * count); 937 origva = va; 938 do { 939 pte = pmap_pte(kernel_pmap, va); 940 *pte = PTE_G; 941 va += PAGE_SIZE; 942 } while (--count > 0); 943 pmap_invalidate_range(kernel_pmap, origva, va); 944} 945 946/*************************************************** 947 * Page table page management routines..... 948 ***************************************************/ 949 950/* 951 * Decrements a page table page's wire count, which is used to record the 952 * number of valid page table entries within the page. If the wire count 953 * drops to zero, then the page table page is unmapped. Returns TRUE if the 954 * page table page was unmapped and FALSE otherwise. 955 */ 956static PMAP_INLINE boolean_t 957pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m) 958{ 959 960 --m->wire_count; 961 if (m->wire_count == 0) { 962 _pmap_unwire_ptp(pmap, va, m); 963 return (TRUE); 964 } else 965 return (FALSE); 966} 967 968static void 969_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m) 970{ 971 pd_entry_t *pde; 972 973 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 974 /* 975 * unmap the page table page 976 */ 977#ifdef __mips_n64 978 if (m->pindex < NUPDE) 979 pde = pmap_pde(pmap, va); 980 else 981 pde = pmap_segmap(pmap, va); 982#else 983 pde = pmap_pde(pmap, va); 984#endif 985 *pde = 0; 986 pmap->pm_stats.resident_count--; 987 988#ifdef __mips_n64 989 if (m->pindex < NUPDE) { 990 pd_entry_t *pdp; 991 vm_page_t pdpg; 992 993 /* 994 * Recursively decrement next level pagetable refcount 995 */ 996 pdp = (pd_entry_t *)*pmap_segmap(pmap, va); 997 pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp)); 998 pmap_unwire_ptp(pmap, va, pdpg); 999 } 1000#endif 1001 1002 /* 1003 * If the page is finally unwired, simply free it. 1004 */ 1005 vm_page_free_zero(m); 1006 atomic_subtract_int(&cnt.v_wire_count, 1); 1007} 1008 1009/* 1010 * After removing a page table entry, this routine is used to 1011 * conditionally free the page, and manage the hold/wire counts. 1012 */ 1013static int 1014pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t pde) 1015{ 1016 vm_page_t mpte; 1017 1018 if (va >= VM_MAXUSER_ADDRESS) 1019 return (0); 1020 KASSERT(pde != 0, ("pmap_unuse_pt: pde != 0")); 1021 mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pde)); 1022 return (pmap_unwire_ptp(pmap, va, mpte)); 1023} 1024 1025void 1026pmap_pinit0(pmap_t pmap) 1027{ 1028 int i; 1029 1030 PMAP_LOCK_INIT(pmap); 1031 pmap->pm_segtab = kernel_segmap; 1032 CPU_ZERO(&pmap->pm_active); 1033 for (i = 0; i < MAXCPU; i++) { 1034 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1035 pmap->pm_asid[i].gen = 0; 1036 } 1037 PCPU_SET(curpmap, pmap); 1038 TAILQ_INIT(&pmap->pm_pvchunk); 1039 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1040} 1041 1042void 1043pmap_grow_direct_page_cache() 1044{ 1045 1046#ifdef __mips_n64 1047 vm_pageout_grow_cache(3, 0, MIPS_XKPHYS_LARGEST_PHYS); 1048#else 1049 vm_pageout_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS); 1050#endif 1051} 1052 1053vm_page_t 1054pmap_alloc_direct_page(unsigned int index, int req) 1055{ 1056 vm_page_t m; 1057 1058 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED | 1059 VM_ALLOC_ZERO); 1060 if (m == NULL) 1061 return (NULL); 1062 1063 if ((m->flags & PG_ZERO) == 0) 1064 pmap_zero_page(m); 1065 1066 m->pindex = index; 1067 return (m); 1068} 1069 1070/* 1071 * Initialize a preallocated and zeroed pmap structure, 1072 * such as one in a vmspace structure. 1073 */ 1074int 1075pmap_pinit(pmap_t pmap) 1076{ 1077 vm_offset_t ptdva; 1078 vm_page_t ptdpg; 1079 int i; 1080 1081 PMAP_LOCK_INIT(pmap); 1082 1083 /* 1084 * allocate the page directory page 1085 */ 1086 while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL) 1087 pmap_grow_direct_page_cache(); 1088 1089 ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg)); 1090 pmap->pm_segtab = (pd_entry_t *)ptdva; 1091 CPU_ZERO(&pmap->pm_active); 1092 for (i = 0; i < MAXCPU; i++) { 1093 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1094 pmap->pm_asid[i].gen = 0; 1095 } 1096 TAILQ_INIT(&pmap->pm_pvchunk); 1097 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1098 1099 return (1); 1100} 1101 1102/* 1103 * this routine is called if the page table page is not 1104 * mapped correctly. 1105 */ 1106static vm_page_t 1107_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) 1108{ 1109 vm_offset_t pageva; 1110 vm_page_t m; 1111 1112 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1113 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1114 ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1115 1116 /* 1117 * Find or fabricate a new pagetable page 1118 */ 1119 if ((m = pmap_alloc_direct_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) { 1120 if (flags & M_WAITOK) { 1121 PMAP_UNLOCK(pmap); 1122 rw_wunlock(&pvh_global_lock); 1123 pmap_grow_direct_page_cache(); 1124 rw_wlock(&pvh_global_lock); 1125 PMAP_LOCK(pmap); 1126 } 1127 1128 /* 1129 * Indicate the need to retry. While waiting, the page 1130 * table page may have been allocated. 1131 */ 1132 return (NULL); 1133 } 1134 1135 /* 1136 * Map the pagetable page into the process address space, if it 1137 * isn't already there. 1138 */ 1139 pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)); 1140 1141#ifdef __mips_n64 1142 if (ptepindex >= NUPDE) { 1143 pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva; 1144 } else { 1145 pd_entry_t *pdep, *pde; 1146 int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT); 1147 int pdeindex = ptepindex & (NPDEPG - 1); 1148 vm_page_t pg; 1149 1150 pdep = &pmap->pm_segtab[segindex]; 1151 if (*pdep == NULL) { 1152 /* recurse for allocating page dir */ 1153 if (_pmap_allocpte(pmap, NUPDE + segindex, 1154 flags) == NULL) { 1155 /* alloc failed, release current */ 1156 --m->wire_count; 1157 atomic_subtract_int(&cnt.v_wire_count, 1); 1158 vm_page_free_zero(m); 1159 return (NULL); 1160 } 1161 } else { 1162 pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep)); 1163 pg->wire_count++; 1164 } 1165 /* Next level entry */ 1166 pde = (pd_entry_t *)*pdep; 1167 pde[pdeindex] = (pd_entry_t)pageva; 1168 } 1169#else 1170 pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva; 1171#endif 1172 pmap->pm_stats.resident_count++; 1173 return (m); 1174} 1175 1176static vm_page_t 1177pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1178{ 1179 unsigned ptepindex; 1180 pd_entry_t *pde; 1181 vm_page_t m; 1182 1183 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1184 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1185 ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1186 1187 /* 1188 * Calculate pagetable page index 1189 */ 1190 ptepindex = pmap_pde_pindex(va); 1191retry: 1192 /* 1193 * Get the page directory entry 1194 */ 1195 pde = pmap_pde(pmap, va); 1196 1197 /* 1198 * If the page table page is mapped, we just increment the hold 1199 * count, and activate it. 1200 */ 1201 if (pde != NULL && *pde != NULL) { 1202 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde)); 1203 m->wire_count++; 1204 } else { 1205 /* 1206 * Here if the pte page isn't mapped, or if it has been 1207 * deallocated. 1208 */ 1209 m = _pmap_allocpte(pmap, ptepindex, flags); 1210 if (m == NULL && (flags & M_WAITOK)) 1211 goto retry; 1212 } 1213 return (m); 1214} 1215 1216 1217/*************************************************** 1218 * Pmap allocation/deallocation routines. 1219 ***************************************************/ 1220 1221/* 1222 * Release any resources held by the given physical map. 1223 * Called when a pmap initialized by pmap_pinit is being released. 1224 * Should only be called if the map contains no valid mappings. 1225 */ 1226void 1227pmap_release(pmap_t pmap) 1228{ 1229 vm_offset_t ptdva; 1230 vm_page_t ptdpg; 1231 1232 KASSERT(pmap->pm_stats.resident_count == 0, 1233 ("pmap_release: pmap resident count %ld != 0", 1234 pmap->pm_stats.resident_count)); 1235 1236 ptdva = (vm_offset_t)pmap->pm_segtab; 1237 ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva)); 1238 1239 ptdpg->wire_count--; 1240 atomic_subtract_int(&cnt.v_wire_count, 1); 1241 vm_page_free_zero(ptdpg); 1242 PMAP_LOCK_DESTROY(pmap); 1243} 1244 1245/* 1246 * grow the number of kernel page table entries, if needed 1247 */ 1248void 1249pmap_growkernel(vm_offset_t addr) 1250{ 1251 vm_page_t nkpg; 1252 pd_entry_t *pde, *pdpe; 1253 pt_entry_t *pte; 1254 int i; 1255 1256 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1257 addr = roundup2(addr, NBSEG); 1258 if (addr - 1 >= kernel_map->max_offset) 1259 addr = kernel_map->max_offset; 1260 while (kernel_vm_end < addr) { 1261 pdpe = pmap_segmap(kernel_pmap, kernel_vm_end); 1262#ifdef __mips_n64 1263 if (*pdpe == 0) { 1264 /* new intermediate page table entry */ 1265 nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT); 1266 if (nkpg == NULL) 1267 panic("pmap_growkernel: no memory to grow kernel"); 1268 *pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg)); 1269 continue; /* try again */ 1270 } 1271#endif 1272 pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); 1273 if (*pde != 0) { 1274 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; 1275 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1276 kernel_vm_end = kernel_map->max_offset; 1277 break; 1278 } 1279 continue; 1280 } 1281 1282 /* 1283 * This index is bogus, but out of the way 1284 */ 1285 nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT); 1286 if (!nkpg) 1287 panic("pmap_growkernel: no memory to grow kernel"); 1288 nkpt++; 1289 *pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg)); 1290 1291 /* 1292 * The R[4-7]?00 stores only one copy of the Global bit in 1293 * the translation lookaside buffer for each 2 page entry. 1294 * Thus invalid entrys must have the Global bit set so when 1295 * Entry LO and Entry HI G bits are anded together they will 1296 * produce a global bit to store in the tlb. 1297 */ 1298 pte = (pt_entry_t *)*pde; 1299 for (i = 0; i < NPTEPG; i++) 1300 pte[i] = PTE_G; 1301 1302 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; 1303 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1304 kernel_vm_end = kernel_map->max_offset; 1305 break; 1306 } 1307 } 1308} 1309 1310/*************************************************** 1311 * page management routines. 1312 ***************************************************/ 1313 1314CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1315#ifdef __mips_n64 1316CTASSERT(_NPCM == 3); 1317CTASSERT(_NPCPV == 168); 1318#else 1319CTASSERT(_NPCM == 11); 1320CTASSERT(_NPCPV == 336); 1321#endif 1322 1323static __inline struct pv_chunk * 1324pv_to_chunk(pv_entry_t pv) 1325{ 1326 1327 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1328} 1329 1330#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1331 1332#ifdef __mips_n64 1333#define PC_FREE0_1 0xfffffffffffffffful 1334#define PC_FREE2 0x000000fffffffffful 1335#else 1336#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ 1337#define PC_FREE10 0x0000fffful /* Free values for index 10 */ 1338#endif 1339 1340static const u_long pc_freemask[_NPCM] = { 1341#ifdef __mips_n64 1342 PC_FREE0_1, PC_FREE0_1, PC_FREE2 1343#else 1344 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1345 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1346 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1347 PC_FREE0_9, PC_FREE10 1348#endif 1349}; 1350 1351static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 1352 1353SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1354 "Current number of pv entries"); 1355 1356#ifdef PV_STATS 1357static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1358 1359SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1360 "Current number of pv entry chunks"); 1361SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1362 "Current number of pv entry chunks allocated"); 1363SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1364 "Current number of pv entry chunks frees"); 1365SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1366 "Number of times tried to get a chunk page but failed."); 1367 1368static long pv_entry_frees, pv_entry_allocs; 1369static int pv_entry_spare; 1370 1371SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1372 "Current number of pv entry frees"); 1373SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1374 "Current number of pv entry allocs"); 1375SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1376 "Current number of spare pv entries"); 1377#endif 1378 1379/* 1380 * We are in a serious low memory condition. Resort to 1381 * drastic measures to free some pages so we can allocate 1382 * another pv entry chunk. 1383 */ 1384static vm_page_t 1385pmap_pv_reclaim(pmap_t locked_pmap) 1386{ 1387 struct pch newtail; 1388 struct pv_chunk *pc; 1389 pd_entry_t *pde; 1390 pmap_t pmap; 1391 pt_entry_t *pte, oldpte; 1392 pv_entry_t pv; 1393 vm_offset_t va; 1394 vm_page_t m, m_pc; 1395 u_long inuse; 1396 int bit, field, freed, idx; 1397 1398 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 1399 pmap = NULL; 1400 m_pc = NULL; 1401 TAILQ_INIT(&newtail); 1402 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) { 1403 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1404 if (pmap != pc->pc_pmap) { 1405 if (pmap != NULL) { 1406 pmap_invalidate_all(pmap); 1407 if (pmap != locked_pmap) 1408 PMAP_UNLOCK(pmap); 1409 } 1410 pmap = pc->pc_pmap; 1411 /* Avoid deadlock and lock recursion. */ 1412 if (pmap > locked_pmap) 1413 PMAP_LOCK(pmap); 1414 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { 1415 pmap = NULL; 1416 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 1417 continue; 1418 } 1419 } 1420 1421 /* 1422 * Destroy every non-wired, 4 KB page mapping in the chunk. 1423 */ 1424 freed = 0; 1425 for (field = 0; field < _NPCM; field++) { 1426 for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 1427 inuse != 0; inuse &= ~(1UL << bit)) { 1428 bit = ffsl(inuse) - 1; 1429 idx = field * sizeof(inuse) * NBBY + bit; 1430 pv = &pc->pc_pventry[idx]; 1431 va = pv->pv_va; 1432 pde = pmap_pde(pmap, va); 1433 KASSERT(pde != NULL && *pde != 0, 1434 ("pmap_pv_reclaim: pde")); 1435 pte = pmap_pde_to_pte(pde, va); 1436 oldpte = *pte; 1437 KASSERT(!pte_test(&oldpte, PTE_W), 1438 ("wired pte for unwired page")); 1439 if (is_kernel_pmap(pmap)) 1440 *pte = PTE_G; 1441 else 1442 *pte = 0; 1443 pmap_invalidate_page(pmap, va); 1444 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(oldpte)); 1445 if (pte_test(&oldpte, PTE_D)) 1446 vm_page_dirty(m); 1447 if (m->md.pv_flags & PV_TABLE_REF) 1448 vm_page_aflag_set(m, PGA_REFERENCED); 1449 m->md.pv_flags &= ~PV_TABLE_REF; 1450 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1451 if (TAILQ_EMPTY(&m->md.pv_list)) 1452 vm_page_aflag_clear(m, PGA_WRITEABLE); 1453 pc->pc_map[field] |= 1UL << bit; 1454 pmap_unuse_pt(pmap, va, *pde); 1455 freed++; 1456 } 1457 } 1458 if (freed == 0) { 1459 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 1460 continue; 1461 } 1462 /* Every freed mapping is for a 4 KB page. */ 1463 pmap->pm_stats.resident_count -= freed; 1464 PV_STAT(pv_entry_frees += freed); 1465 PV_STAT(pv_entry_spare += freed); 1466 pv_entry_count -= freed; 1467 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1468 for (field = 0; field < _NPCM; field++) 1469 if (pc->pc_map[field] != pc_freemask[field]) { 1470 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 1471 pc_list); 1472 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 1473 1474 /* 1475 * One freed pv entry in locked_pmap is 1476 * sufficient. 1477 */ 1478 if (pmap == locked_pmap) 1479 goto out; 1480 break; 1481 } 1482 if (field == _NPCM) { 1483 PV_STAT(pv_entry_spare -= _NPCPV); 1484 PV_STAT(pc_chunk_count--); 1485 PV_STAT(pc_chunk_frees++); 1486 /* Entire chunk is free; return it. */ 1487 m_pc = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS( 1488 (vm_offset_t)pc)); 1489 break; 1490 } 1491 } 1492out: 1493 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); 1494 if (pmap != NULL) { 1495 pmap_invalidate_all(pmap); 1496 if (pmap != locked_pmap) 1497 PMAP_UNLOCK(pmap); 1498 } 1499 return (m_pc); 1500} 1501 1502/* 1503 * free the pv_entry back to the free list 1504 */ 1505static void 1506free_pv_entry(pmap_t pmap, pv_entry_t pv) 1507{ 1508 struct pv_chunk *pc; 1509 int bit, field, idx; 1510 1511 rw_assert(&pvh_global_lock, RA_WLOCKED); 1512 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1513 PV_STAT(pv_entry_frees++); 1514 PV_STAT(pv_entry_spare++); 1515 pv_entry_count--; 1516 pc = pv_to_chunk(pv); 1517 idx = pv - &pc->pc_pventry[0]; 1518 field = idx / (sizeof(u_long) * NBBY); 1519 bit = idx % (sizeof(u_long) * NBBY); 1520 pc->pc_map[field] |= 1ul << bit; 1521 for (idx = 0; idx < _NPCM; idx++) 1522 if (pc->pc_map[idx] != pc_freemask[idx]) { 1523 /* 1524 * 98% of the time, pc is already at the head of the 1525 * list. If it isn't already, move it to the head. 1526 */ 1527 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != 1528 pc)) { 1529 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1530 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 1531 pc_list); 1532 } 1533 return; 1534 } 1535 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1536 free_pv_chunk(pc); 1537} 1538 1539static void 1540free_pv_chunk(struct pv_chunk *pc) 1541{ 1542 vm_page_t m; 1543 1544 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1545 PV_STAT(pv_entry_spare -= _NPCPV); 1546 PV_STAT(pc_chunk_count--); 1547 PV_STAT(pc_chunk_frees++); 1548 /* entire chunk is free, return it */ 1549 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc)); 1550 vm_page_unwire(m, 0); 1551 vm_page_free(m); 1552} 1553 1554/* 1555 * get a new pv_entry, allocating a block from the system 1556 * when needed. 1557 */ 1558static pv_entry_t 1559get_pv_entry(pmap_t pmap, boolean_t try) 1560{ 1561 struct pv_chunk *pc; 1562 pv_entry_t pv; 1563 vm_page_t m; 1564 int bit, field, idx; 1565 1566 rw_assert(&pvh_global_lock, RA_WLOCKED); 1567 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1568 PV_STAT(pv_entry_allocs++); 1569 pv_entry_count++; 1570retry: 1571 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 1572 if (pc != NULL) { 1573 for (field = 0; field < _NPCM; field++) { 1574 if (pc->pc_map[field]) { 1575 bit = ffsl(pc->pc_map[field]) - 1; 1576 break; 1577 } 1578 } 1579 if (field < _NPCM) { 1580 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit; 1581 pv = &pc->pc_pventry[idx]; 1582 pc->pc_map[field] &= ~(1ul << bit); 1583 /* If this was the last item, move it to tail */ 1584 for (field = 0; field < _NPCM; field++) 1585 if (pc->pc_map[field] != 0) { 1586 PV_STAT(pv_entry_spare--); 1587 return (pv); /* not full, return */ 1588 } 1589 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1590 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 1591 PV_STAT(pv_entry_spare--); 1592 return (pv); 1593 } 1594 } 1595 /* No free items, allocate another chunk */ 1596 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL | 1597 VM_ALLOC_WIRED); 1598 if (m == NULL) { 1599 if (try) { 1600 pv_entry_count--; 1601 PV_STAT(pc_chunk_tryfail++); 1602 return (NULL); 1603 } 1604 m = pmap_pv_reclaim(pmap); 1605 if (m == NULL) 1606 goto retry; 1607 } 1608 PV_STAT(pc_chunk_count++); 1609 PV_STAT(pc_chunk_allocs++); 1610 pc = (struct pv_chunk *)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)); 1611 pc->pc_pmap = pmap; 1612 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 1613 for (field = 1; field < _NPCM; field++) 1614 pc->pc_map[field] = pc_freemask[field]; 1615 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1616 pv = &pc->pc_pventry[0]; 1617 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1618 PV_STAT(pv_entry_spare += _NPCPV - 1); 1619 return (pv); 1620} 1621 1622static pv_entry_t 1623pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1624{ 1625 pv_entry_t pv; 1626 1627 rw_assert(&pvh_global_lock, RA_WLOCKED); 1628 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 1629 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 1630 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 1631 break; 1632 } 1633 } 1634 return (pv); 1635} 1636 1637static void 1638pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1639{ 1640 pv_entry_t pv; 1641 1642 pv = pmap_pvh_remove(pvh, pmap, va); 1643 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx", 1644 (u_long)VM_PAGE_TO_PHYS(__containerof(pvh, struct vm_page, md)), 1645 (u_long)va)); 1646 free_pv_entry(pmap, pv); 1647} 1648 1649static void 1650pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1651{ 1652 1653 rw_assert(&pvh_global_lock, RA_WLOCKED); 1654 pmap_pvh_free(&m->md, pmap, va); 1655 if (TAILQ_EMPTY(&m->md.pv_list)) 1656 vm_page_aflag_clear(m, PGA_WRITEABLE); 1657} 1658 1659/* 1660 * Conditionally create a pv entry. 1661 */ 1662static boolean_t 1663pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va, 1664 vm_page_t m) 1665{ 1666 pv_entry_t pv; 1667 1668 rw_assert(&pvh_global_lock, RA_WLOCKED); 1669 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1670 if ((pv = get_pv_entry(pmap, TRUE)) != NULL) { 1671 pv->pv_va = va; 1672 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1673 return (TRUE); 1674 } else 1675 return (FALSE); 1676} 1677 1678/* 1679 * pmap_remove_pte: do the things to unmap a page in a process 1680 */ 1681static int 1682pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va, 1683 pd_entry_t pde) 1684{ 1685 pt_entry_t oldpte; 1686 vm_page_t m; 1687 vm_paddr_t pa; 1688 1689 rw_assert(&pvh_global_lock, RA_WLOCKED); 1690 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1691 1692 /* 1693 * Write back all cache lines from the page being unmapped. 1694 */ 1695 mips_dcache_wbinv_range_index(va, PAGE_SIZE); 1696 1697 oldpte = *ptq; 1698 if (is_kernel_pmap(pmap)) 1699 *ptq = PTE_G; 1700 else 1701 *ptq = 0; 1702 1703 if (pte_test(&oldpte, PTE_W)) 1704 pmap->pm_stats.wired_count -= 1; 1705 1706 pmap->pm_stats.resident_count -= 1; 1707 1708 if (pte_test(&oldpte, PTE_MANAGED)) { 1709 pa = TLBLO_PTE_TO_PA(oldpte); 1710 m = PHYS_TO_VM_PAGE(pa); 1711 if (pte_test(&oldpte, PTE_D)) { 1712 KASSERT(!pte_test(&oldpte, PTE_RO), 1713 ("%s: modified page not writable: va: %p, pte: %#jx", 1714 __func__, (void *)va, (uintmax_t)oldpte)); 1715 vm_page_dirty(m); 1716 } 1717 if (m->md.pv_flags & PV_TABLE_REF) 1718 vm_page_aflag_set(m, PGA_REFERENCED); 1719 m->md.pv_flags &= ~PV_TABLE_REF; 1720 1721 pmap_remove_entry(pmap, m, va); 1722 } 1723 return (pmap_unuse_pt(pmap, va, pde)); 1724} 1725 1726/* 1727 * Remove a single page from a process address space 1728 */ 1729static void 1730pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1731{ 1732 pd_entry_t *pde; 1733 pt_entry_t *ptq; 1734 1735 rw_assert(&pvh_global_lock, RA_WLOCKED); 1736 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1737 pde = pmap_pde(pmap, va); 1738 if (pde == NULL || *pde == 0) 1739 return; 1740 ptq = pmap_pde_to_pte(pde, va); 1741 1742 /* 1743 * If there is no pte for this address, just skip it! 1744 */ 1745 if (!pte_test(ptq, PTE_V)) 1746 return; 1747 1748 (void)pmap_remove_pte(pmap, ptq, va, *pde); 1749 pmap_invalidate_page(pmap, va); 1750} 1751 1752/* 1753 * Remove the given range of addresses from the specified map. 1754 * 1755 * It is assumed that the start and end are properly 1756 * rounded to the page size. 1757 */ 1758void 1759pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1760{ 1761 pd_entry_t *pde, *pdpe; 1762 pt_entry_t *pte; 1763 vm_offset_t va, va_next; 1764 1765 /* 1766 * Perform an unsynchronized read. This is, however, safe. 1767 */ 1768 if (pmap->pm_stats.resident_count == 0) 1769 return; 1770 1771 rw_wlock(&pvh_global_lock); 1772 PMAP_LOCK(pmap); 1773 1774 /* 1775 * special handling of removing one page. a very common operation 1776 * and easy to short circuit some code. 1777 */ 1778 if ((sva + PAGE_SIZE) == eva) { 1779 pmap_remove_page(pmap, sva); 1780 goto out; 1781 } 1782 for (; sva < eva; sva = va_next) { 1783 pdpe = pmap_segmap(pmap, sva); 1784#ifdef __mips_n64 1785 if (*pdpe == 0) { 1786 va_next = (sva + NBSEG) & ~SEGMASK; 1787 if (va_next < sva) 1788 va_next = eva; 1789 continue; 1790 } 1791#endif 1792 va_next = (sva + NBPDR) & ~PDRMASK; 1793 if (va_next < sva) 1794 va_next = eva; 1795 1796 pde = pmap_pdpe_to_pde(pdpe, sva); 1797 if (*pde == NULL) 1798 continue; 1799 1800 /* 1801 * Limit our scan to either the end of the va represented 1802 * by the current page table page, or to the end of the 1803 * range being removed. 1804 */ 1805 if (va_next > eva) 1806 va_next = eva; 1807 1808 va = va_next; 1809 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 1810 sva += PAGE_SIZE) { 1811 if (!pte_test(pte, PTE_V)) { 1812 if (va != va_next) { 1813 pmap_invalidate_range(pmap, va, sva); 1814 va = va_next; 1815 } 1816 continue; 1817 } 1818 if (va == va_next) 1819 va = sva; 1820 if (pmap_remove_pte(pmap, pte, sva, *pde)) { 1821 sva += PAGE_SIZE; 1822 break; 1823 } 1824 } 1825 if (va != va_next) 1826 pmap_invalidate_range(pmap, va, sva); 1827 } 1828out: 1829 rw_wunlock(&pvh_global_lock); 1830 PMAP_UNLOCK(pmap); 1831} 1832 1833/* 1834 * Routine: pmap_remove_all 1835 * Function: 1836 * Removes this physical page from 1837 * all physical maps in which it resides. 1838 * Reflects back modify bits to the pager. 1839 * 1840 * Notes: 1841 * Original versions of this routine were very 1842 * inefficient because they iteratively called 1843 * pmap_remove (slow...) 1844 */ 1845 1846void 1847pmap_remove_all(vm_page_t m) 1848{ 1849 pv_entry_t pv; 1850 pmap_t pmap; 1851 pd_entry_t *pde; 1852 pt_entry_t *pte, tpte; 1853 1854 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1855 ("pmap_remove_all: page %p is not managed", m)); 1856 rw_wlock(&pvh_global_lock); 1857 1858 if (m->md.pv_flags & PV_TABLE_REF) 1859 vm_page_aflag_set(m, PGA_REFERENCED); 1860 1861 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1862 pmap = PV_PMAP(pv); 1863 PMAP_LOCK(pmap); 1864 1865 /* 1866 * If it's last mapping writeback all caches from 1867 * the page being destroyed 1868 */ 1869 if (TAILQ_NEXT(pv, pv_list) == NULL) 1870 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); 1871 1872 pmap->pm_stats.resident_count--; 1873 1874 pde = pmap_pde(pmap, pv->pv_va); 1875 KASSERT(pde != NULL && *pde != 0, ("pmap_remove_all: pde")); 1876 pte = pmap_pde_to_pte(pde, pv->pv_va); 1877 1878 tpte = *pte; 1879 if (is_kernel_pmap(pmap)) 1880 *pte = PTE_G; 1881 else 1882 *pte = 0; 1883 1884 if (pte_test(&tpte, PTE_W)) 1885 pmap->pm_stats.wired_count--; 1886 1887 /* 1888 * Update the vm_page_t clean and reference bits. 1889 */ 1890 if (pte_test(&tpte, PTE_D)) { 1891 KASSERT(!pte_test(&tpte, PTE_RO), 1892 ("%s: modified page not writable: va: %p, pte: %#jx", 1893 __func__, (void *)pv->pv_va, (uintmax_t)tpte)); 1894 vm_page_dirty(m); 1895 } 1896 pmap_invalidate_page(pmap, pv->pv_va); 1897 1898 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1899 pmap_unuse_pt(pmap, pv->pv_va, *pde); 1900 free_pv_entry(pmap, pv); 1901 PMAP_UNLOCK(pmap); 1902 } 1903 1904 vm_page_aflag_clear(m, PGA_WRITEABLE); 1905 m->md.pv_flags &= ~PV_TABLE_REF; 1906 rw_wunlock(&pvh_global_lock); 1907} 1908 1909/* 1910 * Set the physical protection on the 1911 * specified range of this map as requested. 1912 */ 1913void 1914pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1915{ 1916 pt_entry_t *pte; 1917 pd_entry_t *pde, *pdpe; 1918 vm_offset_t va_next; 1919 1920 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1921 pmap_remove(pmap, sva, eva); 1922 return; 1923 } 1924 if (prot & VM_PROT_WRITE) 1925 return; 1926 1927 rw_wlock(&pvh_global_lock); 1928 PMAP_LOCK(pmap); 1929 for (; sva < eva; sva = va_next) { 1930 pt_entry_t pbits; 1931 vm_page_t m; 1932 vm_paddr_t pa; 1933 1934 pdpe = pmap_segmap(pmap, sva); 1935#ifdef __mips_n64 1936 if (*pdpe == 0) { 1937 va_next = (sva + NBSEG) & ~SEGMASK; 1938 if (va_next < sva) 1939 va_next = eva; 1940 continue; 1941 } 1942#endif 1943 va_next = (sva + NBPDR) & ~PDRMASK; 1944 if (va_next < sva) 1945 va_next = eva; 1946 1947 pde = pmap_pdpe_to_pde(pdpe, sva); 1948 if (*pde == NULL) 1949 continue; 1950 if (va_next > eva) 1951 va_next = eva; 1952 1953 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 1954 sva += PAGE_SIZE) { 1955 1956 /* Skip invalid PTEs */ 1957 if (!pte_test(pte, PTE_V)) 1958 continue; 1959 pbits = *pte; 1960 if (pte_test(&pbits, PTE_MANAGED | PTE_D)) { 1961 pa = TLBLO_PTE_TO_PA(pbits); 1962 m = PHYS_TO_VM_PAGE(pa); 1963 vm_page_dirty(m); 1964 } 1965 pte_clear(&pbits, PTE_D); 1966 pte_set(&pbits, PTE_RO); 1967 1968 if (pbits != *pte) { 1969 *pte = pbits; 1970 pmap_update_page(pmap, sva, pbits); 1971 } 1972 } 1973 } 1974 rw_wunlock(&pvh_global_lock); 1975 PMAP_UNLOCK(pmap); 1976} 1977 1978/* 1979 * Insert the given physical page (p) at 1980 * the specified virtual address (v) in the 1981 * target physical map with the protection requested. 1982 * 1983 * If specified, the page will be wired down, meaning 1984 * that the related pte can not be reclaimed. 1985 * 1986 * NB: This is the only routine which MAY NOT lazy-evaluate 1987 * or lose information. That is, this routine must actually 1988 * insert this page into the given map NOW. 1989 */ 1990void 1991pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 1992 vm_prot_t prot, boolean_t wired) 1993{ 1994 vm_paddr_t pa, opa; 1995 pt_entry_t *pte; 1996 pt_entry_t origpte, newpte; 1997 pv_entry_t pv; 1998 vm_page_t mpte, om; 1999 2000 va &= ~PAGE_MASK; 2001 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 2002 KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || 2003 va >= kmi.clean_eva, 2004 ("pmap_enter: managed mapping within the clean submap")); 2005 KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0, 2006 ("pmap_enter: page %p is not busy", m)); 2007 pa = VM_PAGE_TO_PHYS(m); 2008 newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, access, prot); 2009 if (wired) 2010 newpte |= PTE_W; 2011 if (is_kernel_pmap(pmap)) 2012 newpte |= PTE_G; 2013 if (is_cacheable_mem(pa)) 2014 newpte |= PTE_C_CACHE; 2015 else 2016 newpte |= PTE_C_UNCACHED; 2017 2018 mpte = NULL; 2019 2020 rw_wlock(&pvh_global_lock); 2021 PMAP_LOCK(pmap); 2022 2023 /* 2024 * In the case that a page table page is not resident, we are 2025 * creating it here. 2026 */ 2027 if (va < VM_MAXUSER_ADDRESS) { 2028 mpte = pmap_allocpte(pmap, va, M_WAITOK); 2029 } 2030 pte = pmap_pte(pmap, va); 2031 2032 /* 2033 * Page Directory table entry not valid, we need a new PT page 2034 */ 2035 if (pte == NULL) { 2036 panic("pmap_enter: invalid page directory, pdir=%p, va=%p", 2037 (void *)pmap->pm_segtab, (void *)va); 2038 } 2039 om = NULL; 2040 origpte = *pte; 2041 opa = TLBLO_PTE_TO_PA(origpte); 2042 2043 /* 2044 * Mapping has not changed, must be protection or wiring change. 2045 */ 2046 if (pte_test(&origpte, PTE_V) && opa == pa) { 2047 /* 2048 * Wiring change, just update stats. We don't worry about 2049 * wiring PT pages as they remain resident as long as there 2050 * are valid mappings in them. Hence, if a user page is 2051 * wired, the PT page will be also. 2052 */ 2053 if (wired && !pte_test(&origpte, PTE_W)) 2054 pmap->pm_stats.wired_count++; 2055 else if (!wired && pte_test(&origpte, PTE_W)) 2056 pmap->pm_stats.wired_count--; 2057 2058 KASSERT(!pte_test(&origpte, PTE_D | PTE_RO), 2059 ("%s: modified page not writable: va: %p, pte: %#jx", 2060 __func__, (void *)va, (uintmax_t)origpte)); 2061 2062 /* 2063 * Remove extra pte reference 2064 */ 2065 if (mpte) 2066 mpte->wire_count--; 2067 2068 if (pte_test(&origpte, PTE_MANAGED)) { 2069 m->md.pv_flags |= PV_TABLE_REF; 2070 om = m; 2071 newpte |= PTE_MANAGED; 2072 if (!pte_test(&newpte, PTE_RO)) 2073 vm_page_aflag_set(m, PGA_WRITEABLE); 2074 } 2075 goto validate; 2076 } 2077 2078 pv = NULL; 2079 2080 /* 2081 * Mapping has changed, invalidate old range and fall through to 2082 * handle validating new mapping. 2083 */ 2084 if (opa) { 2085 if (pte_test(&origpte, PTE_W)) 2086 pmap->pm_stats.wired_count--; 2087 2088 if (pte_test(&origpte, PTE_MANAGED)) { 2089 om = PHYS_TO_VM_PAGE(opa); 2090 pv = pmap_pvh_remove(&om->md, pmap, va); 2091 } 2092 if (mpte != NULL) { 2093 mpte->wire_count--; 2094 KASSERT(mpte->wire_count > 0, 2095 ("pmap_enter: missing reference to page table page," 2096 " va: %p", (void *)va)); 2097 } 2098 } else 2099 pmap->pm_stats.resident_count++; 2100 2101 /* 2102 * Enter on the PV list if part of our managed memory. 2103 */ 2104 if ((m->oflags & VPO_UNMANAGED) == 0) { 2105 m->md.pv_flags |= PV_TABLE_REF; 2106 if (pv == NULL) 2107 pv = get_pv_entry(pmap, FALSE); 2108 pv->pv_va = va; 2109 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2110 newpte |= PTE_MANAGED; 2111 if (!pte_test(&newpte, PTE_RO)) 2112 vm_page_aflag_set(m, PGA_WRITEABLE); 2113 } else if (pv != NULL) 2114 free_pv_entry(pmap, pv); 2115 2116 /* 2117 * Increment counters 2118 */ 2119 if (wired) 2120 pmap->pm_stats.wired_count++; 2121 2122validate: 2123 2124#ifdef PMAP_DEBUG 2125 printf("pmap_enter: va: %p -> pa: %p\n", (void *)va, (void *)pa); 2126#endif 2127 2128 /* 2129 * if the mapping or permission bits are different, we need to 2130 * update the pte. 2131 */ 2132 if (origpte != newpte) { 2133 *pte = newpte; 2134 if (pte_test(&origpte, PTE_V)) { 2135 if (pte_test(&origpte, PTE_MANAGED) && opa != pa) { 2136 if (om->md.pv_flags & PV_TABLE_REF) 2137 vm_page_aflag_set(om, PGA_REFERENCED); 2138 om->md.pv_flags &= ~PV_TABLE_REF; 2139 } 2140 if (pte_test(&origpte, PTE_D)) { 2141 KASSERT(!pte_test(&origpte, PTE_RO), 2142 ("pmap_enter: modified page not writable:" 2143 " va: %p, pte: %#jx", (void *)va, (uintmax_t)origpte)); 2144 if (pte_test(&origpte, PTE_MANAGED)) 2145 vm_page_dirty(om); 2146 } 2147 if (pte_test(&origpte, PTE_MANAGED) && 2148 TAILQ_EMPTY(&om->md.pv_list)) 2149 vm_page_aflag_clear(om, PGA_WRITEABLE); 2150 pmap_update_page(pmap, va, newpte); 2151 } 2152 } 2153 2154 /* 2155 * Sync I & D caches for executable pages. Do this only if the 2156 * target pmap belongs to the current process. Otherwise, an 2157 * unresolvable TLB miss may occur. 2158 */ 2159 if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) && 2160 (prot & VM_PROT_EXECUTE)) { 2161 mips_icache_sync_range(va, PAGE_SIZE); 2162 mips_dcache_wbinv_range(va, PAGE_SIZE); 2163 } 2164 rw_wunlock(&pvh_global_lock); 2165 PMAP_UNLOCK(pmap); 2166} 2167 2168/* 2169 * this code makes some *MAJOR* assumptions: 2170 * 1. Current pmap & pmap exists. 2171 * 2. Not wired. 2172 * 3. Read access. 2173 * 4. No page table pages. 2174 * but is *MUCH* faster than pmap_enter... 2175 */ 2176 2177void 2178pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2179{ 2180 2181 rw_wlock(&pvh_global_lock); 2182 PMAP_LOCK(pmap); 2183 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); 2184 rw_wunlock(&pvh_global_lock); 2185 PMAP_UNLOCK(pmap); 2186} 2187 2188static vm_page_t 2189pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2190 vm_prot_t prot, vm_page_t mpte) 2191{ 2192 pt_entry_t *pte; 2193 vm_paddr_t pa; 2194 2195 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2196 (m->oflags & VPO_UNMANAGED) != 0, 2197 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2198 rw_assert(&pvh_global_lock, RA_WLOCKED); 2199 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2200 2201 /* 2202 * In the case that a page table page is not resident, we are 2203 * creating it here. 2204 */ 2205 if (va < VM_MAXUSER_ADDRESS) { 2206 pd_entry_t *pde; 2207 unsigned ptepindex; 2208 2209 /* 2210 * Calculate pagetable page index 2211 */ 2212 ptepindex = pmap_pde_pindex(va); 2213 if (mpte && (mpte->pindex == ptepindex)) { 2214 mpte->wire_count++; 2215 } else { 2216 /* 2217 * Get the page directory entry 2218 */ 2219 pde = pmap_pde(pmap, va); 2220 2221 /* 2222 * If the page table page is mapped, we just 2223 * increment the hold count, and activate it. 2224 */ 2225 if (pde && *pde != 0) { 2226 mpte = PHYS_TO_VM_PAGE( 2227 MIPS_DIRECT_TO_PHYS(*pde)); 2228 mpte->wire_count++; 2229 } else { 2230 mpte = _pmap_allocpte(pmap, ptepindex, 2231 M_NOWAIT); 2232 if (mpte == NULL) 2233 return (mpte); 2234 } 2235 } 2236 } else { 2237 mpte = NULL; 2238 } 2239 2240 pte = pmap_pte(pmap, va); 2241 if (pte_test(pte, PTE_V)) { 2242 if (mpte != NULL) { 2243 mpte->wire_count--; 2244 mpte = NULL; 2245 } 2246 return (mpte); 2247 } 2248 2249 /* 2250 * Enter on the PV list if part of our managed memory. 2251 */ 2252 if ((m->oflags & VPO_UNMANAGED) == 0 && 2253 !pmap_try_insert_pv_entry(pmap, mpte, va, m)) { 2254 if (mpte != NULL) { 2255 pmap_unwire_ptp(pmap, va, mpte); 2256 mpte = NULL; 2257 } 2258 return (mpte); 2259 } 2260 2261 /* 2262 * Increment counters 2263 */ 2264 pmap->pm_stats.resident_count++; 2265 2266 pa = VM_PAGE_TO_PHYS(m); 2267 2268 /* 2269 * Now validate mapping with RO protection 2270 */ 2271 *pte = PTE_RO | TLBLO_PA_TO_PFN(pa) | PTE_V; 2272 if ((m->oflags & VPO_UNMANAGED) == 0) 2273 *pte |= PTE_MANAGED; 2274 2275 if (is_cacheable_mem(pa)) 2276 *pte |= PTE_C_CACHE; 2277 else 2278 *pte |= PTE_C_UNCACHED; 2279 2280 if (is_kernel_pmap(pmap)) 2281 *pte |= PTE_G; 2282 else { 2283 /* 2284 * Sync I & D caches. Do this only if the target pmap 2285 * belongs to the current process. Otherwise, an 2286 * unresolvable TLB miss may occur. */ 2287 if (pmap == &curproc->p_vmspace->vm_pmap) { 2288 va &= ~PAGE_MASK; 2289 mips_icache_sync_range(va, PAGE_SIZE); 2290 mips_dcache_wbinv_range(va, PAGE_SIZE); 2291 } 2292 } 2293 return (mpte); 2294} 2295 2296/* 2297 * Make a temporary mapping for a physical address. This is only intended 2298 * to be used for panic dumps. 2299 * 2300 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2301 */ 2302void * 2303pmap_kenter_temporary(vm_paddr_t pa, int i) 2304{ 2305 vm_offset_t va; 2306 2307 if (i != 0) 2308 printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n", 2309 __func__); 2310 2311 if (MIPS_DIRECT_MAPPABLE(pa)) { 2312 va = MIPS_PHYS_TO_DIRECT(pa); 2313 } else { 2314#ifndef __mips_n64 /* XXX : to be converted to new style */ 2315 int cpu; 2316 register_t intr; 2317 struct local_sysmaps *sysm; 2318 pt_entry_t *pte, npte; 2319 2320 /* If this is used other than for dumps, we may need to leave 2321 * interrupts disasbled on return. If crash dumps don't work when 2322 * we get to this point, we might want to consider this (leaving things 2323 * disabled as a starting point ;-) 2324 */ 2325 intr = intr_disable(); 2326 cpu = PCPU_GET(cpuid); 2327 sysm = &sysmap_lmem[cpu]; 2328 /* Since this is for the debugger, no locks or any other fun */ 2329 npte = TLBLO_PA_TO_PFN(pa) | PTE_C_CACHE | PTE_D | PTE_V | 2330 PTE_G; 2331 pte = pmap_pte(kernel_pmap, sysm->base); 2332 *pte = npte; 2333 sysm->valid1 = 1; 2334 pmap_update_page(kernel_pmap, sysm->base, npte); 2335 va = sysm->base; 2336 intr_restore(intr); 2337#endif 2338 } 2339 return ((void *)va); 2340} 2341 2342void 2343pmap_kenter_temporary_free(vm_paddr_t pa) 2344{ 2345#ifndef __mips_n64 /* XXX : to be converted to new style */ 2346 int cpu; 2347 register_t intr; 2348 struct local_sysmaps *sysm; 2349#endif 2350 2351 if (MIPS_DIRECT_MAPPABLE(pa)) { 2352 /* nothing to do for this case */ 2353 return; 2354 } 2355#ifndef __mips_n64 /* XXX : to be converted to new style */ 2356 cpu = PCPU_GET(cpuid); 2357 sysm = &sysmap_lmem[cpu]; 2358 if (sysm->valid1) { 2359 pt_entry_t *pte; 2360 2361 intr = intr_disable(); 2362 pte = pmap_pte(kernel_pmap, sysm->base); 2363 *pte = PTE_G; 2364 pmap_invalidate_page(kernel_pmap, sysm->base); 2365 intr_restore(intr); 2366 sysm->valid1 = 0; 2367 } 2368#endif 2369} 2370 2371/* 2372 * Maps a sequence of resident pages belonging to the same object. 2373 * The sequence begins with the given page m_start. This page is 2374 * mapped at the given virtual address start. Each subsequent page is 2375 * mapped at a virtual address that is offset from start by the same 2376 * amount as the page is offset from m_start within the object. The 2377 * last page in the sequence is the page with the largest offset from 2378 * m_start that can be mapped at a virtual address less than the given 2379 * virtual address end. Not every virtual page between start and end 2380 * is mapped; only those for which a resident page exists with the 2381 * corresponding offset from m_start are mapped. 2382 */ 2383void 2384pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2385 vm_page_t m_start, vm_prot_t prot) 2386{ 2387 vm_page_t m, mpte; 2388 vm_pindex_t diff, psize; 2389 2390 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 2391 psize = atop(end - start); 2392 mpte = NULL; 2393 m = m_start; 2394 rw_wlock(&pvh_global_lock); 2395 PMAP_LOCK(pmap); 2396 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2397 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m, 2398 prot, mpte); 2399 m = TAILQ_NEXT(m, listq); 2400 } 2401 rw_wunlock(&pvh_global_lock); 2402 PMAP_UNLOCK(pmap); 2403} 2404 2405/* 2406 * pmap_object_init_pt preloads the ptes for a given object 2407 * into the specified pmap. This eliminates the blast of soft 2408 * faults on process startup and immediately after an mmap. 2409 */ 2410void 2411pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2412 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 2413{ 2414 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2415 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2416 ("pmap_object_init_pt: non-device object")); 2417} 2418 2419/* 2420 * Routine: pmap_change_wiring 2421 * Function: Change the wiring attribute for a map/virtual-address 2422 * pair. 2423 * In/out conditions: 2424 * The mapping must already exist in the pmap. 2425 */ 2426void 2427pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 2428{ 2429 pt_entry_t *pte; 2430 2431 PMAP_LOCK(pmap); 2432 pte = pmap_pte(pmap, va); 2433 2434 if (wired && !pte_test(pte, PTE_W)) 2435 pmap->pm_stats.wired_count++; 2436 else if (!wired && pte_test(pte, PTE_W)) 2437 pmap->pm_stats.wired_count--; 2438 2439 /* 2440 * Wiring is not a hardware characteristic so there is no need to 2441 * invalidate TLB. 2442 */ 2443 if (wired) 2444 pte_set(pte, PTE_W); 2445 else 2446 pte_clear(pte, PTE_W); 2447 PMAP_UNLOCK(pmap); 2448} 2449 2450/* 2451 * Copy the range specified by src_addr/len 2452 * from the source map to the range dst_addr/len 2453 * in the destination map. 2454 * 2455 * This routine is only advisory and need not do anything. 2456 */ 2457 2458void 2459pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 2460 vm_size_t len, vm_offset_t src_addr) 2461{ 2462} 2463 2464/* 2465 * pmap_zero_page zeros the specified hardware page by mapping 2466 * the page into KVM and using bzero to clear its contents. 2467 * 2468 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2469 */ 2470void 2471pmap_zero_page(vm_page_t m) 2472{ 2473 vm_offset_t va; 2474 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2475 2476 if (MIPS_DIRECT_MAPPABLE(phys)) { 2477 va = MIPS_PHYS_TO_DIRECT(phys); 2478 bzero((caddr_t)va, PAGE_SIZE); 2479 mips_dcache_wbinv_range(va, PAGE_SIZE); 2480 } else { 2481 va = pmap_lmem_map1(phys); 2482 bzero((caddr_t)va, PAGE_SIZE); 2483 mips_dcache_wbinv_range(va, PAGE_SIZE); 2484 pmap_lmem_unmap(); 2485 } 2486} 2487 2488/* 2489 * pmap_zero_page_area zeros the specified hardware page by mapping 2490 * the page into KVM and using bzero to clear its contents. 2491 * 2492 * off and size may not cover an area beyond a single hardware page. 2493 */ 2494void 2495pmap_zero_page_area(vm_page_t m, int off, int size) 2496{ 2497 vm_offset_t va; 2498 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2499 2500 if (MIPS_DIRECT_MAPPABLE(phys)) { 2501 va = MIPS_PHYS_TO_DIRECT(phys); 2502 bzero((char *)(caddr_t)va + off, size); 2503 mips_dcache_wbinv_range(va + off, size); 2504 } else { 2505 va = pmap_lmem_map1(phys); 2506 bzero((char *)va + off, size); 2507 mips_dcache_wbinv_range(va + off, size); 2508 pmap_lmem_unmap(); 2509 } 2510} 2511 2512void 2513pmap_zero_page_idle(vm_page_t m) 2514{ 2515 vm_offset_t va; 2516 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2517 2518 if (MIPS_DIRECT_MAPPABLE(phys)) { 2519 va = MIPS_PHYS_TO_DIRECT(phys); 2520 bzero((caddr_t)va, PAGE_SIZE); 2521 mips_dcache_wbinv_range(va, PAGE_SIZE); 2522 } else { 2523 va = pmap_lmem_map1(phys); 2524 bzero((caddr_t)va, PAGE_SIZE); 2525 mips_dcache_wbinv_range(va, PAGE_SIZE); 2526 pmap_lmem_unmap(); 2527 } 2528} 2529 2530/* 2531 * pmap_copy_page copies the specified (machine independent) 2532 * page by mapping the page into virtual memory and using 2533 * bcopy to copy the page, one machine dependent page at a 2534 * time. 2535 * 2536 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2537 */ 2538void 2539pmap_copy_page(vm_page_t src, vm_page_t dst) 2540{ 2541 vm_offset_t va_src, va_dst; 2542 vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src); 2543 vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst); 2544 2545 if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) { 2546 /* easy case, all can be accessed via KSEG0 */ 2547 /* 2548 * Flush all caches for VA that are mapped to this page 2549 * to make sure that data in SDRAM is up to date 2550 */ 2551 pmap_flush_pvcache(src); 2552 mips_dcache_wbinv_range_index( 2553 MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE); 2554 va_src = MIPS_PHYS_TO_DIRECT(phys_src); 2555 va_dst = MIPS_PHYS_TO_DIRECT(phys_dst); 2556 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); 2557 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2558 } else { 2559 va_src = pmap_lmem_map2(phys_src, phys_dst); 2560 va_dst = va_src + PAGE_SIZE; 2561 bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE); 2562 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2563 pmap_lmem_unmap(); 2564 } 2565} 2566 2567/* 2568 * Returns true if the pmap's pv is one of the first 2569 * 16 pvs linked to from this page. This count may 2570 * be changed upwards or downwards in the future; it 2571 * is only necessary that true be returned for a small 2572 * subset of pmaps for proper page aging. 2573 */ 2574boolean_t 2575pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2576{ 2577 pv_entry_t pv; 2578 int loops = 0; 2579 boolean_t rv; 2580 2581 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2582 ("pmap_page_exists_quick: page %p is not managed", m)); 2583 rv = FALSE; 2584 rw_wlock(&pvh_global_lock); 2585 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2586 if (PV_PMAP(pv) == pmap) { 2587 rv = TRUE; 2588 break; 2589 } 2590 loops++; 2591 if (loops >= 16) 2592 break; 2593 } 2594 rw_wunlock(&pvh_global_lock); 2595 return (rv); 2596} 2597 2598/* 2599 * Remove all pages from specified address space 2600 * this aids process exit speeds. Also, this code 2601 * is special cased for current process only, but 2602 * can have the more generic (and slightly slower) 2603 * mode enabled. This is much faster than pmap_remove 2604 * in the case of running down an entire address space. 2605 */ 2606void 2607pmap_remove_pages(pmap_t pmap) 2608{ 2609 pd_entry_t *pde; 2610 pt_entry_t *pte, tpte; 2611 pv_entry_t pv; 2612 vm_page_t m; 2613 struct pv_chunk *pc, *npc; 2614 u_long inuse, bitmask; 2615 int allfree, bit, field, idx; 2616 2617 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 2618 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2619 return; 2620 } 2621 rw_wlock(&pvh_global_lock); 2622 PMAP_LOCK(pmap); 2623 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2624 allfree = 1; 2625 for (field = 0; field < _NPCM; field++) { 2626 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2627 while (inuse != 0) { 2628 bit = ffsl(inuse) - 1; 2629 bitmask = 1UL << bit; 2630 idx = field * sizeof(inuse) * NBBY + bit; 2631 pv = &pc->pc_pventry[idx]; 2632 inuse &= ~bitmask; 2633 2634 pde = pmap_pde(pmap, pv->pv_va); 2635 KASSERT(pde != NULL && *pde != 0, 2636 ("pmap_remove_pages: pde")); 2637 pte = pmap_pde_to_pte(pde, pv->pv_va); 2638 if (!pte_test(pte, PTE_V)) 2639 panic("pmap_remove_pages: bad pte"); 2640 tpte = *pte; 2641 2642/* 2643 * We cannot remove wired pages from a process' mapping at this time 2644 */ 2645 if (pte_test(&tpte, PTE_W)) { 2646 allfree = 0; 2647 continue; 2648 } 2649 *pte = is_kernel_pmap(pmap) ? PTE_G : 0; 2650 2651 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte)); 2652 KASSERT(m != NULL, 2653 ("pmap_remove_pages: bad tpte %#jx", 2654 (uintmax_t)tpte)); 2655 2656 /* 2657 * Update the vm_page_t clean and reference bits. 2658 */ 2659 if (pte_test(&tpte, PTE_D)) 2660 vm_page_dirty(m); 2661 2662 /* Mark free */ 2663 PV_STAT(pv_entry_frees++); 2664 PV_STAT(pv_entry_spare++); 2665 pv_entry_count--; 2666 pc->pc_map[field] |= bitmask; 2667 pmap->pm_stats.resident_count--; 2668 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2669 if (TAILQ_EMPTY(&m->md.pv_list)) 2670 vm_page_aflag_clear(m, PGA_WRITEABLE); 2671 pmap_unuse_pt(pmap, pv->pv_va, *pde); 2672 } 2673 } 2674 if (allfree) { 2675 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2676 free_pv_chunk(pc); 2677 } 2678 } 2679 pmap_invalidate_all(pmap); 2680 PMAP_UNLOCK(pmap); 2681 rw_wunlock(&pvh_global_lock); 2682} 2683 2684/* 2685 * pmap_testbit tests bits in pte's 2686 */ 2687static boolean_t 2688pmap_testbit(vm_page_t m, int bit) 2689{ 2690 pv_entry_t pv; 2691 pmap_t pmap; 2692 pt_entry_t *pte; 2693 boolean_t rv = FALSE; 2694 2695 if (m->oflags & VPO_UNMANAGED) 2696 return (rv); 2697 2698 rw_assert(&pvh_global_lock, RA_WLOCKED); 2699 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2700 pmap = PV_PMAP(pv); 2701 PMAP_LOCK(pmap); 2702 pte = pmap_pte(pmap, pv->pv_va); 2703 rv = pte_test(pte, bit); 2704 PMAP_UNLOCK(pmap); 2705 if (rv) 2706 break; 2707 } 2708 return (rv); 2709} 2710 2711/* 2712 * pmap_page_wired_mappings: 2713 * 2714 * Return the number of managed mappings to the given physical page 2715 * that are wired. 2716 */ 2717int 2718pmap_page_wired_mappings(vm_page_t m) 2719{ 2720 pv_entry_t pv; 2721 pmap_t pmap; 2722 pt_entry_t *pte; 2723 int count; 2724 2725 count = 0; 2726 if ((m->oflags & VPO_UNMANAGED) != 0) 2727 return (count); 2728 rw_wlock(&pvh_global_lock); 2729 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2730 pmap = PV_PMAP(pv); 2731 PMAP_LOCK(pmap); 2732 pte = pmap_pte(pmap, pv->pv_va); 2733 if (pte_test(pte, PTE_W)) 2734 count++; 2735 PMAP_UNLOCK(pmap); 2736 } 2737 rw_wunlock(&pvh_global_lock); 2738 return (count); 2739} 2740 2741/* 2742 * Clear the write and modified bits in each of the given page's mappings. 2743 */ 2744void 2745pmap_remove_write(vm_page_t m) 2746{ 2747 pmap_t pmap; 2748 pt_entry_t pbits, *pte; 2749 pv_entry_t pv; 2750 2751 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2752 ("pmap_remove_write: page %p is not managed", m)); 2753 2754 /* 2755 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by 2756 * another thread while the object is locked. Thus, if PGA_WRITEABLE 2757 * is clear, no page table entries need updating. 2758 */ 2759 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2760 if ((m->oflags & VPO_BUSY) == 0 && 2761 (m->aflags & PGA_WRITEABLE) == 0) 2762 return; 2763 rw_wlock(&pvh_global_lock); 2764 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2765 pmap = PV_PMAP(pv); 2766 PMAP_LOCK(pmap); 2767 pte = pmap_pte(pmap, pv->pv_va); 2768 KASSERT(pte != NULL && pte_test(pte, PTE_V), 2769 ("page on pv_list has no pte")); 2770 pbits = *pte; 2771 if (pte_test(&pbits, PTE_D)) { 2772 pte_clear(&pbits, PTE_D); 2773 vm_page_dirty(m); 2774 } 2775 pte_set(&pbits, PTE_RO); 2776 if (pbits != *pte) { 2777 *pte = pbits; 2778 pmap_update_page(pmap, pv->pv_va, pbits); 2779 } 2780 PMAP_UNLOCK(pmap); 2781 } 2782 vm_page_aflag_clear(m, PGA_WRITEABLE); 2783 rw_wunlock(&pvh_global_lock); 2784} 2785 2786/* 2787 * pmap_ts_referenced: 2788 * 2789 * Return the count of reference bits for a page, clearing all of them. 2790 */ 2791int 2792pmap_ts_referenced(vm_page_t m) 2793{ 2794 2795 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2796 ("pmap_ts_referenced: page %p is not managed", m)); 2797 if (m->md.pv_flags & PV_TABLE_REF) { 2798 rw_wlock(&pvh_global_lock); 2799 m->md.pv_flags &= ~PV_TABLE_REF; 2800 rw_wunlock(&pvh_global_lock); 2801 return (1); 2802 } 2803 return (0); 2804} 2805 2806/* 2807 * pmap_is_modified: 2808 * 2809 * Return whether or not the specified physical page was modified 2810 * in any physical maps. 2811 */ 2812boolean_t 2813pmap_is_modified(vm_page_t m) 2814{ 2815 boolean_t rv; 2816 2817 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2818 ("pmap_is_modified: page %p is not managed", m)); 2819 2820 /* 2821 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be 2822 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 2823 * is clear, no PTEs can have PTE_D set. 2824 */ 2825 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2826 if ((m->oflags & VPO_BUSY) == 0 && 2827 (m->aflags & PGA_WRITEABLE) == 0) 2828 return (FALSE); 2829 rw_wlock(&pvh_global_lock); 2830 rv = pmap_testbit(m, PTE_D); 2831 rw_wunlock(&pvh_global_lock); 2832 return (rv); 2833} 2834 2835/* N/C */ 2836 2837/* 2838 * pmap_is_prefaultable: 2839 * 2840 * Return whether or not the specified virtual address is elgible 2841 * for prefault. 2842 */ 2843boolean_t 2844pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2845{ 2846 pd_entry_t *pde; 2847 pt_entry_t *pte; 2848 boolean_t rv; 2849 2850 rv = FALSE; 2851 PMAP_LOCK(pmap); 2852 pde = pmap_pde(pmap, addr); 2853 if (pde != NULL && *pde != 0) { 2854 pte = pmap_pde_to_pte(pde, addr); 2855 rv = (*pte == 0); 2856 } 2857 PMAP_UNLOCK(pmap); 2858 return (rv); 2859} 2860 2861/* 2862 * Clear the modify bits on the specified physical page. 2863 */ 2864void 2865pmap_clear_modify(vm_page_t m) 2866{ 2867 pmap_t pmap; 2868 pt_entry_t *pte; 2869 pv_entry_t pv; 2870 2871 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2872 ("pmap_clear_modify: page %p is not managed", m)); 2873 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2874 KASSERT((m->oflags & VPO_BUSY) == 0, 2875 ("pmap_clear_modify: page %p is busy", m)); 2876 2877 /* 2878 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set. 2879 * If the object containing the page is locked and the page is not 2880 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. 2881 */ 2882 if ((m->aflags & PGA_WRITEABLE) == 0) 2883 return; 2884 rw_wlock(&pvh_global_lock); 2885 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2886 pmap = PV_PMAP(pv); 2887 PMAP_LOCK(pmap); 2888 pte = pmap_pte(pmap, pv->pv_va); 2889 if (pte_test(pte, PTE_D)) { 2890 pte_clear(pte, PTE_D); 2891 pmap_update_page(pmap, pv->pv_va, *pte); 2892 } 2893 PMAP_UNLOCK(pmap); 2894 } 2895 rw_wunlock(&pvh_global_lock); 2896} 2897 2898/* 2899 * pmap_is_referenced: 2900 * 2901 * Return whether or not the specified physical page was referenced 2902 * in any physical maps. 2903 */ 2904boolean_t 2905pmap_is_referenced(vm_page_t m) 2906{ 2907 2908 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2909 ("pmap_is_referenced: page %p is not managed", m)); 2910 return ((m->md.pv_flags & PV_TABLE_REF) != 0); 2911} 2912 2913/* 2914 * pmap_clear_reference: 2915 * 2916 * Clear the reference bit on the specified physical page. 2917 */ 2918void 2919pmap_clear_reference(vm_page_t m) 2920{ 2921 2922 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2923 ("pmap_clear_reference: page %p is not managed", m)); 2924 rw_wlock(&pvh_global_lock); 2925 if (m->md.pv_flags & PV_TABLE_REF) { 2926 m->md.pv_flags &= ~PV_TABLE_REF; 2927 } 2928 rw_wunlock(&pvh_global_lock); 2929} 2930 2931/* 2932 * Miscellaneous support routines follow 2933 */ 2934 2935/* 2936 * Map a set of physical memory pages into the kernel virtual 2937 * address space. Return a pointer to where it is mapped. This 2938 * routine is intended to be used for mapping device memory, 2939 * NOT real memory. 2940 * 2941 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit. 2942 */ 2943void * 2944pmap_mapdev(vm_paddr_t pa, vm_size_t size) 2945{ 2946 vm_offset_t va, tmpva, offset; 2947 2948 /* 2949 * KSEG1 maps only first 512M of phys address space. For 2950 * pa > 0x20000000 we should make proper mapping * using pmap_kenter. 2951 */ 2952 if (MIPS_DIRECT_MAPPABLE(pa + size - 1)) 2953 return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa)); 2954 else { 2955 offset = pa & PAGE_MASK; 2956 size = roundup(size + offset, PAGE_SIZE); 2957 2958 va = kmem_alloc_nofault(kernel_map, size); 2959 if (!va) 2960 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 2961 pa = trunc_page(pa); 2962 for (tmpva = va; size > 0;) { 2963 pmap_kenter_attr(tmpva, pa, PTE_C_UNCACHED); 2964 size -= PAGE_SIZE; 2965 tmpva += PAGE_SIZE; 2966 pa += PAGE_SIZE; 2967 } 2968 } 2969 2970 return ((void *)(va + offset)); 2971} 2972 2973void 2974pmap_unmapdev(vm_offset_t va, vm_size_t size) 2975{ 2976#ifndef __mips_n64 2977 vm_offset_t base, offset; 2978 2979 /* If the address is within KSEG1 then there is nothing to do */ 2980 if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END) 2981 return; 2982 2983 base = trunc_page(va); 2984 offset = va & PAGE_MASK; 2985 size = roundup(size + offset, PAGE_SIZE); 2986 kmem_free(kernel_map, base, size); 2987#endif 2988} 2989 2990/* 2991 * perform the pmap work for mincore 2992 */ 2993int 2994pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 2995{ 2996 pt_entry_t *ptep, pte; 2997 vm_paddr_t pa; 2998 vm_page_t m; 2999 int val; 3000 3001 PMAP_LOCK(pmap); 3002retry: 3003 ptep = pmap_pte(pmap, addr); 3004 pte = (ptep != NULL) ? *ptep : 0; 3005 if (!pte_test(&pte, PTE_V)) { 3006 val = 0; 3007 goto out; 3008 } 3009 val = MINCORE_INCORE; 3010 if (pte_test(&pte, PTE_D)) 3011 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 3012 pa = TLBLO_PTE_TO_PA(pte); 3013 if (pte_test(&pte, PTE_MANAGED)) { 3014 /* 3015 * This may falsely report the given address as 3016 * MINCORE_REFERENCED. Unfortunately, due to the lack of 3017 * per-PTE reference information, it is impossible to 3018 * determine if the address is MINCORE_REFERENCED. 3019 */ 3020 m = PHYS_TO_VM_PAGE(pa); 3021 if ((m->aflags & PGA_REFERENCED) != 0) 3022 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 3023 } 3024 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 3025 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && 3026 pte_test(&pte, PTE_MANAGED)) { 3027 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ 3028 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 3029 goto retry; 3030 } else 3031out: 3032 PA_UNLOCK_COND(*locked_pa); 3033 PMAP_UNLOCK(pmap); 3034 return (val); 3035} 3036 3037void 3038pmap_activate(struct thread *td) 3039{ 3040 pmap_t pmap, oldpmap; 3041 struct proc *p = td->td_proc; 3042 u_int cpuid; 3043 3044 critical_enter(); 3045 3046 pmap = vmspace_pmap(p->p_vmspace); 3047 oldpmap = PCPU_GET(curpmap); 3048 cpuid = PCPU_GET(cpuid); 3049 3050 if (oldpmap) 3051 CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); 3052 CPU_SET_ATOMIC(cpuid, &pmap->pm_active); 3053 pmap_asid_alloc(pmap); 3054 if (td == curthread) { 3055 PCPU_SET(segbase, pmap->pm_segtab); 3056 mips_wr_entryhi(pmap->pm_asid[cpuid].asid); 3057 } 3058 3059 PCPU_SET(curpmap, pmap); 3060 critical_exit(); 3061} 3062 3063void 3064pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 3065{ 3066} 3067 3068/* 3069 * Increase the starting virtual address of the given mapping if a 3070 * different alignment might result in more superpage mappings. 3071 */ 3072void 3073pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 3074 vm_offset_t *addr, vm_size_t size) 3075{ 3076 vm_offset_t superpage_offset; 3077 3078 if (size < NBSEG) 3079 return; 3080 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 3081 offset += ptoa(object->pg_color); 3082 superpage_offset = offset & SEGMASK; 3083 if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG || 3084 (*addr & SEGMASK) == superpage_offset) 3085 return; 3086 if ((*addr & SEGMASK) < superpage_offset) 3087 *addr = (*addr & ~SEGMASK) + superpage_offset; 3088 else 3089 *addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset; 3090} 3091 3092/* 3093 * Increase the starting virtual address of the given mapping so 3094 * that it is aligned to not be the second page in a TLB entry. 3095 * This routine assumes that the length is appropriately-sized so 3096 * that the allocation does not share a TLB entry at all if required. 3097 */ 3098void 3099pmap_align_tlb(vm_offset_t *addr) 3100{ 3101 if ((*addr & PAGE_SIZE) == 0) 3102 return; 3103 *addr += PAGE_SIZE; 3104 return; 3105} 3106 3107#ifdef DDB 3108DB_SHOW_COMMAND(ptable, ddb_pid_dump) 3109{ 3110 pmap_t pmap; 3111 struct thread *td = NULL; 3112 struct proc *p; 3113 int i, j, k; 3114 vm_paddr_t pa; 3115 vm_offset_t va; 3116 3117 if (have_addr) { 3118 td = db_lookup_thread(addr, TRUE); 3119 if (td == NULL) { 3120 db_printf("Invalid pid or tid"); 3121 return; 3122 } 3123 p = td->td_proc; 3124 if (p->p_vmspace == NULL) { 3125 db_printf("No vmspace for process"); 3126 return; 3127 } 3128 pmap = vmspace_pmap(p->p_vmspace); 3129 } else 3130 pmap = kernel_pmap; 3131 3132 db_printf("pmap:%p segtab:%p asid:%x generation:%x\n", 3133 pmap, pmap->pm_segtab, pmap->pm_asid[0].asid, 3134 pmap->pm_asid[0].gen); 3135 for (i = 0; i < NPDEPG; i++) { 3136 pd_entry_t *pdpe; 3137 pt_entry_t *pde; 3138 pt_entry_t pte; 3139 3140 pdpe = (pd_entry_t *)pmap->pm_segtab[i]; 3141 if (pdpe == NULL) 3142 continue; 3143 db_printf("[%4d] %p\n", i, pdpe); 3144#ifdef __mips_n64 3145 for (j = 0; j < NPDEPG; j++) { 3146 pde = (pt_entry_t *)pdpe[j]; 3147 if (pde == NULL) 3148 continue; 3149 db_printf("\t[%4d] %p\n", j, pde); 3150#else 3151 { 3152 j = 0; 3153 pde = (pt_entry_t *)pdpe; 3154#endif 3155 for (k = 0; k < NPTEPG; k++) { 3156 pte = pde[k]; 3157 if (pte == 0 || !pte_test(&pte, PTE_V)) 3158 continue; 3159 pa = TLBLO_PTE_TO_PA(pte); 3160 va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT); 3161 db_printf("\t\t[%04d] va: %p pte: %8jx pa:%jx\n", 3162 k, (void *)va, (uintmax_t)pte, (uintmax_t)pa); 3163 } 3164 } 3165 } 3166} 3167#endif 3168 3169#if defined(DEBUG) 3170 3171static void pads(pmap_t pm); 3172void pmap_pvdump(vm_offset_t pa); 3173 3174/* print address space of pmap*/ 3175static void 3176pads(pmap_t pm) 3177{ 3178 unsigned va, i, j; 3179 pt_entry_t *ptep; 3180 3181 if (pm == kernel_pmap) 3182 return; 3183 for (i = 0; i < NPTEPG; i++) 3184 if (pm->pm_segtab[i]) 3185 for (j = 0; j < NPTEPG; j++) { 3186 va = (i << SEGSHIFT) + (j << PAGE_SHIFT); 3187 if (pm == kernel_pmap && va < KERNBASE) 3188 continue; 3189 if (pm != kernel_pmap && 3190 va >= VM_MAXUSER_ADDRESS) 3191 continue; 3192 ptep = pmap_pte(pm, va); 3193 if (pte_test(ptep, PTE_V)) 3194 printf("%x:%x ", va, *(int *)ptep); 3195 } 3196 3197} 3198 3199void 3200pmap_pvdump(vm_offset_t pa) 3201{ 3202 register pv_entry_t pv; 3203 vm_page_t m; 3204 3205 printf("pa %x", pa); 3206 m = PHYS_TO_VM_PAGE(pa); 3207 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3208 pv = TAILQ_NEXT(pv, pv_list)) { 3209 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3210 pads(pv->pv_pmap); 3211 } 3212 printf(" "); 3213} 3214 3215/* N/C */ 3216#endif 3217 3218 3219/* 3220 * Allocate TLB address space tag (called ASID or TLBPID) and return it. 3221 * It takes almost as much or more time to search the TLB for a 3222 * specific ASID and flush those entries as it does to flush the entire TLB. 3223 * Therefore, when we allocate a new ASID, we just take the next number. When 3224 * we run out of numbers, we flush the TLB, increment the generation count 3225 * and start over. ASID zero is reserved for kernel use. 3226 */ 3227static void 3228pmap_asid_alloc(pmap) 3229 pmap_t pmap; 3230{ 3231 if (pmap->pm_asid[PCPU_GET(cpuid)].asid != PMAP_ASID_RESERVED && 3232 pmap->pm_asid[PCPU_GET(cpuid)].gen == PCPU_GET(asid_generation)); 3233 else { 3234 if (PCPU_GET(next_asid) == pmap_max_asid) { 3235 tlb_invalidate_all_user(NULL); 3236 PCPU_SET(asid_generation, 3237 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK); 3238 if (PCPU_GET(asid_generation) == 0) { 3239 PCPU_SET(asid_generation, 1); 3240 } 3241 PCPU_SET(next_asid, 1); /* 0 means invalid */ 3242 } 3243 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid); 3244 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation); 3245 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1); 3246 } 3247} 3248 3249static pt_entry_t 3250init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot) 3251{ 3252 pt_entry_t rw; 3253 3254 if (!(prot & VM_PROT_WRITE)) 3255 rw = PTE_V | PTE_RO; 3256 else if ((m->oflags & VPO_UNMANAGED) == 0) { 3257 if ((access & VM_PROT_WRITE) != 0) 3258 rw = PTE_V | PTE_D; 3259 else 3260 rw = PTE_V; 3261 } else 3262 /* Needn't emulate a modified bit for unmanaged pages. */ 3263 rw = PTE_V | PTE_D; 3264 return (rw); 3265} 3266 3267/* 3268 * pmap_emulate_modified : do dirty bit emulation 3269 * 3270 * On SMP, update just the local TLB, other CPUs will update their 3271 * TLBs from PTE lazily, if they get the exception. 3272 * Returns 0 in case of sucess, 1 if the page is read only and we 3273 * need to fault. 3274 */ 3275int 3276pmap_emulate_modified(pmap_t pmap, vm_offset_t va) 3277{ 3278 pt_entry_t *pte; 3279 3280 PMAP_LOCK(pmap); 3281 pte = pmap_pte(pmap, va); 3282 if (pte == NULL) 3283 panic("pmap_emulate_modified: can't find PTE"); 3284#ifdef SMP 3285 /* It is possible that some other CPU changed m-bit */ 3286 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) { 3287 tlb_update(pmap, va, *pte); 3288 PMAP_UNLOCK(pmap); 3289 return (0); 3290 } 3291#else 3292 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) 3293 panic("pmap_emulate_modified: invalid pte"); 3294#endif 3295 if (pte_test(pte, PTE_RO)) { 3296 PMAP_UNLOCK(pmap); 3297 return (1); 3298 } 3299 pte_set(pte, PTE_D); 3300 tlb_update(pmap, va, *pte); 3301 if (!pte_test(pte, PTE_MANAGED)) 3302 panic("pmap_emulate_modified: unmanaged page"); 3303 PMAP_UNLOCK(pmap); 3304 return (0); 3305} 3306 3307/* 3308 * Routine: pmap_kextract 3309 * Function: 3310 * Extract the physical page address associated 3311 * virtual address. 3312 */ 3313vm_paddr_t 3314pmap_kextract(vm_offset_t va) 3315{ 3316 int mapped; 3317 3318 /* 3319 * First, the direct-mapped regions. 3320 */ 3321#if defined(__mips_n64) 3322 if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END) 3323 return (MIPS_XKPHYS_TO_PHYS(va)); 3324#endif 3325 if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END) 3326 return (MIPS_KSEG0_TO_PHYS(va)); 3327 3328 if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END) 3329 return (MIPS_KSEG1_TO_PHYS(va)); 3330 3331 /* 3332 * User virtual addresses. 3333 */ 3334 if (va < VM_MAXUSER_ADDRESS) { 3335 pt_entry_t *ptep; 3336 3337 if (curproc && curproc->p_vmspace) { 3338 ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va); 3339 if (ptep) { 3340 return (TLBLO_PTE_TO_PA(*ptep) | 3341 (va & PAGE_MASK)); 3342 } 3343 return (0); 3344 } 3345 } 3346 3347 /* 3348 * Should be kernel virtual here, otherwise fail 3349 */ 3350 mapped = (va >= MIPS_KSEG2_START || va < MIPS_KSEG2_END); 3351#if defined(__mips_n64) 3352 mapped = mapped || (va >= MIPS_XKSEG_START || va < MIPS_XKSEG_END); 3353#endif 3354 /* 3355 * Kernel virtual. 3356 */ 3357 3358 if (mapped) { 3359 pt_entry_t *ptep; 3360 3361 /* Is the kernel pmap initialized? */ 3362 if (!CPU_EMPTY(&kernel_pmap->pm_active)) { 3363 /* It's inside the virtual address range */ 3364 ptep = pmap_pte(kernel_pmap, va); 3365 if (ptep) { 3366 return (TLBLO_PTE_TO_PA(*ptep) | 3367 (va & PAGE_MASK)); 3368 } 3369 } 3370 return (0); 3371 } 3372 3373 panic("%s for unknown address space %p.", __func__, (void *)va); 3374} 3375 3376 3377void 3378pmap_flush_pvcache(vm_page_t m) 3379{ 3380 pv_entry_t pv; 3381 3382 if (m != NULL) { 3383 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3384 pv = TAILQ_NEXT(pv, pv_list)) { 3385 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); 3386 } 3387 } 3388} 3389