1/* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department and William Jolitz of UUNET Technologies Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 38 * from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps 39 * JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish 40 */ 41 42/* 43 * Manages physical address maps. 44 * 45 * Since the information managed by this module is 46 * also stored by the logical address mapping module, 47 * this module may throw away valid virtual-to-physical 48 * mappings at almost any time. However, invalidations 49 * of virtual-to-physical mappings must be done as 50 * requested. 51 * 52 * In order to cope with hardware architectures which 53 * make virtual-to-physical map invalidates expensive, 54 * this module may delay invalidate or reduced protection 55 * operations until such time as they are actually 56 * necessary. This module is given full information as 57 * to which processors are currently using which maps, 58 * and to when physical maps must be made correct. 
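 *
 * On MIPS this is done with per-CPU ASIDs and the pmap's pm_active
 * CPU set: TLB invalidations are only pushed to CPUs that currently
 * have the pmap active, while inactive CPUs merely have their ASID
 * generation reset so that a fresh ASID (and hence a clean TLB
 * context) is allocated the next time the pmap is activated there.
 * A simplified sketch of that pattern (see pmap_call_on_active_cpus()
 * below for the real code, which also pins the thread and
 * special-cases the kernel pmap and the local CPU):
 *
 *	CPU_FOREACH(cpu)
 *		if (!CPU_ISSET(cpu, &pmap->pm_active))
 *			pmap->pm_asid[cpu].gen = 0;
 *	smp_rendezvous_cpus(pmap->pm_active, NULL, fn, NULL, arg);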
59 */ 60 61#include <sys/cdefs.h> 62__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 248084 2013-03-09 02:32:23Z attilio $"); 63 64#include "opt_ddb.h" 65#include "opt_pmap.h" 66 67#include <sys/param.h> 68#include <sys/systm.h> 69#include <sys/lock.h> 70#include <sys/mman.h> 71#include <sys/msgbuf.h> 72#include <sys/mutex.h> 73#include <sys/pcpu.h> 74#include <sys/proc.h> 75#include <sys/rwlock.h> 76#include <sys/sched.h> 77#ifdef SMP 78#include <sys/smp.h> 79#else 80#include <sys/cpuset.h> 81#endif 82#include <sys/sysctl.h> 83#include <sys/vmmeter.h> 84 85#ifdef DDB 86#include <ddb/ddb.h> 87#endif 88 89#include <vm/vm.h> 90#include <vm/vm_param.h> 91#include <vm/vm_kern.h> 92#include <vm/vm_page.h> 93#include <vm/vm_map.h> 94#include <vm/vm_object.h> 95#include <vm/vm_extern.h> 96#include <vm/vm_pageout.h> 97#include <vm/vm_pager.h> 98#include <vm/uma.h> 99 100#include <machine/cache.h> 101#include <machine/md_var.h> 102#include <machine/tlb.h> 103 104#undef PMAP_DEBUG 105 106#if !defined(DIAGNOSTIC) 107#define PMAP_INLINE __inline 108#else 109#define PMAP_INLINE 110#endif 111 112#ifdef PV_STATS 113#define PV_STAT(x) do { x ; } while (0) 114#else 115#define PV_STAT(x) do { } while (0) 116#endif 117 118/* 119 * Get PDEs and PTEs for user/kernel address space 120 */ 121#define pmap_seg_index(v) (((v) >> SEGSHIFT) & (NPDEPG - 1)) 122#define pmap_pde_index(v) (((v) >> PDRSHIFT) & (NPDEPG - 1)) 123#define pmap_pte_index(v) (((v) >> PAGE_SHIFT) & (NPTEPG - 1)) 124#define pmap_pde_pindex(v) ((v) >> PDRSHIFT) 125 126#ifdef __mips_n64 127#define NUPDE (NPDEPG * NPDEPG) 128#define NUSERPGTBLS (NUPDE + NPDEPG) 129#else 130#define NUPDE (NPDEPG) 131#define NUSERPGTBLS (NUPDE) 132#endif 133 134#define is_kernel_pmap(x) ((x) == kernel_pmap) 135 136struct pmap kernel_pmap_store; 137pd_entry_t *kernel_segmap; 138 139vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 140vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 141 142static int nkpt; 143unsigned pmap_max_asid; /* max ASID supported by the system */ 144 145#define PMAP_ASID_RESERVED 0 146 147vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; 148 149static void pmap_asid_alloc(pmap_t pmap); 150 151static struct rwlock_padalign pvh_global_lock; 152 153/* 154 * Data for the pv entry allocation mechanism 155 */ 156static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 157static int pv_entry_count; 158 159static void free_pv_chunk(struct pv_chunk *pc); 160static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 161static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); 162static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap); 163static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 164static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 165 vm_offset_t va); 166static vm_page_t pmap_alloc_direct_page(unsigned int index, int req); 167static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 168 vm_page_t m, vm_prot_t prot, vm_page_t mpte); 169static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va, 170 pd_entry_t pde); 171static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); 172static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); 173static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, 174 vm_offset_t va, vm_page_t m); 175static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte); 176static void pmap_invalidate_all(pmap_t pmap); 177static void 
pmap_invalidate_page(pmap_t pmap, vm_offset_t va); 178static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m); 179 180static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); 181static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); 182static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t); 183static pt_entry_t init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot); 184 185static void pmap_invalidate_page_action(void *arg); 186static void pmap_invalidate_range_action(void *arg); 187static void pmap_update_page_action(void *arg); 188 189#ifndef __mips_n64 190/* 191 * This structure is for high memory (memory above 512Meg in 32 bit) support. 192 * The highmem area does not have a KSEG0 mapping, and we need a mechanism to 193 * do temporary per-CPU mappings for pmap_zero_page, pmap_copy_page etc. 194 * 195 * At bootup, we reserve 2 virtual pages per CPU for mapping highmem pages. To 196 * access a highmem physical address on a CPU, we map the physical address to 197 * the reserved virtual address for the CPU in the kernel pagetable. This is 198 * done with interrupts disabled(although a spinlock and sched_pin would be 199 * sufficient). 200 */ 201struct local_sysmaps { 202 vm_offset_t base; 203 uint32_t saved_intr; 204 uint16_t valid1, valid2; 205}; 206static struct local_sysmaps sysmap_lmem[MAXCPU]; 207 208static __inline void 209pmap_alloc_lmem_map(void) 210{ 211 int i; 212 213 for (i = 0; i < MAXCPU; i++) { 214 sysmap_lmem[i].base = virtual_avail; 215 virtual_avail += PAGE_SIZE * 2; 216 sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0; 217 } 218} 219 220static __inline vm_offset_t 221pmap_lmem_map1(vm_paddr_t phys) 222{ 223 struct local_sysmaps *sysm; 224 pt_entry_t *pte, npte; 225 vm_offset_t va; 226 uint32_t intr; 227 int cpu; 228 229 intr = intr_disable(); 230 cpu = PCPU_GET(cpuid); 231 sysm = &sysmap_lmem[cpu]; 232 sysm->saved_intr = intr; 233 va = sysm->base; 234 npte = TLBLO_PA_TO_PFN(phys) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G; 235 pte = pmap_pte(kernel_pmap, va); 236 *pte = npte; 237 sysm->valid1 = 1; 238 return (va); 239} 240 241static __inline vm_offset_t 242pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2) 243{ 244 struct local_sysmaps *sysm; 245 pt_entry_t *pte, npte; 246 vm_offset_t va1, va2; 247 uint32_t intr; 248 int cpu; 249 250 intr = intr_disable(); 251 cpu = PCPU_GET(cpuid); 252 sysm = &sysmap_lmem[cpu]; 253 sysm->saved_intr = intr; 254 va1 = sysm->base; 255 va2 = sysm->base + PAGE_SIZE; 256 npte = TLBLO_PA_TO_PFN(phys1) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G; 257 pte = pmap_pte(kernel_pmap, va1); 258 *pte = npte; 259 npte = TLBLO_PA_TO_PFN(phys2) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G; 260 pte = pmap_pte(kernel_pmap, va2); 261 *pte = npte; 262 sysm->valid1 = 1; 263 sysm->valid2 = 1; 264 return (va1); 265} 266 267static __inline void 268pmap_lmem_unmap(void) 269{ 270 struct local_sysmaps *sysm; 271 pt_entry_t *pte; 272 int cpu; 273 274 cpu = PCPU_GET(cpuid); 275 sysm = &sysmap_lmem[cpu]; 276 pte = pmap_pte(kernel_pmap, sysm->base); 277 *pte = PTE_G; 278 tlb_invalidate_address(kernel_pmap, sysm->base); 279 sysm->valid1 = 0; 280 if (sysm->valid2) { 281 pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE); 282 *pte = PTE_G; 283 tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE); 284 sysm->valid2 = 0; 285 } 286 intr_restore(sysm->saved_intr); 287} 288#else /* __mips_n64 */ 289 290static __inline void 291pmap_alloc_lmem_map(void) 292{ 293} 294 295static __inline vm_offset_t 296pmap_lmem_map1(vm_paddr_t phys) 
297{ 298 299 return (0); 300} 301 302static __inline vm_offset_t 303pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2) 304{ 305 306 return (0); 307} 308 309static __inline vm_offset_t 310pmap_lmem_unmap(void) 311{ 312 313 return (0); 314} 315#endif /* !__mips_n64 */ 316 317/* 318 * Page table entry lookup routines. 319 */ 320static __inline pd_entry_t * 321pmap_segmap(pmap_t pmap, vm_offset_t va) 322{ 323 324 return (&pmap->pm_segtab[pmap_seg_index(va)]); 325} 326 327#ifdef __mips_n64 328static __inline pd_entry_t * 329pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va) 330{ 331 pd_entry_t *pde; 332 333 pde = (pd_entry_t *)*pdpe; 334 return (&pde[pmap_pde_index(va)]); 335} 336 337static __inline pd_entry_t * 338pmap_pde(pmap_t pmap, vm_offset_t va) 339{ 340 pd_entry_t *pdpe; 341 342 pdpe = pmap_segmap(pmap, va); 343 if (*pdpe == NULL) 344 return (NULL); 345 346 return (pmap_pdpe_to_pde(pdpe, va)); 347} 348#else 349static __inline pd_entry_t * 350pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va) 351{ 352 353 return (pdpe); 354} 355 356static __inline 357pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va) 358{ 359 360 return (pmap_segmap(pmap, va)); 361} 362#endif 363 364static __inline pt_entry_t * 365pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 366{ 367 pt_entry_t *pte; 368 369 pte = (pt_entry_t *)*pde; 370 return (&pte[pmap_pte_index(va)]); 371} 372 373pt_entry_t * 374pmap_pte(pmap_t pmap, vm_offset_t va) 375{ 376 pd_entry_t *pde; 377 378 pde = pmap_pde(pmap, va); 379 if (pde == NULL || *pde == NULL) 380 return (NULL); 381 382 return (pmap_pde_to_pte(pde, va)); 383} 384 385vm_offset_t 386pmap_steal_memory(vm_size_t size) 387{ 388 vm_paddr_t bank_size, pa; 389 vm_offset_t va; 390 391 size = round_page(size); 392 bank_size = phys_avail[1] - phys_avail[0]; 393 while (size > bank_size) { 394 int i; 395 396 for (i = 0; phys_avail[i + 2]; i += 2) { 397 phys_avail[i] = phys_avail[i + 2]; 398 phys_avail[i + 1] = phys_avail[i + 3]; 399 } 400 phys_avail[i] = 0; 401 phys_avail[i + 1] = 0; 402 if (!phys_avail[0]) 403 panic("pmap_steal_memory: out of memory"); 404 bank_size = phys_avail[1] - phys_avail[0]; 405 } 406 407 pa = phys_avail[0]; 408 phys_avail[0] += size; 409 if (MIPS_DIRECT_MAPPABLE(pa) == 0) 410 panic("Out of memory below 512Meg?"); 411 va = MIPS_PHYS_TO_DIRECT(pa); 412 bzero((caddr_t)va, size); 413 return (va); 414} 415 416/* 417 * Bootstrap the system enough to run with virtual memory. This 418 * assumes that the phys_avail array has been initialized. 419 */ 420static void 421pmap_create_kernel_pagetable(void) 422{ 423 int i, j; 424 vm_offset_t ptaddr; 425 pt_entry_t *pte; 426#ifdef __mips_n64 427 pd_entry_t *pde; 428 vm_offset_t pdaddr; 429 int npt, npde; 430#endif 431 432 /* 433 * Allocate segment table for the kernel 434 */ 435 kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE); 436 437 /* 438 * Allocate second level page tables for the kernel 439 */ 440#ifdef __mips_n64 441 npde = howmany(NKPT, NPDEPG); 442 pdaddr = pmap_steal_memory(PAGE_SIZE * npde); 443#endif 444 nkpt = NKPT; 445 ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt); 446 447 /* 448 * The R[4-7]?00 stores only one copy of the Global bit in the 449 * translation lookaside buffer for each 2 page entry. Thus invalid 450 * entrys must have the Global bit set so when Entry LO and Entry HI 451 * G bits are anded together they will produce a global bit to store 452 * in the tlb. 
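 *
 * In other words, for the TLB pair that maps pages 2i and 2i+1 the
 * hardware effectively computes
 *
 *	G = EntryLo0.G & EntryLo1.G
 *
 * so a global (kernel) mapping that shares a pair with an unused slot
 * keeps its global attribute only if the unused slot's PTE is
 * initialized to PTE_G, which is what the loop below does.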
453 */ 454 for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++) 455 *pte = PTE_G; 456 457#ifdef __mips_n64 458 for (i = 0, npt = nkpt; npt > 0; i++) { 459 kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE); 460 pde = (pd_entry_t *)kernel_segmap[i]; 461 462 for (j = 0; j < NPDEPG && npt > 0; j++, npt--) 463 pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE); 464 } 465#else 466 for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++) 467 kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE)); 468#endif 469 470 PMAP_LOCK_INIT(kernel_pmap); 471 kernel_pmap->pm_segtab = kernel_segmap; 472 CPU_FILL(&kernel_pmap->pm_active); 473 TAILQ_INIT(&kernel_pmap->pm_pvchunk); 474 kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED; 475 kernel_pmap->pm_asid[0].gen = 0; 476 kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE; 477} 478 479void 480pmap_bootstrap(void) 481{ 482 int i; 483 int need_local_mappings = 0; 484 485 /* Sort. */ 486again: 487 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 488 /* 489 * Keep the memory aligned on page boundary. 490 */ 491 phys_avail[i] = round_page(phys_avail[i]); 492 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 493 494 if (i < 2) 495 continue; 496 if (phys_avail[i - 2] > phys_avail[i]) { 497 vm_paddr_t ptemp[2]; 498 499 ptemp[0] = phys_avail[i + 0]; 500 ptemp[1] = phys_avail[i + 1]; 501 502 phys_avail[i + 0] = phys_avail[i - 2]; 503 phys_avail[i + 1] = phys_avail[i - 1]; 504 505 phys_avail[i - 2] = ptemp[0]; 506 phys_avail[i - 1] = ptemp[1]; 507 goto again; 508 } 509 } 510 511 /* 512 * In 32 bit, we may have memory which cannot be mapped directly. 513 * This memory will need temporary mapping before it can be 514 * accessed. 515 */ 516 if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1] - 1)) 517 need_local_mappings = 1; 518 519 /* 520 * Copy the phys_avail[] array before we start stealing memory from it. 521 */ 522 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 523 physmem_desc[i] = phys_avail[i]; 524 physmem_desc[i + 1] = phys_avail[i + 1]; 525 } 526 527 Maxmem = atop(phys_avail[i - 1]); 528 529 if (bootverbose) { 530 printf("Physical memory chunk(s):\n"); 531 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 532 vm_paddr_t size; 533 534 size = phys_avail[i + 1] - phys_avail[i]; 535 printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n", 536 (uintmax_t) phys_avail[i], 537 (uintmax_t) phys_avail[i + 1] - 1, 538 (uintmax_t) size, (uintmax_t) size / PAGE_SIZE); 539 } 540 printf("Maxmem is 0x%0jx\n", ptoa((uintmax_t)Maxmem)); 541 } 542 /* 543 * Steal the message buffer from the beginning of memory. 544 */ 545 msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize); 546 msgbufinit(msgbufp, msgbufsize); 547 548 /* 549 * Steal thread0 kstack. 550 */ 551 kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT); 552 553 virtual_avail = VM_MIN_KERNEL_ADDRESS; 554 virtual_end = VM_MAX_KERNEL_ADDRESS; 555 556#ifdef SMP 557 /* 558 * Steal some virtual address space to map the pcpu area. 559 */ 560 virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2); 561 pcpup = (struct pcpu *)virtual_avail; 562 virtual_avail += PAGE_SIZE * 2; 563 564 /* 565 * Initialize the wired TLB entry mapping the pcpu region for 566 * the BSP at 'pcpup'. Up until this point we were operating 567 * with the 'pcpup' for the BSP pointing to a virtual address 568 * in KSEG0 so there was no need for a TLB mapping. 
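 *
 * The two-page reservation and the PAGE_SIZE * 2 alignment above
 * match the TLB's even/odd page pairing, so a single wired entry can
 * cover the whole pcpu region.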
569 */ 570 mips_pcpu_tlb_init(PCPU_ADDR(0)); 571 572 if (bootverbose) 573 printf("pcpu is available at virtual address %p.\n", pcpup); 574#endif 575 576 if (need_local_mappings) 577 pmap_alloc_lmem_map(); 578 pmap_create_kernel_pagetable(); 579 pmap_max_asid = VMNUM_PIDS; 580 mips_wr_entryhi(0); 581 mips_wr_pagemask(0); 582 583 /* 584 * Initialize the global pv list lock. 585 */ 586 rw_init(&pvh_global_lock, "pmap pv global"); 587} 588 589/* 590 * Initialize a vm_page's machine-dependent fields. 591 */ 592void 593pmap_page_init(vm_page_t m) 594{ 595 596 TAILQ_INIT(&m->md.pv_list); 597 m->md.pv_flags = 0; 598} 599 600/* 601 * Initialize the pmap module. 602 * Called by vm_init, to initialize any structures that the pmap 603 * system needs to map virtual memory. 604 */ 605void 606pmap_init(void) 607{ 608} 609 610/*************************************************** 611 * Low level helper routines..... 612 ***************************************************/ 613 614#ifdef SMP 615static __inline void 616pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) 617{ 618 int cpuid, cpu, self; 619 cpuset_t active_cpus; 620 621 sched_pin(); 622 if (is_kernel_pmap(pmap)) { 623 smp_rendezvous(NULL, fn, NULL, arg); 624 goto out; 625 } 626 /* Force ASID update on inactive CPUs */ 627 CPU_FOREACH(cpu) { 628 if (!CPU_ISSET(cpu, &pmap->pm_active)) 629 pmap->pm_asid[cpu].gen = 0; 630 } 631 cpuid = PCPU_GET(cpuid); 632 /* 633 * XXX: barrier/locking for active? 634 * 635 * Take a snapshot of active here, any further changes are ignored. 636 * tlb update/invalidate should be harmless on inactive CPUs 637 */ 638 active_cpus = pmap->pm_active; 639 self = CPU_ISSET(cpuid, &active_cpus); 640 CPU_CLR(cpuid, &active_cpus); 641 /* Optimize for the case where this cpu is the only active one */ 642 if (CPU_EMPTY(&active_cpus)) { 643 if (self) 644 fn(arg); 645 } else { 646 if (self) 647 CPU_SET(cpuid, &active_cpus); 648 smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg); 649 } 650out: 651 sched_unpin(); 652} 653#else /* !SMP */ 654static __inline void 655pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) 656{ 657 int cpuid; 658 659 if (is_kernel_pmap(pmap)) { 660 fn(arg); 661 return; 662 } 663 cpuid = PCPU_GET(cpuid); 664 if (!CPU_ISSET(cpuid, &pmap->pm_active)) 665 pmap->pm_asid[cpuid].gen = 0; 666 else 667 fn(arg); 668} 669#endif /* SMP */ 670 671static void 672pmap_invalidate_all(pmap_t pmap) 673{ 674 675 pmap_call_on_active_cpus(pmap, 676 (void (*)(void *))tlb_invalidate_all_user, pmap); 677} 678 679struct pmap_invalidate_page_arg { 680 pmap_t pmap; 681 vm_offset_t va; 682}; 683 684static void 685pmap_invalidate_page_action(void *arg) 686{ 687 struct pmap_invalidate_page_arg *p = arg; 688 689 tlb_invalidate_address(p->pmap, p->va); 690} 691 692static void 693pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 694{ 695 struct pmap_invalidate_page_arg arg; 696 697 arg.pmap = pmap; 698 arg.va = va; 699 pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &arg); 700} 701 702struct pmap_invalidate_range_arg { 703 pmap_t pmap; 704 vm_offset_t sva; 705 vm_offset_t eva; 706}; 707 708static void 709pmap_invalidate_range_action(void *arg) 710{ 711 struct pmap_invalidate_range_arg *p = arg; 712 713 tlb_invalidate_range(p->pmap, p->sva, p->eva); 714} 715 716static void 717pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 718{ 719 struct pmap_invalidate_range_arg arg; 720 721 arg.pmap = pmap; 722 arg.sva = sva; 723 arg.eva = eva; 724 pmap_call_on_active_cpus(pmap, 
pmap_invalidate_range_action, &arg); 725} 726 727struct pmap_update_page_arg { 728 pmap_t pmap; 729 vm_offset_t va; 730 pt_entry_t pte; 731}; 732 733static void 734pmap_update_page_action(void *arg) 735{ 736 struct pmap_update_page_arg *p = arg; 737 738 tlb_update(p->pmap, p->va, p->pte); 739} 740 741static void 742pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) 743{ 744 struct pmap_update_page_arg arg; 745 746 arg.pmap = pmap; 747 arg.va = va; 748 arg.pte = pte; 749 pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg); 750} 751 752/* 753 * Routine: pmap_extract 754 * Function: 755 * Extract the physical page address associated 756 * with the given map/virtual_address pair. 757 */ 758vm_paddr_t 759pmap_extract(pmap_t pmap, vm_offset_t va) 760{ 761 pt_entry_t *pte; 762 vm_offset_t retval = 0; 763 764 PMAP_LOCK(pmap); 765 pte = pmap_pte(pmap, va); 766 if (pte) { 767 retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK); 768 } 769 PMAP_UNLOCK(pmap); 770 return (retval); 771} 772 773/* 774 * Routine: pmap_extract_and_hold 775 * Function: 776 * Atomically extract and hold the physical page 777 * with the given pmap and virtual address pair 778 * if that mapping permits the given protection. 779 */ 780vm_page_t 781pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 782{ 783 pt_entry_t pte, *ptep; 784 vm_paddr_t pa, pte_pa; 785 vm_page_t m; 786 787 m = NULL; 788 pa = 0; 789 PMAP_LOCK(pmap); 790retry: 791 ptep = pmap_pte(pmap, va); 792 if (ptep != NULL) { 793 pte = *ptep; 794 if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) || 795 (prot & VM_PROT_WRITE) == 0)) { 796 pte_pa = TLBLO_PTE_TO_PA(pte); 797 if (vm_page_pa_tryrelock(pmap, pte_pa, &pa)) 798 goto retry; 799 m = PHYS_TO_VM_PAGE(pte_pa); 800 vm_page_hold(m); 801 } 802 } 803 PA_UNLOCK_COND(pa); 804 PMAP_UNLOCK(pmap); 805 return (m); 806} 807 808/*************************************************** 809 * Low level mapping routines..... 810 ***************************************************/ 811 812/* 813 * add a wired page to the kva 814 */ 815void 816pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr) 817{ 818 pt_entry_t *pte; 819 pt_entry_t opte, npte; 820 821#ifdef PMAP_DEBUG 822 printf("pmap_kenter: va: %p -> pa: %p\n", (void *)va, (void *)pa); 823#endif 824 825 pte = pmap_pte(kernel_pmap, va); 826 opte = *pte; 827 npte = TLBLO_PA_TO_PFN(pa) | attr | PTE_D | PTE_V | PTE_G; 828 *pte = npte; 829 if (pte_test(&opte, PTE_V) && opte != npte) 830 pmap_update_page(kernel_pmap, va, npte); 831} 832 833void 834pmap_kenter(vm_offset_t va, vm_paddr_t pa) 835{ 836 837 KASSERT(is_cacheable_mem(pa), 838 ("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa)); 839 840 pmap_kenter_attr(va, pa, PTE_C_CACHE); 841} 842 843/* 844 * remove a page from the kernel pagetables 845 */ 846 /* PMAP_INLINE */ void 847pmap_kremove(vm_offset_t va) 848{ 849 pt_entry_t *pte; 850 851 /* 852 * Write back all caches from the page being destroyed 853 */ 854 mips_dcache_wbinv_range_index(va, PAGE_SIZE); 855 856 pte = pmap_pte(kernel_pmap, va); 857 *pte = PTE_G; 858 pmap_invalidate_page(kernel_pmap, va); 859} 860 861/* 862 * Used to map a range of physical addresses into kernel 863 * virtual address space. 864 * 865 * The value passed in '*virt' is a suggested virtual address for 866 * the mapping. Architectures which can support a direct-mapped 867 * physical to virtual region can return the appropriate address 868 * within that region, leaving '*virt' unchanged. 
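 * (That is the case handled first below: when the whole range is
 * MIPS_DIRECT_MAPPABLE(), the XKPHYS/KSEG0 address of 'start' is
 * returned and no kernel PTEs are touched.)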
Other 869 * architectures should map the pages starting at '*virt' and 870 * update '*virt' with the first usable address after the mapped 871 * region. 872 * 873 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 874 */ 875vm_offset_t 876pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 877{ 878 vm_offset_t va, sva; 879 880 if (MIPS_DIRECT_MAPPABLE(end - 1)) 881 return (MIPS_PHYS_TO_DIRECT(start)); 882 883 va = sva = *virt; 884 while (start < end) { 885 pmap_kenter(va, start); 886 va += PAGE_SIZE; 887 start += PAGE_SIZE; 888 } 889 *virt = va; 890 return (sva); 891} 892 893/* 894 * Add a list of wired pages to the kva 895 * this routine is only used for temporary 896 * kernel mappings that do not need to have 897 * page modification or references recorded. 898 * Note that old mappings are simply written 899 * over. The page *must* be wired. 900 */ 901void 902pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 903{ 904 int i; 905 vm_offset_t origva = va; 906 907 for (i = 0; i < count; i++) { 908 pmap_flush_pvcache(m[i]); 909 pmap_kenter(va, VM_PAGE_TO_PHYS(m[i])); 910 va += PAGE_SIZE; 911 } 912 913 mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count); 914} 915 916/* 917 * this routine jerks page mappings from the 918 * kernel -- it is meant only for temporary mappings. 919 */ 920void 921pmap_qremove(vm_offset_t va, int count) 922{ 923 pt_entry_t *pte; 924 vm_offset_t origva; 925 926 if (count < 1) 927 return; 928 mips_dcache_wbinv_range_index(va, PAGE_SIZE * count); 929 origva = va; 930 do { 931 pte = pmap_pte(kernel_pmap, va); 932 *pte = PTE_G; 933 va += PAGE_SIZE; 934 } while (--count > 0); 935 pmap_invalidate_range(kernel_pmap, origva, va); 936} 937 938/*************************************************** 939 * Page table page management routines..... 940 ***************************************************/ 941 942/* 943 * Decrements a page table page's wire count, which is used to record the 944 * number of valid page table entries within the page. If the wire count 945 * drops to zero, then the page table page is unmapped. Returns TRUE if the 946 * page table page was unmapped and FALSE otherwise. 947 */ 948static PMAP_INLINE boolean_t 949pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m) 950{ 951 952 --m->wire_count; 953 if (m->wire_count == 0) { 954 _pmap_unwire_ptp(pmap, va, m); 955 return (TRUE); 956 } else 957 return (FALSE); 958} 959 960static void 961_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m) 962{ 963 pd_entry_t *pde; 964 965 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 966 /* 967 * unmap the page table page 968 */ 969#ifdef __mips_n64 970 if (m->pindex < NUPDE) 971 pde = pmap_pde(pmap, va); 972 else 973 pde = pmap_segmap(pmap, va); 974#else 975 pde = pmap_pde(pmap, va); 976#endif 977 *pde = 0; 978 pmap->pm_stats.resident_count--; 979 980#ifdef __mips_n64 981 if (m->pindex < NUPDE) { 982 pd_entry_t *pdp; 983 vm_page_t pdpg; 984 985 /* 986 * Recursively decrement next level pagetable refcount 987 */ 988 pdp = (pd_entry_t *)*pmap_segmap(pmap, va); 989 pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp)); 990 pmap_unwire_ptp(pmap, va, pdpg); 991 } 992#endif 993 994 /* 995 * If the page is finally unwired, simply free it. 996 */ 997 vm_page_free_zero(m); 998 atomic_subtract_int(&cnt.v_wire_count, 1); 999} 1000 1001/* 1002 * After removing a page table entry, this routine is used to 1003 * conditionally free the page, and manage the hold/wire counts. 
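 *
 * Callers pass in the page directory entry that covered the removed
 * PTE; kernel addresses are ignored, since kernel page table pages
 * are never freed.  Typical use, as in pmap_remove_pte() below:
 *
 *	return (pmap_unuse_pt(pmap, va, pde));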
1004 */ 1005static int 1006pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t pde) 1007{ 1008 vm_page_t mpte; 1009 1010 if (va >= VM_MAXUSER_ADDRESS) 1011 return (0); 1012 KASSERT(pde != 0, ("pmap_unuse_pt: pde != 0")); 1013 mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pde)); 1014 return (pmap_unwire_ptp(pmap, va, mpte)); 1015} 1016 1017void 1018pmap_pinit0(pmap_t pmap) 1019{ 1020 int i; 1021 1022 PMAP_LOCK_INIT(pmap); 1023 pmap->pm_segtab = kernel_segmap; 1024 CPU_ZERO(&pmap->pm_active); 1025 for (i = 0; i < MAXCPU; i++) { 1026 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1027 pmap->pm_asid[i].gen = 0; 1028 } 1029 PCPU_SET(curpmap, pmap); 1030 TAILQ_INIT(&pmap->pm_pvchunk); 1031 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1032} 1033 1034void 1035pmap_grow_direct_page_cache() 1036{ 1037 1038#ifdef __mips_n64 1039 vm_pageout_grow_cache(3, 0, MIPS_XKPHYS_LARGEST_PHYS); 1040#else 1041 vm_pageout_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS); 1042#endif 1043} 1044 1045static vm_page_t 1046pmap_alloc_direct_page(unsigned int index, int req) 1047{ 1048 vm_page_t m; 1049 1050 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED | 1051 VM_ALLOC_ZERO); 1052 if (m == NULL) 1053 return (NULL); 1054 1055 if ((m->flags & PG_ZERO) == 0) 1056 pmap_zero_page(m); 1057 1058 m->pindex = index; 1059 return (m); 1060} 1061 1062/* 1063 * Initialize a preallocated and zeroed pmap structure, 1064 * such as one in a vmspace structure. 1065 */ 1066int 1067pmap_pinit(pmap_t pmap) 1068{ 1069 vm_offset_t ptdva; 1070 vm_page_t ptdpg; 1071 int i; 1072 1073 PMAP_LOCK_INIT(pmap); 1074 1075 /* 1076 * allocate the page directory page 1077 */ 1078 while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL) 1079 pmap_grow_direct_page_cache(); 1080 1081 ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg)); 1082 pmap->pm_segtab = (pd_entry_t *)ptdva; 1083 CPU_ZERO(&pmap->pm_active); 1084 for (i = 0; i < MAXCPU; i++) { 1085 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1086 pmap->pm_asid[i].gen = 0; 1087 } 1088 TAILQ_INIT(&pmap->pm_pvchunk); 1089 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1090 1091 return (1); 1092} 1093 1094/* 1095 * this routine is called if the page table page is not 1096 * mapped correctly. 1097 */ 1098static vm_page_t 1099_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) 1100{ 1101 vm_offset_t pageva; 1102 vm_page_t m; 1103 1104 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1105 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1106 ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1107 1108 /* 1109 * Find or fabricate a new pagetable page 1110 */ 1111 if ((m = pmap_alloc_direct_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) { 1112 if (flags & M_WAITOK) { 1113 PMAP_UNLOCK(pmap); 1114 rw_wunlock(&pvh_global_lock); 1115 pmap_grow_direct_page_cache(); 1116 rw_wlock(&pvh_global_lock); 1117 PMAP_LOCK(pmap); 1118 } 1119 1120 /* 1121 * Indicate the need to retry. While waiting, the page 1122 * table page may have been allocated. 1123 */ 1124 return (NULL); 1125 } 1126 1127 /* 1128 * Map the pagetable page into the process address space, if it 1129 * isn't already there. 
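 *
 * For reference, the indices used here come from the macros at the
 * top of the file:
 *
 *	pmap_seg_index(va) = (va >> SEGSHIFT)   & (NPDEPG - 1)
 *	pmap_pde_index(va) = (va >> PDRSHIFT)   & (NPDEPG - 1)
 *	pmap_pte_index(va) = (va >> PAGE_SHIFT) & (NPTEPG - 1)
 *
 * so on n64 a pte page's pindex splits into a segment index
 * (ptepindex >> (SEGSHIFT - PDRSHIFT)) and a pde index
 * (ptepindex & (NPDEPG - 1)), exactly as computed below.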
1130 */ 1131 pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)); 1132 1133#ifdef __mips_n64 1134 if (ptepindex >= NUPDE) { 1135 pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva; 1136 } else { 1137 pd_entry_t *pdep, *pde; 1138 int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT); 1139 int pdeindex = ptepindex & (NPDEPG - 1); 1140 vm_page_t pg; 1141 1142 pdep = &pmap->pm_segtab[segindex]; 1143 if (*pdep == NULL) { 1144 /* recurse for allocating page dir */ 1145 if (_pmap_allocpte(pmap, NUPDE + segindex, 1146 flags) == NULL) { 1147 /* alloc failed, release current */ 1148 --m->wire_count; 1149 atomic_subtract_int(&cnt.v_wire_count, 1); 1150 vm_page_free_zero(m); 1151 return (NULL); 1152 } 1153 } else { 1154 pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep)); 1155 pg->wire_count++; 1156 } 1157 /* Next level entry */ 1158 pde = (pd_entry_t *)*pdep; 1159 pde[pdeindex] = (pd_entry_t)pageva; 1160 } 1161#else 1162 pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva; 1163#endif 1164 pmap->pm_stats.resident_count++; 1165 return (m); 1166} 1167 1168static vm_page_t 1169pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1170{ 1171 unsigned ptepindex; 1172 pd_entry_t *pde; 1173 vm_page_t m; 1174 1175 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1176 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1177 ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1178 1179 /* 1180 * Calculate pagetable page index 1181 */ 1182 ptepindex = pmap_pde_pindex(va); 1183retry: 1184 /* 1185 * Get the page directory entry 1186 */ 1187 pde = pmap_pde(pmap, va); 1188 1189 /* 1190 * If the page table page is mapped, we just increment the hold 1191 * count, and activate it. 1192 */ 1193 if (pde != NULL && *pde != NULL) { 1194 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde)); 1195 m->wire_count++; 1196 } else { 1197 /* 1198 * Here if the pte page isn't mapped, or if it has been 1199 * deallocated. 1200 */ 1201 m = _pmap_allocpte(pmap, ptepindex, flags); 1202 if (m == NULL && (flags & M_WAITOK)) 1203 goto retry; 1204 } 1205 return (m); 1206} 1207 1208 1209/*************************************************** 1210 * Pmap allocation/deallocation routines. 1211 ***************************************************/ 1212 1213/* 1214 * Release any resources held by the given physical map. 1215 * Called when a pmap initialized by pmap_pinit is being released. 1216 * Should only be called if the map contains no valid mappings. 
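 *
 * The caller must already have removed every mapping (the KASSERT
 * below checks that resident_count is 0); all that is left to free
 * here is the segment-table page allocated by pmap_pinit().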
1217 */ 1218void 1219pmap_release(pmap_t pmap) 1220{ 1221 vm_offset_t ptdva; 1222 vm_page_t ptdpg; 1223 1224 KASSERT(pmap->pm_stats.resident_count == 0, 1225 ("pmap_release: pmap resident count %ld != 0", 1226 pmap->pm_stats.resident_count)); 1227 1228 ptdva = (vm_offset_t)pmap->pm_segtab; 1229 ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva)); 1230 1231 ptdpg->wire_count--; 1232 atomic_subtract_int(&cnt.v_wire_count, 1); 1233 vm_page_free_zero(ptdpg); 1234 PMAP_LOCK_DESTROY(pmap); 1235} 1236 1237/* 1238 * grow the number of kernel page table entries, if needed 1239 */ 1240void 1241pmap_growkernel(vm_offset_t addr) 1242{ 1243 vm_page_t nkpg; 1244 pd_entry_t *pde, *pdpe; 1245 pt_entry_t *pte; 1246 int i; 1247 1248 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1249 addr = roundup2(addr, NBSEG); 1250 if (addr - 1 >= kernel_map->max_offset) 1251 addr = kernel_map->max_offset; 1252 while (kernel_vm_end < addr) { 1253 pdpe = pmap_segmap(kernel_pmap, kernel_vm_end); 1254#ifdef __mips_n64 1255 if (*pdpe == 0) { 1256 /* new intermediate page table entry */ 1257 nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT); 1258 if (nkpg == NULL) 1259 panic("pmap_growkernel: no memory to grow kernel"); 1260 *pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg)); 1261 continue; /* try again */ 1262 } 1263#endif 1264 pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); 1265 if (*pde != 0) { 1266 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; 1267 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1268 kernel_vm_end = kernel_map->max_offset; 1269 break; 1270 } 1271 continue; 1272 } 1273 1274 /* 1275 * This index is bogus, but out of the way 1276 */ 1277 nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT); 1278 if (!nkpg) 1279 panic("pmap_growkernel: no memory to grow kernel"); 1280 nkpt++; 1281 *pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg)); 1282 1283 /* 1284 * The R[4-7]?00 stores only one copy of the Global bit in 1285 * the translation lookaside buffer for each 2 page entry. 1286 * Thus invalid entrys must have the Global bit set so when 1287 * Entry LO and Entry HI G bits are anded together they will 1288 * produce a global bit to store in the tlb. 1289 */ 1290 pte = (pt_entry_t *)*pde; 1291 for (i = 0; i < NPTEPG; i++) 1292 pte[i] = PTE_G; 1293 1294 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; 1295 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1296 kernel_vm_end = kernel_map->max_offset; 1297 break; 1298 } 1299 } 1300} 1301 1302/*************************************************** 1303 * page management routines. 
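 *
 * pv entries are allocated in page-sized chunks (struct pv_chunk, see
 * the CTASSERTs below): each chunk holds _NPCPV entries plus a small
 * bitmap, pc_map[_NPCM], in which a set bit marks a free slot.
 * get_pv_entry() claims the first set bit with ffsl(), free_pv_entry()
 * sets it again, and a chunk whose bitmap matches pc_freemask[] is
 * handed back to the VM system.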
1304 ***************************************************/ 1305 1306CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1307#ifdef __mips_n64 1308CTASSERT(_NPCM == 3); 1309CTASSERT(_NPCPV == 168); 1310#else 1311CTASSERT(_NPCM == 11); 1312CTASSERT(_NPCPV == 336); 1313#endif 1314 1315static __inline struct pv_chunk * 1316pv_to_chunk(pv_entry_t pv) 1317{ 1318 1319 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1320} 1321 1322#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1323 1324#ifdef __mips_n64 1325#define PC_FREE0_1 0xfffffffffffffffful 1326#define PC_FREE2 0x000000fffffffffful 1327#else 1328#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ 1329#define PC_FREE10 0x0000fffful /* Free values for index 10 */ 1330#endif 1331 1332static const u_long pc_freemask[_NPCM] = { 1333#ifdef __mips_n64 1334 PC_FREE0_1, PC_FREE0_1, PC_FREE2 1335#else 1336 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1337 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1338 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1339 PC_FREE0_9, PC_FREE10 1340#endif 1341}; 1342 1343static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 1344 1345SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1346 "Current number of pv entries"); 1347 1348#ifdef PV_STATS 1349static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1350 1351SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1352 "Current number of pv entry chunks"); 1353SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1354 "Current number of pv entry chunks allocated"); 1355SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1356 "Current number of pv entry chunks frees"); 1357SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1358 "Number of times tried to get a chunk page but failed."); 1359 1360static long pv_entry_frees, pv_entry_allocs; 1361static int pv_entry_spare; 1362 1363SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1364 "Current number of pv entry frees"); 1365SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1366 "Current number of pv entry allocs"); 1367SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1368 "Current number of spare pv entries"); 1369#endif 1370 1371/* 1372 * We are in a serious low memory condition. Resort to 1373 * drastic measures to free some pages so we can allocate 1374 * another pv entry chunk. 1375 */ 1376static vm_page_t 1377pmap_pv_reclaim(pmap_t locked_pmap) 1378{ 1379 struct pch newtail; 1380 struct pv_chunk *pc; 1381 pd_entry_t *pde; 1382 pmap_t pmap; 1383 pt_entry_t *pte, oldpte; 1384 pv_entry_t pv; 1385 vm_offset_t va; 1386 vm_page_t m, m_pc; 1387 u_long inuse; 1388 int bit, field, freed, idx; 1389 1390 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 1391 pmap = NULL; 1392 m_pc = NULL; 1393 TAILQ_INIT(&newtail); 1394 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) { 1395 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1396 if (pmap != pc->pc_pmap) { 1397 if (pmap != NULL) { 1398 pmap_invalidate_all(pmap); 1399 if (pmap != locked_pmap) 1400 PMAP_UNLOCK(pmap); 1401 } 1402 pmap = pc->pc_pmap; 1403 /* Avoid deadlock and lock recursion. 
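			 * Locks are always taken in ascending pmap address
			 * order: a pmap that compares greater than
			 * locked_pmap may be locked unconditionally, anything
			 * lower is only trylocked, so two threads can never
			 * wait on each other's pmap lock.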
*/ 1404 if (pmap > locked_pmap) 1405 PMAP_LOCK(pmap); 1406 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { 1407 pmap = NULL; 1408 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 1409 continue; 1410 } 1411 } 1412 1413 /* 1414 * Destroy every non-wired, 4 KB page mapping in the chunk. 1415 */ 1416 freed = 0; 1417 for (field = 0; field < _NPCM; field++) { 1418 for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 1419 inuse != 0; inuse &= ~(1UL << bit)) { 1420 bit = ffsl(inuse) - 1; 1421 idx = field * sizeof(inuse) * NBBY + bit; 1422 pv = &pc->pc_pventry[idx]; 1423 va = pv->pv_va; 1424 pde = pmap_pde(pmap, va); 1425 KASSERT(pde != NULL && *pde != 0, 1426 ("pmap_pv_reclaim: pde")); 1427 pte = pmap_pde_to_pte(pde, va); 1428 oldpte = *pte; 1429 if (pte_test(&oldpte, PTE_W)) 1430 continue; 1431 if (is_kernel_pmap(pmap)) 1432 *pte = PTE_G; 1433 else 1434 *pte = 0; 1435 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(oldpte)); 1436 if (pte_test(&oldpte, PTE_D)) 1437 vm_page_dirty(m); 1438 if (m->md.pv_flags & PV_TABLE_REF) 1439 vm_page_aflag_set(m, PGA_REFERENCED); 1440 m->md.pv_flags &= ~PV_TABLE_REF; 1441 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1442 if (TAILQ_EMPTY(&m->md.pv_list)) 1443 vm_page_aflag_clear(m, PGA_WRITEABLE); 1444 pc->pc_map[field] |= 1UL << bit; 1445 pmap_unuse_pt(pmap, va, *pde); 1446 freed++; 1447 } 1448 } 1449 if (freed == 0) { 1450 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 1451 continue; 1452 } 1453 /* Every freed mapping is for a 4 KB page. */ 1454 pmap->pm_stats.resident_count -= freed; 1455 PV_STAT(pv_entry_frees += freed); 1456 PV_STAT(pv_entry_spare += freed); 1457 pv_entry_count -= freed; 1458 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1459 for (field = 0; field < _NPCM; field++) 1460 if (pc->pc_map[field] != pc_freemask[field]) { 1461 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 1462 pc_list); 1463 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 1464 1465 /* 1466 * One freed pv entry in locked_pmap is 1467 * sufficient. 1468 */ 1469 if (pmap == locked_pmap) 1470 goto out; 1471 break; 1472 } 1473 if (field == _NPCM) { 1474 PV_STAT(pv_entry_spare -= _NPCPV); 1475 PV_STAT(pc_chunk_count--); 1476 PV_STAT(pc_chunk_frees++); 1477 /* Entire chunk is free; return it. */ 1478 m_pc = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS( 1479 (vm_offset_t)pc)); 1480 break; 1481 } 1482 } 1483out: 1484 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); 1485 if (pmap != NULL) { 1486 pmap_invalidate_all(pmap); 1487 if (pmap != locked_pmap) 1488 PMAP_UNLOCK(pmap); 1489 } 1490 return (m_pc); 1491} 1492 1493/* 1494 * free the pv_entry back to the free list 1495 */ 1496static void 1497free_pv_entry(pmap_t pmap, pv_entry_t pv) 1498{ 1499 struct pv_chunk *pc; 1500 int bit, field, idx; 1501 1502 rw_assert(&pvh_global_lock, RA_WLOCKED); 1503 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1504 PV_STAT(pv_entry_frees++); 1505 PV_STAT(pv_entry_spare++); 1506 pv_entry_count--; 1507 pc = pv_to_chunk(pv); 1508 idx = pv - &pc->pc_pventry[0]; 1509 field = idx / (sizeof(u_long) * NBBY); 1510 bit = idx % (sizeof(u_long) * NBBY); 1511 pc->pc_map[field] |= 1ul << bit; 1512 for (idx = 0; idx < _NPCM; idx++) 1513 if (pc->pc_map[idx] != pc_freemask[idx]) { 1514 /* 1515 * 98% of the time, pc is already at the head of the 1516 * list. If it isn't already, move it to the head. 
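			 * Keeping chunks that still have free slots at the
			 * head lets get_pv_entry() find one by looking only
			 * at TAILQ_FIRST(&pmap->pm_pvchunk).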
1517 */ 1518 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != 1519 pc)) { 1520 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1521 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 1522 pc_list); 1523 } 1524 return; 1525 } 1526 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1527 free_pv_chunk(pc); 1528} 1529 1530static void 1531free_pv_chunk(struct pv_chunk *pc) 1532{ 1533 vm_page_t m; 1534 1535 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1536 PV_STAT(pv_entry_spare -= _NPCPV); 1537 PV_STAT(pc_chunk_count--); 1538 PV_STAT(pc_chunk_frees++); 1539 /* entire chunk is free, return it */ 1540 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc)); 1541 vm_page_unwire(m, 0); 1542 vm_page_free(m); 1543} 1544 1545/* 1546 * get a new pv_entry, allocating a block from the system 1547 * when needed. 1548 */ 1549static pv_entry_t 1550get_pv_entry(pmap_t pmap, boolean_t try) 1551{ 1552 struct pv_chunk *pc; 1553 pv_entry_t pv; 1554 vm_page_t m; 1555 int bit, field, idx; 1556 1557 rw_assert(&pvh_global_lock, RA_WLOCKED); 1558 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1559 PV_STAT(pv_entry_allocs++); 1560 pv_entry_count++; 1561retry: 1562 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 1563 if (pc != NULL) { 1564 for (field = 0; field < _NPCM; field++) { 1565 if (pc->pc_map[field]) { 1566 bit = ffsl(pc->pc_map[field]) - 1; 1567 break; 1568 } 1569 } 1570 if (field < _NPCM) { 1571 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit; 1572 pv = &pc->pc_pventry[idx]; 1573 pc->pc_map[field] &= ~(1ul << bit); 1574 /* If this was the last item, move it to tail */ 1575 for (field = 0; field < _NPCM; field++) 1576 if (pc->pc_map[field] != 0) { 1577 PV_STAT(pv_entry_spare--); 1578 return (pv); /* not full, return */ 1579 } 1580 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1581 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 1582 PV_STAT(pv_entry_spare--); 1583 return (pv); 1584 } 1585 } 1586 /* No free items, allocate another chunk */ 1587 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL | 1588 VM_ALLOC_WIRED); 1589 if (m == NULL) { 1590 if (try) { 1591 pv_entry_count--; 1592 PV_STAT(pc_chunk_tryfail++); 1593 return (NULL); 1594 } 1595 m = pmap_pv_reclaim(pmap); 1596 if (m == NULL) 1597 goto retry; 1598 } 1599 PV_STAT(pc_chunk_count++); 1600 PV_STAT(pc_chunk_allocs++); 1601 pc = (struct pv_chunk *)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)); 1602 pc->pc_pmap = pmap; 1603 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 1604 for (field = 1; field < _NPCM; field++) 1605 pc->pc_map[field] = pc_freemask[field]; 1606 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1607 pv = &pc->pc_pventry[0]; 1608 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1609 PV_STAT(pv_entry_spare += _NPCPV - 1); 1610 return (pv); 1611} 1612 1613static pv_entry_t 1614pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1615{ 1616 pv_entry_t pv; 1617 1618 rw_assert(&pvh_global_lock, RA_WLOCKED); 1619 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 1620 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 1621 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 1622 break; 1623 } 1624 } 1625 return (pv); 1626} 1627 1628static void 1629pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1630{ 1631 pv_entry_t pv; 1632 1633 pv = pmap_pvh_remove(pvh, pmap, va); 1634 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx", 1635 (u_long)VM_PAGE_TO_PHYS(__containerof(pvh, struct vm_page, md)), 1636 (u_long)va)); 1637 free_pv_entry(pmap, pv); 1638} 1639 1640static void 1641pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1642{ 
1643 1644 rw_assert(&pvh_global_lock, RA_WLOCKED); 1645 pmap_pvh_free(&m->md, pmap, va); 1646 if (TAILQ_EMPTY(&m->md.pv_list)) 1647 vm_page_aflag_clear(m, PGA_WRITEABLE); 1648} 1649 1650/* 1651 * Conditionally create a pv entry. 1652 */ 1653static boolean_t 1654pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va, 1655 vm_page_t m) 1656{ 1657 pv_entry_t pv; 1658 1659 rw_assert(&pvh_global_lock, RA_WLOCKED); 1660 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1661 if ((pv = get_pv_entry(pmap, TRUE)) != NULL) { 1662 pv->pv_va = va; 1663 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1664 return (TRUE); 1665 } else 1666 return (FALSE); 1667} 1668 1669/* 1670 * pmap_remove_pte: do the things to unmap a page in a process 1671 */ 1672static int 1673pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va, 1674 pd_entry_t pde) 1675{ 1676 pt_entry_t oldpte; 1677 vm_page_t m; 1678 vm_paddr_t pa; 1679 1680 rw_assert(&pvh_global_lock, RA_WLOCKED); 1681 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1682 1683 /* 1684 * Write back all cache lines from the page being unmapped. 1685 */ 1686 mips_dcache_wbinv_range_index(va, PAGE_SIZE); 1687 1688 oldpte = *ptq; 1689 if (is_kernel_pmap(pmap)) 1690 *ptq = PTE_G; 1691 else 1692 *ptq = 0; 1693 1694 if (pte_test(&oldpte, PTE_W)) 1695 pmap->pm_stats.wired_count -= 1; 1696 1697 pmap->pm_stats.resident_count -= 1; 1698 1699 if (pte_test(&oldpte, PTE_MANAGED)) { 1700 pa = TLBLO_PTE_TO_PA(oldpte); 1701 m = PHYS_TO_VM_PAGE(pa); 1702 if (pte_test(&oldpte, PTE_D)) { 1703 KASSERT(!pte_test(&oldpte, PTE_RO), 1704 ("%s: modified page not writable: va: %p, pte: %#jx", 1705 __func__, (void *)va, (uintmax_t)oldpte)); 1706 vm_page_dirty(m); 1707 } 1708 if (m->md.pv_flags & PV_TABLE_REF) 1709 vm_page_aflag_set(m, PGA_REFERENCED); 1710 m->md.pv_flags &= ~PV_TABLE_REF; 1711 1712 pmap_remove_entry(pmap, m, va); 1713 } 1714 return (pmap_unuse_pt(pmap, va, pde)); 1715} 1716 1717/* 1718 * Remove a single page from a process address space 1719 */ 1720static void 1721pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1722{ 1723 pd_entry_t *pde; 1724 pt_entry_t *ptq; 1725 1726 rw_assert(&pvh_global_lock, RA_WLOCKED); 1727 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1728 pde = pmap_pde(pmap, va); 1729 if (pde == NULL || *pde == 0) 1730 return; 1731 ptq = pmap_pde_to_pte(pde, va); 1732 1733 /* 1734 * If there is no pte for this address, just skip it! 1735 */ 1736 if (!pte_test(ptq, PTE_V)) 1737 return; 1738 1739 (void)pmap_remove_pte(pmap, ptq, va, *pde); 1740 pmap_invalidate_page(pmap, va); 1741} 1742 1743/* 1744 * Remove the given range of addresses from the specified map. 1745 * 1746 * It is assumed that the start and end are properly 1747 * rounded to the page size. 1748 */ 1749void 1750pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1751{ 1752 pd_entry_t *pde, *pdpe; 1753 pt_entry_t *pte; 1754 vm_offset_t va, va_next; 1755 1756 /* 1757 * Perform an unsynchronized read. This is, however, safe. 1758 */ 1759 if (pmap->pm_stats.resident_count == 0) 1760 return; 1761 1762 rw_wlock(&pvh_global_lock); 1763 PMAP_LOCK(pmap); 1764 1765 /* 1766 * special handling of removing one page. a very common operation 1767 * and easy to short circuit some code. 
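	 * For larger ranges the loop below batches the TLB work instead:
	 * runs of valid PTEs are cleared first and then flushed with one
	 * pmap_invalidate_range() call per run rather than one
	 * pmap_invalidate_page() call per page.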
1768 */ 1769 if ((sva + PAGE_SIZE) == eva) { 1770 pmap_remove_page(pmap, sva); 1771 goto out; 1772 } 1773 for (; sva < eva; sva = va_next) { 1774 pdpe = pmap_segmap(pmap, sva); 1775#ifdef __mips_n64 1776 if (*pdpe == 0) { 1777 va_next = (sva + NBSEG) & ~SEGMASK; 1778 if (va_next < sva) 1779 va_next = eva; 1780 continue; 1781 } 1782#endif 1783 va_next = (sva + NBPDR) & ~PDRMASK; 1784 if (va_next < sva) 1785 va_next = eva; 1786 1787 pde = pmap_pdpe_to_pde(pdpe, sva); 1788 if (*pde == NULL) 1789 continue; 1790 1791 /* 1792 * Limit our scan to either the end of the va represented 1793 * by the current page table page, or to the end of the 1794 * range being removed. 1795 */ 1796 if (va_next > eva) 1797 va_next = eva; 1798 1799 va = va_next; 1800 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 1801 sva += PAGE_SIZE) { 1802 if (!pte_test(pte, PTE_V)) { 1803 if (va != va_next) { 1804 pmap_invalidate_range(pmap, va, sva); 1805 va = va_next; 1806 } 1807 continue; 1808 } 1809 if (va == va_next) 1810 va = sva; 1811 if (pmap_remove_pte(pmap, pte, sva, *pde)) { 1812 sva += PAGE_SIZE; 1813 break; 1814 } 1815 } 1816 if (va != va_next) 1817 pmap_invalidate_range(pmap, va, sva); 1818 } 1819out: 1820 rw_wunlock(&pvh_global_lock); 1821 PMAP_UNLOCK(pmap); 1822} 1823 1824/* 1825 * Routine: pmap_remove_all 1826 * Function: 1827 * Removes this physical page from 1828 * all physical maps in which it resides. 1829 * Reflects back modify bits to the pager. 1830 * 1831 * Notes: 1832 * Original versions of this routine were very 1833 * inefficient because they iteratively called 1834 * pmap_remove (slow...) 1835 */ 1836 1837void 1838pmap_remove_all(vm_page_t m) 1839{ 1840 pv_entry_t pv; 1841 pmap_t pmap; 1842 pd_entry_t *pde; 1843 pt_entry_t *pte, tpte; 1844 1845 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1846 ("pmap_remove_all: page %p is not managed", m)); 1847 rw_wlock(&pvh_global_lock); 1848 1849 if (m->md.pv_flags & PV_TABLE_REF) 1850 vm_page_aflag_set(m, PGA_REFERENCED); 1851 1852 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1853 pmap = PV_PMAP(pv); 1854 PMAP_LOCK(pmap); 1855 1856 /* 1857 * If it's last mapping writeback all caches from 1858 * the page being destroyed 1859 */ 1860 if (TAILQ_NEXT(pv, pv_list) == NULL) 1861 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); 1862 1863 pmap->pm_stats.resident_count--; 1864 1865 pde = pmap_pde(pmap, pv->pv_va); 1866 KASSERT(pde != NULL && *pde != 0, ("pmap_remove_all: pde")); 1867 pte = pmap_pde_to_pte(pde, pv->pv_va); 1868 1869 tpte = *pte; 1870 if (is_kernel_pmap(pmap)) 1871 *pte = PTE_G; 1872 else 1873 *pte = 0; 1874 1875 if (pte_test(&tpte, PTE_W)) 1876 pmap->pm_stats.wired_count--; 1877 1878 /* 1879 * Update the vm_page_t clean and reference bits. 1880 */ 1881 if (pte_test(&tpte, PTE_D)) { 1882 KASSERT(!pte_test(&tpte, PTE_RO), 1883 ("%s: modified page not writable: va: %p, pte: %#jx", 1884 __func__, (void *)pv->pv_va, (uintmax_t)tpte)); 1885 vm_page_dirty(m); 1886 } 1887 pmap_invalidate_page(pmap, pv->pv_va); 1888 1889 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1890 pmap_unuse_pt(pmap, pv->pv_va, *pde); 1891 free_pv_entry(pmap, pv); 1892 PMAP_UNLOCK(pmap); 1893 } 1894 1895 vm_page_aflag_clear(m, PGA_WRITEABLE); 1896 m->md.pv_flags &= ~PV_TABLE_REF; 1897 rw_wunlock(&pvh_global_lock); 1898} 1899 1900/* 1901 * Set the physical protection on the 1902 * specified range of this map as requested. 
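 *
 * Write access is revoked by setting PTE_RO; a request without
 * VM_PROT_READ falls through to pmap_remove() instead.  A
 * hypothetical caller making a page-aligned range read-only would
 * simply do:
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);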
1903 */ 1904void 1905pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1906{ 1907 pt_entry_t pbits, *pte; 1908 pd_entry_t *pde, *pdpe; 1909 vm_offset_t va, va_next; 1910 vm_paddr_t pa; 1911 vm_page_t m; 1912 1913 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1914 pmap_remove(pmap, sva, eva); 1915 return; 1916 } 1917 if (prot & VM_PROT_WRITE) 1918 return; 1919 1920 rw_wlock(&pvh_global_lock); 1921 PMAP_LOCK(pmap); 1922 for (; sva < eva; sva = va_next) { 1923 pdpe = pmap_segmap(pmap, sva); 1924#ifdef __mips_n64 1925 if (*pdpe == 0) { 1926 va_next = (sva + NBSEG) & ~SEGMASK; 1927 if (va_next < sva) 1928 va_next = eva; 1929 continue; 1930 } 1931#endif 1932 va_next = (sva + NBPDR) & ~PDRMASK; 1933 if (va_next < sva) 1934 va_next = eva; 1935 1936 pde = pmap_pdpe_to_pde(pdpe, sva); 1937 if (*pde == NULL) 1938 continue; 1939 1940 /* 1941 * Limit our scan to either the end of the va represented 1942 * by the current page table page, or to the end of the 1943 * range being write protected. 1944 */ 1945 if (va_next > eva) 1946 va_next = eva; 1947 1948 va = va_next; 1949 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 1950 sva += PAGE_SIZE) { 1951 pbits = *pte; 1952 if (!pte_test(&pbits, PTE_V) || pte_test(&pbits, 1953 PTE_RO)) { 1954 if (va != va_next) { 1955 pmap_invalidate_range(pmap, va, sva); 1956 va = va_next; 1957 } 1958 continue; 1959 } 1960 pte_set(&pbits, PTE_RO); 1961 if (pte_test(&pbits, PTE_D)) { 1962 pte_clear(&pbits, PTE_D); 1963 if (pte_test(&pbits, PTE_MANAGED)) { 1964 pa = TLBLO_PTE_TO_PA(pbits); 1965 m = PHYS_TO_VM_PAGE(pa); 1966 vm_page_dirty(m); 1967 } 1968 if (va == va_next) 1969 va = sva; 1970 } else { 1971 /* 1972 * Unless PTE_D is set, any TLB entries 1973 * mapping "sva" don't allow write access, so 1974 * they needn't be invalidated. 1975 */ 1976 if (va != va_next) { 1977 pmap_invalidate_range(pmap, va, sva); 1978 va = va_next; 1979 } 1980 } 1981 *pte = pbits; 1982 } 1983 if (va != va_next) 1984 pmap_invalidate_range(pmap, va, sva); 1985 } 1986 rw_wunlock(&pvh_global_lock); 1987 PMAP_UNLOCK(pmap); 1988} 1989 1990/* 1991 * Insert the given physical page (p) at 1992 * the specified virtual address (v) in the 1993 * target physical map with the protection requested. 1994 * 1995 * If specified, the page will be wired down, meaning 1996 * that the related pte can not be reclaimed. 1997 * 1998 * NB: This is the only routine which MAY NOT lazy-evaluate 1999 * or lose information. That is, this routine must actually 2000 * insert this page into the given map NOW. 
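 *
 * A sketch of a typical call, with placeholder arguments taken from a
 * hypothetical fault handler (not code from this file):
 *
 *	pmap_enter(&vm->vm_pmap, va, fault_type, m, prot, wired);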
2001 */ 2002void 2003pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 2004 vm_prot_t prot, boolean_t wired) 2005{ 2006 vm_paddr_t pa, opa; 2007 pt_entry_t *pte; 2008 pt_entry_t origpte, newpte; 2009 pv_entry_t pv; 2010 vm_page_t mpte, om; 2011 2012 va &= ~PAGE_MASK; 2013 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 2014 KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || 2015 va >= kmi.clean_eva, 2016 ("pmap_enter: managed mapping within the clean submap")); 2017 KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0, 2018 ("pmap_enter: page %p is not busy", m)); 2019 pa = VM_PAGE_TO_PHYS(m); 2020 newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, access, prot); 2021 if (wired) 2022 newpte |= PTE_W; 2023 if (is_kernel_pmap(pmap)) 2024 newpte |= PTE_G; 2025 if (is_cacheable_mem(pa)) 2026 newpte |= PTE_C_CACHE; 2027 else 2028 newpte |= PTE_C_UNCACHED; 2029 2030 mpte = NULL; 2031 2032 rw_wlock(&pvh_global_lock); 2033 PMAP_LOCK(pmap); 2034 2035 /* 2036 * In the case that a page table page is not resident, we are 2037 * creating it here. 2038 */ 2039 if (va < VM_MAXUSER_ADDRESS) { 2040 mpte = pmap_allocpte(pmap, va, M_WAITOK); 2041 } 2042 pte = pmap_pte(pmap, va); 2043 2044 /* 2045 * Page Directory table entry not valid, we need a new PT page 2046 */ 2047 if (pte == NULL) { 2048 panic("pmap_enter: invalid page directory, pdir=%p, va=%p", 2049 (void *)pmap->pm_segtab, (void *)va); 2050 } 2051 om = NULL; 2052 origpte = *pte; 2053 opa = TLBLO_PTE_TO_PA(origpte); 2054 2055 /* 2056 * Mapping has not changed, must be protection or wiring change. 2057 */ 2058 if (pte_test(&origpte, PTE_V) && opa == pa) { 2059 /* 2060 * Wiring change, just update stats. We don't worry about 2061 * wiring PT pages as they remain resident as long as there 2062 * are valid mappings in them. Hence, if a user page is 2063 * wired, the PT page will be also. 2064 */ 2065 if (wired && !pte_test(&origpte, PTE_W)) 2066 pmap->pm_stats.wired_count++; 2067 else if (!wired && pte_test(&origpte, PTE_W)) 2068 pmap->pm_stats.wired_count--; 2069 2070 KASSERT(!pte_test(&origpte, PTE_D | PTE_RO), 2071 ("%s: modified page not writable: va: %p, pte: %#jx", 2072 __func__, (void *)va, (uintmax_t)origpte)); 2073 2074 /* 2075 * Remove extra pte reference 2076 */ 2077 if (mpte) 2078 mpte->wire_count--; 2079 2080 if (pte_test(&origpte, PTE_MANAGED)) { 2081 m->md.pv_flags |= PV_TABLE_REF; 2082 om = m; 2083 newpte |= PTE_MANAGED; 2084 if (!pte_test(&newpte, PTE_RO)) 2085 vm_page_aflag_set(m, PGA_WRITEABLE); 2086 } 2087 goto validate; 2088 } 2089 2090 pv = NULL; 2091 2092 /* 2093 * Mapping has changed, invalidate old range and fall through to 2094 * handle validating new mapping. 2095 */ 2096 if (opa) { 2097 if (pte_test(&origpte, PTE_W)) 2098 pmap->pm_stats.wired_count--; 2099 2100 if (pte_test(&origpte, PTE_MANAGED)) { 2101 om = PHYS_TO_VM_PAGE(opa); 2102 pv = pmap_pvh_remove(&om->md, pmap, va); 2103 } 2104 if (mpte != NULL) { 2105 mpte->wire_count--; 2106 KASSERT(mpte->wire_count > 0, 2107 ("pmap_enter: missing reference to page table page," 2108 " va: %p", (void *)va)); 2109 } 2110 } else 2111 pmap->pm_stats.resident_count++; 2112 2113 /* 2114 * Enter on the PV list if part of our managed memory. 
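	 * Managed pages get a pv entry and PTE_MANAGED so that
	 * pmap_remove_all() can later find every mapping of the page;
	 * writable managed mappings also mark the page PGA_WRITEABLE.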
2115 */ 2116 if ((m->oflags & VPO_UNMANAGED) == 0) { 2117 m->md.pv_flags |= PV_TABLE_REF; 2118 if (pv == NULL) 2119 pv = get_pv_entry(pmap, FALSE); 2120 pv->pv_va = va; 2121 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2122 newpte |= PTE_MANAGED; 2123 if (!pte_test(&newpte, PTE_RO)) 2124 vm_page_aflag_set(m, PGA_WRITEABLE); 2125 } else if (pv != NULL) 2126 free_pv_entry(pmap, pv); 2127 2128 /* 2129 * Increment counters 2130 */ 2131 if (wired) 2132 pmap->pm_stats.wired_count++; 2133 2134validate: 2135 2136#ifdef PMAP_DEBUG 2137 printf("pmap_enter: va: %p -> pa: %p\n", (void *)va, (void *)pa); 2138#endif 2139 2140 /* 2141 * if the mapping or permission bits are different, we need to 2142 * update the pte. 2143 */ 2144 if (origpte != newpte) { 2145 *pte = newpte; 2146 if (pte_test(&origpte, PTE_V)) { 2147 if (pte_test(&origpte, PTE_MANAGED) && opa != pa) { 2148 if (om->md.pv_flags & PV_TABLE_REF) 2149 vm_page_aflag_set(om, PGA_REFERENCED); 2150 om->md.pv_flags &= ~PV_TABLE_REF; 2151 } 2152 if (pte_test(&origpte, PTE_D)) { 2153 KASSERT(!pte_test(&origpte, PTE_RO), 2154 ("pmap_enter: modified page not writable:" 2155 " va: %p, pte: %#jx", (void *)va, (uintmax_t)origpte)); 2156 if (pte_test(&origpte, PTE_MANAGED)) 2157 vm_page_dirty(om); 2158 } 2159 if (pte_test(&origpte, PTE_MANAGED) && 2160 TAILQ_EMPTY(&om->md.pv_list)) 2161 vm_page_aflag_clear(om, PGA_WRITEABLE); 2162 pmap_update_page(pmap, va, newpte); 2163 } 2164 } 2165 2166 /* 2167 * Sync I & D caches for executable pages. Do this only if the 2168 * target pmap belongs to the current process. Otherwise, an 2169 * unresolvable TLB miss may occur. 2170 */ 2171 if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) && 2172 (prot & VM_PROT_EXECUTE)) { 2173 mips_icache_sync_range(va, PAGE_SIZE); 2174 mips_dcache_wbinv_range(va, PAGE_SIZE); 2175 } 2176 rw_wunlock(&pvh_global_lock); 2177 PMAP_UNLOCK(pmap); 2178} 2179 2180/* 2181 * this code makes some *MAJOR* assumptions: 2182 * 1. Current pmap & pmap exists. 2183 * 2. Not wired. 2184 * 3. Read access. 2185 * 4. No page table pages. 2186 * but is *MUCH* faster than pmap_enter... 2187 */ 2188 2189void 2190pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2191{ 2192 2193 rw_wlock(&pvh_global_lock); 2194 PMAP_LOCK(pmap); 2195 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); 2196 rw_wunlock(&pvh_global_lock); 2197 PMAP_UNLOCK(pmap); 2198} 2199 2200static vm_page_t 2201pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2202 vm_prot_t prot, vm_page_t mpte) 2203{ 2204 pt_entry_t *pte; 2205 vm_paddr_t pa; 2206 2207 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2208 (m->oflags & VPO_UNMANAGED) != 0, 2209 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2210 rw_assert(&pvh_global_lock, RA_WLOCKED); 2211 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2212 2213 /* 2214 * In the case that a page table page is not resident, we are 2215 * creating it here. 2216 */ 2217 if (va < VM_MAXUSER_ADDRESS) { 2218 pd_entry_t *pde; 2219 unsigned ptepindex; 2220 2221 /* 2222 * Calculate pagetable page index 2223 */ 2224 ptepindex = pmap_pde_pindex(va); 2225 if (mpte && (mpte->pindex == ptepindex)) { 2226 mpte->wire_count++; 2227 } else { 2228 /* 2229 * Get the page directory entry 2230 */ 2231 pde = pmap_pde(pmap, va); 2232 2233 /* 2234 * If the page table page is mapped, we just 2235 * increment the hold count, and activate it. 
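 * The page table page's wire_count counts the valid mappings it
 * currently holds; the error paths below call pmap_unwire_ptp() to
 * drop the reference taken here if the mapping is not actually
 * entered.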
2236 */ 2237 if (pde && *pde != 0) { 2238 mpte = PHYS_TO_VM_PAGE( 2239 MIPS_DIRECT_TO_PHYS(*pde)); 2240 mpte->wire_count++; 2241 } else { 2242 mpte = _pmap_allocpte(pmap, ptepindex, 2243 M_NOWAIT); 2244 if (mpte == NULL) 2245 return (mpte); 2246 } 2247 } 2248 } else { 2249 mpte = NULL; 2250 } 2251 2252 pte = pmap_pte(pmap, va); 2253 if (pte_test(pte, PTE_V)) { 2254 if (mpte != NULL) { 2255 mpte->wire_count--; 2256 mpte = NULL; 2257 } 2258 return (mpte); 2259 } 2260 2261 /* 2262 * Enter on the PV list if part of our managed memory. 2263 */ 2264 if ((m->oflags & VPO_UNMANAGED) == 0 && 2265 !pmap_try_insert_pv_entry(pmap, mpte, va, m)) { 2266 if (mpte != NULL) { 2267 pmap_unwire_ptp(pmap, va, mpte); 2268 mpte = NULL; 2269 } 2270 return (mpte); 2271 } 2272 2273 /* 2274 * Increment counters 2275 */ 2276 pmap->pm_stats.resident_count++; 2277 2278 pa = VM_PAGE_TO_PHYS(m); 2279 2280 /* 2281 * Now validate mapping with RO protection 2282 */ 2283 *pte = PTE_RO | TLBLO_PA_TO_PFN(pa) | PTE_V; 2284 if ((m->oflags & VPO_UNMANAGED) == 0) 2285 *pte |= PTE_MANAGED; 2286 2287 if (is_cacheable_mem(pa)) 2288 *pte |= PTE_C_CACHE; 2289 else 2290 *pte |= PTE_C_UNCACHED; 2291 2292 if (is_kernel_pmap(pmap)) 2293 *pte |= PTE_G; 2294 else { 2295 /* 2296 * Sync I & D caches. Do this only if the target pmap 2297 * belongs to the current process. Otherwise, an 2298 * unresolvable TLB miss may occur. */ 2299 if (pmap == &curproc->p_vmspace->vm_pmap) { 2300 va &= ~PAGE_MASK; 2301 mips_icache_sync_range(va, PAGE_SIZE); 2302 mips_dcache_wbinv_range(va, PAGE_SIZE); 2303 } 2304 } 2305 return (mpte); 2306} 2307 2308/* 2309 * Make a temporary mapping for a physical address. This is only intended 2310 * to be used for panic dumps. 2311 * 2312 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2313 */ 2314void * 2315pmap_kenter_temporary(vm_paddr_t pa, int i) 2316{ 2317 vm_offset_t va; 2318 2319 if (i != 0) 2320 printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n", 2321 __func__); 2322 2323 if (MIPS_DIRECT_MAPPABLE(pa)) { 2324 va = MIPS_PHYS_TO_DIRECT(pa); 2325 } else { 2326#ifndef __mips_n64 /* XXX : to be converted to new style */ 2327 int cpu; 2328 register_t intr; 2329 struct local_sysmaps *sysm; 2330 pt_entry_t *pte, npte; 2331 2332 /* If this is used other than for dumps, we may need to leave 2333 * interrupts disabled on return. 
If crash dumps don't work when 2334 * we get to this point, we might want to consider this (leaving things 2335 * disabled as a starting point ;-) 2336 */ 2337 intr = intr_disable(); 2338 cpu = PCPU_GET(cpuid); 2339 sysm = &sysmap_lmem[cpu]; 2340 /* Since this is for the debugger, no locks or any other fun */ 2341 npte = TLBLO_PA_TO_PFN(pa) | PTE_C_CACHE | PTE_D | PTE_V | 2342 PTE_G; 2343 pte = pmap_pte(kernel_pmap, sysm->base); 2344 *pte = npte; 2345 sysm->valid1 = 1; 2346 pmap_update_page(kernel_pmap, sysm->base, npte); 2347 va = sysm->base; 2348 intr_restore(intr); 2349#endif 2350 } 2351 return ((void *)va); 2352} 2353 2354void 2355pmap_kenter_temporary_free(vm_paddr_t pa) 2356{ 2357#ifndef __mips_n64 /* XXX : to be converted to new style */ 2358 int cpu; 2359 register_t intr; 2360 struct local_sysmaps *sysm; 2361#endif 2362 2363 if (MIPS_DIRECT_MAPPABLE(pa)) { 2364 /* nothing to do for this case */ 2365 return; 2366 } 2367#ifndef __mips_n64 /* XXX : to be converted to new style */ 2368 cpu = PCPU_GET(cpuid); 2369 sysm = &sysmap_lmem[cpu]; 2370 if (sysm->valid1) { 2371 pt_entry_t *pte; 2372 2373 intr = intr_disable(); 2374 pte = pmap_pte(kernel_pmap, sysm->base); 2375 *pte = PTE_G; 2376 pmap_invalidate_page(kernel_pmap, sysm->base); 2377 intr_restore(intr); 2378 sysm->valid1 = 0; 2379 } 2380#endif 2381} 2382 2383/* 2384 * Maps a sequence of resident pages belonging to the same object. 2385 * The sequence begins with the given page m_start. This page is 2386 * mapped at the given virtual address start. Each subsequent page is 2387 * mapped at a virtual address that is offset from start by the same 2388 * amount as the page is offset from m_start within the object. The 2389 * last page in the sequence is the page with the largest offset from 2390 * m_start that can be mapped at a virtual address less than the given 2391 * virtual address end. Not every virtual page between start and end 2392 * is mapped; only those for which a resident page exists with the 2393 * corresponding offset from m_start are mapped. 2394 */ 2395void 2396pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2397 vm_page_t m_start, vm_prot_t prot) 2398{ 2399 vm_page_t m, mpte; 2400 vm_pindex_t diff, psize; 2401 2402 VM_OBJECT_ASSERT_WLOCKED(m_start->object); 2403 psize = atop(end - start); 2404 mpte = NULL; 2405 m = m_start; 2406 rw_wlock(&pvh_global_lock); 2407 PMAP_LOCK(pmap); 2408 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2409 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m, 2410 prot, mpte); 2411 m = TAILQ_NEXT(m, listq); 2412 } 2413 rw_wunlock(&pvh_global_lock); 2414 PMAP_UNLOCK(pmap); 2415} 2416 2417/* 2418 * pmap_object_init_pt preloads the ptes for a given object 2419 * into the specified pmap. This eliminates the blast of soft 2420 * faults on process startup and immediately after an mmap. 2421 */ 2422void 2423pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2424 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 2425{ 2426 VM_OBJECT_ASSERT_WLOCKED(object); 2427 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2428 ("pmap_object_init_pt: non-device object")); 2429} 2430 2431/* 2432 * Routine: pmap_change_wiring 2433 * Function: Change the wiring attribute for a map/virtual-address 2434 * pair. 2435 * In/out conditions: 2436 * The mapping must already exist in the pmap. 
2437 */ 2438void 2439pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 2440{ 2441 pt_entry_t *pte; 2442 2443 PMAP_LOCK(pmap); 2444 pte = pmap_pte(pmap, va); 2445 2446 if (wired && !pte_test(pte, PTE_W)) 2447 pmap->pm_stats.wired_count++; 2448 else if (!wired && pte_test(pte, PTE_W)) 2449 pmap->pm_stats.wired_count--; 2450 2451 /* 2452 * Wiring is not a hardware characteristic so there is no need to 2453 * invalidate TLB. 2454 */ 2455 if (wired) 2456 pte_set(pte, PTE_W); 2457 else 2458 pte_clear(pte, PTE_W); 2459 PMAP_UNLOCK(pmap); 2460} 2461 2462/* 2463 * Copy the range specified by src_addr/len 2464 * from the source map to the range dst_addr/len 2465 * in the destination map. 2466 * 2467 * This routine is only advisory and need not do anything. 2468 */ 2469 2470void 2471pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 2472 vm_size_t len, vm_offset_t src_addr) 2473{ 2474} 2475 2476/* 2477 * pmap_zero_page zeros the specified hardware page by mapping 2478 * the page into KVM and using bzero to clear its contents. 2479 * 2480 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2481 */ 2482void 2483pmap_zero_page(vm_page_t m) 2484{ 2485 vm_offset_t va; 2486 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2487 2488 if (MIPS_DIRECT_MAPPABLE(phys)) { 2489 va = MIPS_PHYS_TO_DIRECT(phys); 2490 bzero((caddr_t)va, PAGE_SIZE); 2491 mips_dcache_wbinv_range(va, PAGE_SIZE); 2492 } else { 2493 va = pmap_lmem_map1(phys); 2494 bzero((caddr_t)va, PAGE_SIZE); 2495 mips_dcache_wbinv_range(va, PAGE_SIZE); 2496 pmap_lmem_unmap(); 2497 } 2498} 2499 2500/* 2501 * pmap_zero_page_area zeros the specified hardware page by mapping 2502 * the page into KVM and using bzero to clear its contents. 2503 * 2504 * off and size may not cover an area beyond a single hardware page. 2505 */ 2506void 2507pmap_zero_page_area(vm_page_t m, int off, int size) 2508{ 2509 vm_offset_t va; 2510 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2511 2512 if (MIPS_DIRECT_MAPPABLE(phys)) { 2513 va = MIPS_PHYS_TO_DIRECT(phys); 2514 bzero((char *)(caddr_t)va + off, size); 2515 mips_dcache_wbinv_range(va + off, size); 2516 } else { 2517 va = pmap_lmem_map1(phys); 2518 bzero((char *)va + off, size); 2519 mips_dcache_wbinv_range(va + off, size); 2520 pmap_lmem_unmap(); 2521 } 2522} 2523 2524void 2525pmap_zero_page_idle(vm_page_t m) 2526{ 2527 vm_offset_t va; 2528 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2529 2530 if (MIPS_DIRECT_MAPPABLE(phys)) { 2531 va = MIPS_PHYS_TO_DIRECT(phys); 2532 bzero((caddr_t)va, PAGE_SIZE); 2533 mips_dcache_wbinv_range(va, PAGE_SIZE); 2534 } else { 2535 va = pmap_lmem_map1(phys); 2536 bzero((caddr_t)va, PAGE_SIZE); 2537 mips_dcache_wbinv_range(va, PAGE_SIZE); 2538 pmap_lmem_unmap(); 2539 } 2540} 2541 2542/* 2543 * pmap_copy_page copies the specified (machine independent) 2544 * page by mapping the page into virtual memory and using 2545 * bcopy to copy the page, one machine dependent page at a 2546 * time. 2547 * 2548 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 
2549 */ 2550void 2551pmap_copy_page(vm_page_t src, vm_page_t dst) 2552{ 2553 vm_offset_t va_src, va_dst; 2554 vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src); 2555 vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst); 2556 2557 if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) { 2558 /* easy case, all can be accessed via KSEG0 */ 2559 /* 2560 * Flush all caches for VA that are mapped to this page 2561 * to make sure that data in SDRAM is up to date 2562 */ 2563 pmap_flush_pvcache(src); 2564 mips_dcache_wbinv_range_index( 2565 MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE); 2566 va_src = MIPS_PHYS_TO_DIRECT(phys_src); 2567 va_dst = MIPS_PHYS_TO_DIRECT(phys_dst); 2568 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); 2569 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2570 } else { 2571 va_src = pmap_lmem_map2(phys_src, phys_dst); 2572 va_dst = va_src + PAGE_SIZE; 2573 bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE); 2574 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2575 pmap_lmem_unmap(); 2576 } 2577} 2578 2579/* 2580 * Returns true if the pmap's pv is one of the first 2581 * 16 pvs linked to from this page. This count may 2582 * be changed upwards or downwards in the future; it 2583 * is only necessary that true be returned for a small 2584 * subset of pmaps for proper page aging. 2585 */ 2586boolean_t 2587pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2588{ 2589 pv_entry_t pv; 2590 int loops = 0; 2591 boolean_t rv; 2592 2593 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2594 ("pmap_page_exists_quick: page %p is not managed", m)); 2595 rv = FALSE; 2596 rw_wlock(&pvh_global_lock); 2597 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2598 if (PV_PMAP(pv) == pmap) { 2599 rv = TRUE; 2600 break; 2601 } 2602 loops++; 2603 if (loops >= 16) 2604 break; 2605 } 2606 rw_wunlock(&pvh_global_lock); 2607 return (rv); 2608} 2609 2610/* 2611 * Remove all pages from specified address space 2612 * this aids process exit speeds. Also, this code 2613 * is special cased for current process only, but 2614 * can have the more generic (and slightly slower) 2615 * mode enabled. This is much faster than pmap_remove 2616 * in the case of running down an entire address space. 2617 */ 2618void 2619pmap_remove_pages(pmap_t pmap) 2620{ 2621 pd_entry_t *pde; 2622 pt_entry_t *pte, tpte; 2623 pv_entry_t pv; 2624 vm_page_t m; 2625 struct pv_chunk *pc, *npc; 2626 u_long inuse, bitmask; 2627 int allfree, bit, field, idx; 2628 2629 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 2630 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2631 return; 2632 } 2633 rw_wlock(&pvh_global_lock); 2634 PMAP_LOCK(pmap); 2635 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2636 allfree = 1; 2637 for (field = 0; field < _NPCM; field++) { 2638 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2639 while (inuse != 0) { 2640 bit = ffsl(inuse) - 1; 2641 bitmask = 1UL << bit; 2642 idx = field * sizeof(inuse) * NBBY + bit; 2643 pv = &pc->pc_pventry[idx]; 2644 inuse &= ~bitmask; 2645 2646 pde = pmap_pde(pmap, pv->pv_va); 2647 KASSERT(pde != NULL && *pde != 0, 2648 ("pmap_remove_pages: pde")); 2649 pte = pmap_pde_to_pte(pde, pv->pv_va); 2650 if (!pte_test(pte, PTE_V)) 2651 panic("pmap_remove_pages: bad pte"); 2652 tpte = *pte; 2653 2654/* 2655 * We cannot remove wired pages from a process' mapping at this time 2656 */ 2657 if (pte_test(&tpte, PTE_W)) { 2658 allfree = 0; 2659 continue; 2660 } 2661 *pte = is_kernel_pmap(pmap) ? 
PTE_G : 0; 2662 2663 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte)); 2664 KASSERT(m != NULL, 2665 ("pmap_remove_pages: bad tpte %#jx", 2666 (uintmax_t)tpte)); 2667 2668 /* 2669 * Update the vm_page_t clean and reference bits. 2670 */ 2671 if (pte_test(&tpte, PTE_D)) 2672 vm_page_dirty(m); 2673 2674 /* Mark free */ 2675 PV_STAT(pv_entry_frees++); 2676 PV_STAT(pv_entry_spare++); 2677 pv_entry_count--; 2678 pc->pc_map[field] |= bitmask; 2679 pmap->pm_stats.resident_count--; 2680 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2681 if (TAILQ_EMPTY(&m->md.pv_list)) 2682 vm_page_aflag_clear(m, PGA_WRITEABLE); 2683 pmap_unuse_pt(pmap, pv->pv_va, *pde); 2684 } 2685 } 2686 if (allfree) { 2687 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2688 free_pv_chunk(pc); 2689 } 2690 } 2691 pmap_invalidate_all(pmap); 2692 PMAP_UNLOCK(pmap); 2693 rw_wunlock(&pvh_global_lock); 2694} 2695 2696/* 2697 * pmap_testbit tests bits in pte's 2698 */ 2699static boolean_t 2700pmap_testbit(vm_page_t m, int bit) 2701{ 2702 pv_entry_t pv; 2703 pmap_t pmap; 2704 pt_entry_t *pte; 2705 boolean_t rv = FALSE; 2706 2707 if (m->oflags & VPO_UNMANAGED) 2708 return (rv); 2709 2710 rw_assert(&pvh_global_lock, RA_WLOCKED); 2711 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2712 pmap = PV_PMAP(pv); 2713 PMAP_LOCK(pmap); 2714 pte = pmap_pte(pmap, pv->pv_va); 2715 rv = pte_test(pte, bit); 2716 PMAP_UNLOCK(pmap); 2717 if (rv) 2718 break; 2719 } 2720 return (rv); 2721} 2722 2723/* 2724 * pmap_page_wired_mappings: 2725 * 2726 * Return the number of managed mappings to the given physical page 2727 * that are wired. 2728 */ 2729int 2730pmap_page_wired_mappings(vm_page_t m) 2731{ 2732 pv_entry_t pv; 2733 pmap_t pmap; 2734 pt_entry_t *pte; 2735 int count; 2736 2737 count = 0; 2738 if ((m->oflags & VPO_UNMANAGED) != 0) 2739 return (count); 2740 rw_wlock(&pvh_global_lock); 2741 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2742 pmap = PV_PMAP(pv); 2743 PMAP_LOCK(pmap); 2744 pte = pmap_pte(pmap, pv->pv_va); 2745 if (pte_test(pte, PTE_W)) 2746 count++; 2747 PMAP_UNLOCK(pmap); 2748 } 2749 rw_wunlock(&pvh_global_lock); 2750 return (count); 2751} 2752 2753/* 2754 * Clear the write and modified bits in each of the given page's mappings. 2755 */ 2756void 2757pmap_remove_write(vm_page_t m) 2758{ 2759 pmap_t pmap; 2760 pt_entry_t pbits, *pte; 2761 pv_entry_t pv; 2762 2763 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2764 ("pmap_remove_write: page %p is not managed", m)); 2765 2766 /* 2767 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by 2768 * another thread while the object is locked. Thus, if PGA_WRITEABLE 2769 * is clear, no page table entries need updating. 2770 */ 2771 VM_OBJECT_ASSERT_WLOCKED(m->object); 2772 if ((m->oflags & VPO_BUSY) == 0 && 2773 (m->aflags & PGA_WRITEABLE) == 0) 2774 return; 2775 rw_wlock(&pvh_global_lock); 2776 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2777 pmap = PV_PMAP(pv); 2778 PMAP_LOCK(pmap); 2779 pte = pmap_pte(pmap, pv->pv_va); 2780 KASSERT(pte != NULL && pte_test(pte, PTE_V), 2781 ("page on pv_list has no pte")); 2782 pbits = *pte; 2783 if (pte_test(&pbits, PTE_D)) { 2784 pte_clear(&pbits, PTE_D); 2785 vm_page_dirty(m); 2786 } 2787 pte_set(&pbits, PTE_RO); 2788 if (pbits != *pte) { 2789 *pte = pbits; 2790 pmap_update_page(pmap, pv->pv_va, pbits); 2791 } 2792 PMAP_UNLOCK(pmap); 2793 } 2794 vm_page_aflag_clear(m, PGA_WRITEABLE); 2795 rw_wunlock(&pvh_global_lock); 2796} 2797 2798/* 2799 * pmap_ts_referenced: 2800 * 2801 * Return the count of reference bits for a page, clearing all of them. 
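 * MIPS provides no hardware-maintained reference bit, so this
 * implementation reports at most one reference: it tests and clears
 * the software PV_TABLE_REF flag kept in the page's machine-dependent
 * fields.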
2802 */ 2803 int 2804 pmap_ts_referenced(vm_page_t m) 2805 { 2806 2807 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2808 ("pmap_ts_referenced: page %p is not managed", m)); 2809 if (m->md.pv_flags & PV_TABLE_REF) { 2810 rw_wlock(&pvh_global_lock); 2811 m->md.pv_flags &= ~PV_TABLE_REF; 2812 rw_wunlock(&pvh_global_lock); 2813 return (1); 2814 } 2815 return (0); 2816} 2817 2818/* 2819 * pmap_is_modified: 2820 * 2821 * Return whether or not the specified physical page was modified 2822 * in any physical maps. 2823 */ 2824boolean_t 2825pmap_is_modified(vm_page_t m) 2826{ 2827 boolean_t rv; 2828 2829 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2830 ("pmap_is_modified: page %p is not managed", m)); 2831 2832 /* 2833 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be 2834 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 2835 * is clear, no PTEs can have PTE_D set. 2836 */ 2837 VM_OBJECT_ASSERT_WLOCKED(m->object); 2838 if ((m->oflags & VPO_BUSY) == 0 && 2839 (m->aflags & PGA_WRITEABLE) == 0) 2840 return (FALSE); 2841 rw_wlock(&pvh_global_lock); 2842 rv = pmap_testbit(m, PTE_D); 2843 rw_wunlock(&pvh_global_lock); 2844 return (rv); 2845} 2846 2847/* N/C */ 2848 2849/* 2850 * pmap_is_prefaultable: 2851 * 2852 * Return whether or not the specified virtual address is eligible 2853 * for prefault. 2854 */ 2855boolean_t 2856pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2857{ 2858 pd_entry_t *pde; 2859 pt_entry_t *pte; 2860 boolean_t rv; 2861 2862 rv = FALSE; 2863 PMAP_LOCK(pmap); 2864 pde = pmap_pde(pmap, addr); 2865 if (pde != NULL && *pde != 0) { 2866 pte = pmap_pde_to_pte(pde, addr); 2867 rv = (*pte == 0); 2868 } 2869 PMAP_UNLOCK(pmap); 2870 return (rv); 2871} 2872 2873/* 2874 * Clear the modify bits on the specified physical page. 2875 */ 2876void 2877pmap_clear_modify(vm_page_t m) 2878{ 2879 pmap_t pmap; 2880 pt_entry_t *pte; 2881 pv_entry_t pv; 2882 2883 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2884 ("pmap_clear_modify: page %p is not managed", m)); 2885 VM_OBJECT_ASSERT_WLOCKED(m->object); 2886 KASSERT((m->oflags & VPO_BUSY) == 0, 2887 ("pmap_clear_modify: page %p is busy", m)); 2888 2889 /* 2890 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set. 2891 * If the object containing the page is locked and the page is not 2892 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. 2893 */ 2894 if ((m->aflags & PGA_WRITEABLE) == 0) 2895 return; 2896 rw_wlock(&pvh_global_lock); 2897 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2898 pmap = PV_PMAP(pv); 2899 PMAP_LOCK(pmap); 2900 pte = pmap_pte(pmap, pv->pv_va); 2901 if (pte_test(pte, PTE_D)) { 2902 pte_clear(pte, PTE_D); 2903 pmap_update_page(pmap, pv->pv_va, *pte); 2904 } 2905 PMAP_UNLOCK(pmap); 2906 } 2907 rw_wunlock(&pvh_global_lock); 2908} 2909 2910/* 2911 * pmap_is_referenced: 2912 * 2913 * Return whether or not the specified physical page was referenced 2914 * in any physical maps. 2915 */ 2916boolean_t 2917pmap_is_referenced(vm_page_t m) 2918{ 2919 2920 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2921 ("pmap_is_referenced: page %p is not managed", m)); 2922 return ((m->md.pv_flags & PV_TABLE_REF) != 0); 2923} 2924 2925/* 2926 * pmap_clear_reference: 2927 * 2928 * Clear the reference bit on the specified physical page. 
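 * As with pmap_ts_referenced(), the "reference bit" here is the
 * software PV_TABLE_REF flag rather than a hardware PTE bit.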
2929 */ 2930void 2931pmap_clear_reference(vm_page_t m) 2932{ 2933 2934 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2935 ("pmap_clear_reference: page %p is not managed", m)); 2936 rw_wlock(&pvh_global_lock); 2937 if (m->md.pv_flags & PV_TABLE_REF) { 2938 m->md.pv_flags &= ~PV_TABLE_REF; 2939 } 2940 rw_wunlock(&pvh_global_lock); 2941} 2942 2943/* 2944 * Miscellaneous support routines follow 2945 */ 2946 2947/* 2948 * Map a set of physical memory pages into the kernel virtual 2949 * address space. Return a pointer to where it is mapped. This 2950 * routine is intended to be used for mapping device memory, 2951 * NOT real memory. 2952 * 2953 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit. 2954 */ 2955void * 2956pmap_mapdev(vm_paddr_t pa, vm_size_t size) 2957{ 2958 vm_offset_t va, tmpva, offset; 2959 2960 /* 2961 * KSEG1 maps only first 512M of phys address space. For 2962 * pa > 0x20000000 we should make proper mapping * using pmap_kenter. 2963 */ 2964 if (MIPS_DIRECT_MAPPABLE(pa + size - 1)) 2965 return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa)); 2966 else { 2967 offset = pa & PAGE_MASK; 2968 size = roundup(size + offset, PAGE_SIZE); 2969 2970 va = kmem_alloc_nofault(kernel_map, size); 2971 if (!va) 2972 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 2973 pa = trunc_page(pa); 2974 for (tmpva = va; size > 0;) { 2975 pmap_kenter_attr(tmpva, pa, PTE_C_UNCACHED); 2976 size -= PAGE_SIZE; 2977 tmpva += PAGE_SIZE; 2978 pa += PAGE_SIZE; 2979 } 2980 } 2981 2982 return ((void *)(va + offset)); 2983} 2984 2985void 2986pmap_unmapdev(vm_offset_t va, vm_size_t size) 2987{ 2988#ifndef __mips_n64 2989 vm_offset_t base, offset; 2990 2991 /* If the address is within KSEG1 then there is nothing to do */ 2992 if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END) 2993 return; 2994 2995 base = trunc_page(va); 2996 offset = va & PAGE_MASK; 2997 size = roundup(size + offset, PAGE_SIZE); 2998 kmem_free(kernel_map, base, size); 2999#endif 3000} 3001 3002/* 3003 * perform the pmap work for mincore 3004 */ 3005int 3006pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 3007{ 3008 pt_entry_t *ptep, pte; 3009 vm_paddr_t pa; 3010 vm_page_t m; 3011 int val; 3012 3013 PMAP_LOCK(pmap); 3014retry: 3015 ptep = pmap_pte(pmap, addr); 3016 pte = (ptep != NULL) ? *ptep : 0; 3017 if (!pte_test(&pte, PTE_V)) { 3018 val = 0; 3019 goto out; 3020 } 3021 val = MINCORE_INCORE; 3022 if (pte_test(&pte, PTE_D)) 3023 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 3024 pa = TLBLO_PTE_TO_PA(pte); 3025 if (pte_test(&pte, PTE_MANAGED)) { 3026 /* 3027 * This may falsely report the given address as 3028 * MINCORE_REFERENCED. Unfortunately, due to the lack of 3029 * per-PTE reference information, it is impossible to 3030 * determine if the address is MINCORE_REFERENCED. 3031 */ 3032 m = PHYS_TO_VM_PAGE(pa); 3033 if ((m->aflags & PGA_REFERENCED) != 0) 3034 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 3035 } 3036 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 3037 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && 3038 pte_test(&pte, PTE_MANAGED)) { 3039 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. 
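 * vm_page_pa_tryrelock() may have to drop the pmap lock in order
 * to take the page lock; when it reports that the lock was dropped
 * we jump back to "retry" and re-read the PTE, since the mapping
 * may have changed in the meantime.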
*/ 3040 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 3041 goto retry; 3042 } else 3043out: 3044 PA_UNLOCK_COND(*locked_pa); 3045 PMAP_UNLOCK(pmap); 3046 return (val); 3047} 3048 3049void 3050pmap_activate(struct thread *td) 3051{ 3052 pmap_t pmap, oldpmap; 3053 struct proc *p = td->td_proc; 3054 u_int cpuid; 3055 3056 critical_enter(); 3057 3058 pmap = vmspace_pmap(p->p_vmspace); 3059 oldpmap = PCPU_GET(curpmap); 3060 cpuid = PCPU_GET(cpuid); 3061 3062 if (oldpmap) 3063 CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); 3064 CPU_SET_ATOMIC(cpuid, &pmap->pm_active); 3065 pmap_asid_alloc(pmap); 3066 if (td == curthread) { 3067 PCPU_SET(segbase, pmap->pm_segtab); 3068 mips_wr_entryhi(pmap->pm_asid[cpuid].asid); 3069 } 3070 3071 PCPU_SET(curpmap, pmap); 3072 critical_exit(); 3073} 3074 3075void 3076pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 3077{ 3078} 3079 3080/* 3081 * Increase the starting virtual address of the given mapping if a 3082 * different alignment might result in more superpage mappings. 3083 */ 3084void 3085pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 3086 vm_offset_t *addr, vm_size_t size) 3087{ 3088 vm_offset_t superpage_offset; 3089 3090 if (size < NBSEG) 3091 return; 3092 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 3093 offset += ptoa(object->pg_color); 3094 superpage_offset = offset & SEGMASK; 3095 if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG || 3096 (*addr & SEGMASK) == superpage_offset) 3097 return; 3098 if ((*addr & SEGMASK) < superpage_offset) 3099 *addr = (*addr & ~SEGMASK) + superpage_offset; 3100 else 3101 *addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset; 3102} 3103 3104/* 3105 * Increase the starting virtual address of the given mapping so 3106 * that it is aligned to not be the second page in a TLB entry. 3107 * This routine assumes that the length is appropriately-sized so 3108 * that the allocation does not share a TLB entry at all if required. 
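 * Each MIPS TLB entry maps an even/odd pair of consecutive virtual
 * pages, so a mapping that starts on an odd page would share its
 * first TLB entry with whatever precedes it.  For example, with 4K
 * pages a start address of 0x1000 (an odd page) is bumped to 0x2000,
 * while an already even-aligned address is left alone.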
3109 */ 3110void 3111pmap_align_tlb(vm_offset_t *addr) 3112{ 3113 if ((*addr & PAGE_SIZE) == 0) 3114 return; 3115 *addr += PAGE_SIZE; 3116 return; 3117} 3118 3119#ifdef DDB 3120DB_SHOW_COMMAND(ptable, ddb_pid_dump) 3121{ 3122 pmap_t pmap; 3123 struct thread *td = NULL; 3124 struct proc *p; 3125 int i, j, k; 3126 vm_paddr_t pa; 3127 vm_offset_t va; 3128 3129 if (have_addr) { 3130 td = db_lookup_thread(addr, TRUE); 3131 if (td == NULL) { 3132 db_printf("Invalid pid or tid"); 3133 return; 3134 } 3135 p = td->td_proc; 3136 if (p->p_vmspace == NULL) { 3137 db_printf("No vmspace for process"); 3138 return; 3139 } 3140 pmap = vmspace_pmap(p->p_vmspace); 3141 } else 3142 pmap = kernel_pmap; 3143 3144 db_printf("pmap:%p segtab:%p asid:%x generation:%x\n", 3145 pmap, pmap->pm_segtab, pmap->pm_asid[0].asid, 3146 pmap->pm_asid[0].gen); 3147 for (i = 0; i < NPDEPG; i++) { 3148 pd_entry_t *pdpe; 3149 pt_entry_t *pde; 3150 pt_entry_t pte; 3151 3152 pdpe = (pd_entry_t *)pmap->pm_segtab[i]; 3153 if (pdpe == NULL) 3154 continue; 3155 db_printf("[%4d] %p\n", i, pdpe); 3156#ifdef __mips_n64 3157 for (j = 0; j < NPDEPG; j++) { 3158 pde = (pt_entry_t *)pdpe[j]; 3159 if (pde == NULL) 3160 continue; 3161 db_printf("\t[%4d] %p\n", j, pde); 3162#else 3163 { 3164 j = 0; 3165 pde = (pt_entry_t *)pdpe; 3166#endif 3167 for (k = 0; k < NPTEPG; k++) { 3168 pte = pde[k]; 3169 if (pte == 0 || !pte_test(&pte, PTE_V)) 3170 continue; 3171 pa = TLBLO_PTE_TO_PA(pte); 3172 va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT); 3173 db_printf("\t\t[%04d] va: %p pte: %8jx pa:%jx\n", 3174 k, (void *)va, (uintmax_t)pte, (uintmax_t)pa); 3175 } 3176 } 3177 } 3178} 3179#endif 3180 3181#if defined(DEBUG) 3182 3183static void pads(pmap_t pm); 3184void pmap_pvdump(vm_offset_t pa); 3185 3186/* print address space of pmap*/ 3187static void 3188pads(pmap_t pm) 3189{ 3190 unsigned va, i, j; 3191 pt_entry_t *ptep; 3192 3193 if (pm == kernel_pmap) 3194 return; 3195 for (i = 0; i < NPTEPG; i++) 3196 if (pm->pm_segtab[i]) 3197 for (j = 0; j < NPTEPG; j++) { 3198 va = (i << SEGSHIFT) + (j << PAGE_SHIFT); 3199 if (pm == kernel_pmap && va < KERNBASE) 3200 continue; 3201 if (pm != kernel_pmap && 3202 va >= VM_MAXUSER_ADDRESS) 3203 continue; 3204 ptep = pmap_pte(pm, va); 3205 if (pte_test(ptep, PTE_V)) 3206 printf("%x:%x ", va, *(int *)ptep); 3207 } 3208 3209} 3210 3211void 3212pmap_pvdump(vm_offset_t pa) 3213{ 3214 register pv_entry_t pv; 3215 vm_page_t m; 3216 3217 printf("pa %x", pa); 3218 m = PHYS_TO_VM_PAGE(pa); 3219 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3220 pv = TAILQ_NEXT(pv, pv_list)) { 3221 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3222 pads(pv->pv_pmap); 3223 } 3224 printf(" "); 3225} 3226 3227/* N/C */ 3228#endif 3229 3230 3231/* 3232 * Allocate TLB address space tag (called ASID or TLBPID) and return it. 3233 * It takes almost as much or more time to search the TLB for a 3234 * specific ASID and flush those entries as it does to flush the entire TLB. 3235 * Therefore, when we allocate a new ASID, we just take the next number. When 3236 * we run out of numbers, we flush the TLB, increment the generation count 3237 * and start over. ASID zero is reserved for kernel use. 
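 * Each pmap remembers its (asid, generation) pair per CPU; when the
 * recorded generation no longer matches the CPU's current
 * asid_generation the pmap is simply handed the next free ASID, so
 * nothing needs to be searched or flushed on the common path.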
3238 */ 3239 static void 3240 pmap_asid_alloc(pmap_t pmap) 3241 { 3242 3243 if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED || 3244 pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) { 3245 3246 if (PCPU_GET(next_asid) == pmap_max_asid) { 3247 tlb_invalidate_all_user(NULL); 3248 PCPU_SET(asid_generation, 3249 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK); 3250 if (PCPU_GET(asid_generation) == 0) { 3251 PCPU_SET(asid_generation, 1); 3252 } 3253 PCPU_SET(next_asid, 1); /* 0 means invalid */ 3254 } 3255 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid); 3256 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation); 3257 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1); 3258 } 3259} 3260 3261static pt_entry_t 3262init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot) 3263{ 3264 pt_entry_t rw; 3265 3266 if (!(prot & VM_PROT_WRITE)) 3267 rw = PTE_V | PTE_RO; 3268 else if ((m->oflags & VPO_UNMANAGED) == 0) { 3269 if ((access & VM_PROT_WRITE) != 0) 3270 rw = PTE_V | PTE_D; 3271 else 3272 rw = PTE_V; 3273 } else 3274 /* Needn't emulate a modified bit for unmanaged pages. */ 3275 rw = PTE_V | PTE_D; 3276 return (rw); 3277} 3278 3279/* 3280 * pmap_emulate_modified: do dirty bit emulation 3281 * 3282 * On SMP, update just the local TLB; other CPUs will update their 3283 * TLBs from the PTE lazily if they take the exception. 3284 * Returns 0 in case of success, 1 if the page is read only and we 3285 * need to fault. 3286 */ 3287int 3288pmap_emulate_modified(pmap_t pmap, vm_offset_t va) 3289{ 3290 pt_entry_t *pte; 3291 3292 PMAP_LOCK(pmap); 3293 pte = pmap_pte(pmap, va); 3294 if (pte == NULL) 3295 panic("pmap_emulate_modified: can't find PTE"); 3296#ifdef SMP 3297 /* It is possible that some other CPU changed m-bit */ 3298 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) { 3299 tlb_update(pmap, va, *pte); 3300 PMAP_UNLOCK(pmap); 3301 return (0); 3302 } 3303#else 3304 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) 3305 panic("pmap_emulate_modified: invalid pte"); 3306#endif 3307 if (pte_test(pte, PTE_RO)) { 3308 PMAP_UNLOCK(pmap); 3309 return (1); 3310 } 3311 pte_set(pte, PTE_D); 3312 tlb_update(pmap, va, *pte); 3313 if (!pte_test(pte, PTE_MANAGED)) 3314 panic("pmap_emulate_modified: unmanaged page"); 3315 PMAP_UNLOCK(pmap); 3316 return (0); 3317} 3318 3319/* 3320 * Routine: pmap_kextract 3321 * Function: 3322 * Extract the physical page address associated with the given 3323 * virtual address. 3324 */ 3325vm_paddr_t 3326pmap_kextract(vm_offset_t va) 3327{ 3328 int mapped; 3329 3330 /* 3331 * First, the direct-mapped regions. 3332 */ 3333#if defined(__mips_n64) 3334 if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END) 3335 return (MIPS_XKPHYS_TO_PHYS(va)); 3336#endif 3337 if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END) 3338 return (MIPS_KSEG0_TO_PHYS(va)); 3339 3340 if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END) 3341 return (MIPS_KSEG1_TO_PHYS(va)); 3342 3343 /* 3344 * User virtual addresses. 3345 */ 3346 if (va < VM_MAXUSER_ADDRESS) { 3347 pt_entry_t *ptep; 3348 3349 if (curproc && curproc->p_vmspace) { 3350 ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va); 3351 if (ptep) { 3352 return (TLBLO_PTE_TO_PA(*ptep) | 3353 (va & PAGE_MASK)); 3354 } 3355 return (0); 3356 } 3357 } 3358 3359 /* 3360 * Should be kernel virtual here, otherwise fail 3361 */ 3362 mapped = (va >= MIPS_KSEG2_START && va < MIPS_KSEG2_END); 3363#if defined(__mips_n64) 3364 mapped = mapped || (va >= MIPS_XKSEG_START && va < MIPS_XKSEG_END); 3365#endif 3366 /* 3367 * Kernel virtual. 
3368 */ 3369 3370 if (mapped) { 3371 pt_entry_t *ptep; 3372 3373 /* Is the kernel pmap initialized? */ 3374 if (!CPU_EMPTY(&kernel_pmap->pm_active)) { 3375 /* It's inside the virtual address range */ 3376 ptep = pmap_pte(kernel_pmap, va); 3377 if (ptep) { 3378 return (TLBLO_PTE_TO_PA(*ptep) | 3379 (va & PAGE_MASK)); 3380 } 3381 } 3382 return (0); 3383 } 3384 3385 panic("%s for unknown address space %p.", __func__, (void *)va); 3386} 3387 3388 3389void 3390pmap_flush_pvcache(vm_page_t m) 3391{ 3392 pv_entry_t pv; 3393 3394 if (m != NULL) { 3395 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3396 pv = TAILQ_NEXT(pv, pv_list)) { 3397 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); 3398 } 3399 } 3400} 3401