pmap.c revision 211958
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
 */

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
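 *
 * On this MIPS port, user mappings are tagged with a per-CPU ASID, so
 * an invalidation for a pmap that is not active on a CPU can be
 * deferred by clearing that CPU's ASID generation for the pmap (see
 * pmap_invalidate_page_local()); a fresh ASID is allocated by
 * pmap_asid_alloc() the next time the pmap is used on that CPU.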
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 211958 2010-08-29 05:39:21Z jchandra $");

#include "opt_msgbuf.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/smp.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_phys.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <sys/pcpu.h>
#include <sys/sched.h>
#ifdef SMP
#include <sys/smp.h>
#endif

#include <machine/cache.h>
#include <machine/md_var.h>
#include <machine/tlb.h>

#undef PMAP_DEBUG

#ifndef PMAP_SHPGPERPROC
#define	PMAP_SHPGPERPROC 200
#endif

#if !defined(DIAGNOSTIC)
#define	PMAP_INLINE __inline
#else
#define	PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 *
 * XXX The & for pmap_segshift() is wrong, as is the fact that it doesn't
 * trim off gratuitous bits of the address space.  By having the &
 * there, we break defining NUSERPGTBLS below because the address space
 * is defined such that it ends immediately after NPDEPG*NPTEPG*PAGE_SIZE,
 * so we end up getting NUSERPGTBLS of 0.
 */
#define	pmap_seg_index(v)	(((v) >> SEGSHIFT) & (NPDEPG - 1))
#define	pmap_pde_index(v)	(((v) >> PDRSHIFT) & (NPDEPG - 1))
#define	pmap_pte_index(v)	(((v) >> PAGE_SHIFT) & (NPTEPG - 1))
#define	pmap_pde_pindex(v)	((v) >> PDRSHIFT)

#ifdef __mips_n64
#define	NUPDE			(NPDEPG * NPDEPG)
#define	NUSERPGTBLS		(NUPDE + NPDEPG)
#else
#define	NUPDE			(NPDEPG)
#define	NUSERPGTBLS		(NUPDE)
#endif

#define	is_kernel_pmap(x)	((x) == kernel_pmap)

struct pmap kernel_pmap_store;
pd_entry_t *kernel_segmap;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

static int nkpt;
unsigned pmap_max_asid;		/* max ASID supported by the system */

#define	PMAP_ASID_RESERVED	0

vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;

static void pmap_asid_alloc(pmap_t pmap);

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;

static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
    vm_offset_t va);
static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
    vm_offset_t va, vm_page_t m);
static void
pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte);
static void pmap_invalidate_all(pmap_t pmap);
static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
static vm_page_t pmap_alloc_pte_page(unsigned int index, int req);
static void pmap_grow_pte_page_cache(void);

#ifdef SMP
static void pmap_invalidate_page_action(void *arg);
static void pmap_invalidate_all_action(void *arg);
static void pmap_update_page_action(void *arg);
#endif

#ifndef __mips_n64
/*
 * This structure is for high memory (memory above 512Meg in 32 bit).
 * This memory area does not have a direct mapping, so we need a mechanism
 * to do temporary per-CPU mappings in order to access these addresses.
 *
 * At bootup we reserve 2 virtual pages per CPU for mapping highmem pages.
 * To access a highmem physical address on a CPU, we disable interrupts and
 * add the mapping from the reserved virtual address for the CPU to the
 * physical address in the kernel pagetable.
 */
struct local_sysmaps {
	vm_offset_t	base;
	uint32_t	saved_intr;
	uint16_t	valid1, valid2;
};
static struct local_sysmaps sysmap_lmem[MAXCPU];

static __inline void
pmap_alloc_lmem_map(void)
{
	int i;

	for (i = 0; i < MAXCPU; i++) {
		sysmap_lmem[i].base = virtual_avail;
		virtual_avail += PAGE_SIZE * 2;
		sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
	}
}

static __inline vm_offset_t
pmap_lmem_map1(vm_paddr_t phys)
{
	struct local_sysmaps *sysm;
	pt_entry_t *pte, npte;
	vm_offset_t va;
	uint32_t intr;
	int cpu;

	intr = intr_disable();
	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	sysm->saved_intr = intr;
	va = sysm->base;
	npte = TLBLO_PA_TO_PFN(phys) |
	    PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
	pte = pmap_pte(kernel_pmap, va);
	*pte = npte;
	sysm->valid1 = 1;
	return (va);
}

static __inline vm_offset_t
pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
{
	struct local_sysmaps *sysm;
	pt_entry_t *pte, npte;
	vm_offset_t va1, va2;
	uint32_t intr;
	int cpu;

	intr = intr_disable();
	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	sysm->saved_intr = intr;
	va1 = sysm->base;
	va2 = sysm->base + PAGE_SIZE;
	npte = TLBLO_PA_TO_PFN(phys1) |
	    PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
	pte = pmap_pte(kernel_pmap, va1);
	*pte = npte;
	npte = TLBLO_PA_TO_PFN(phys2) |
	    PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
	pte = pmap_pte(kernel_pmap, va2);
	*pte = npte;
	sysm->valid1 = 1;
	sysm->valid2 = 1;
	return (va1);
}

static __inline void
pmap_lmem_unmap(void)
{
	struct local_sysmaps *sysm;
	pt_entry_t *pte;
	int cpu;

	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	pte = pmap_pte(kernel_pmap, sysm->base);
	*pte = PTE_G;
	tlb_invalidate_address(kernel_pmap, sysm->base);
	sysm->valid1 = 0;
	if (sysm->valid2) {
		pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);
		*pte = PTE_G;
		tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE);
		sysm->valid2 = 0;
295 } 296 intr_restore(sysm->saved_intr); 297} 298#else /* __mips_n64 */ 299 300static __inline void 301pmap_alloc_lmem_map(void) 302{ 303} 304 305static __inline vm_offset_t 306pmap_lmem_map1(vm_paddr_t phys) 307{ 308 309 return (0); 310} 311 312static __inline vm_offset_t 313pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2) 314{ 315 316 return (0); 317} 318 319static __inline vm_offset_t 320pmap_lmem_unmap(void) 321{ 322 323 return (0); 324} 325#endif /* !__mips_n64 */ 326 327/* 328 * Page table entry lookup routines. 329 */ 330static __inline pd_entry_t * 331pmap_segmap(pmap_t pmap, vm_offset_t va) 332{ 333 334 return (&pmap->pm_segtab[pmap_seg_index(va)]); 335} 336 337#ifdef __mips_n64 338static __inline pd_entry_t * 339pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va) 340{ 341 pd_entry_t *pde; 342 343 pde = (pd_entry_t *)*pdpe; 344 return (&pde[pmap_pde_index(va)]); 345} 346 347static __inline pd_entry_t * 348pmap_pde(pmap_t pmap, vm_offset_t va) 349{ 350 pd_entry_t *pdpe; 351 352 pdpe = pmap_segmap(pmap, va); 353 if (pdpe == NULL || *pdpe == NULL) 354 return (NULL); 355 356 return (pmap_pdpe_to_pde(pdpe, va)); 357} 358#else 359static __inline pd_entry_t * 360pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va) 361{ 362 363 return (pdpe); 364} 365 366static __inline 367pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va) 368{ 369 370 return (pmap_segmap(pmap, va)); 371} 372#endif 373 374static __inline pt_entry_t * 375pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 376{ 377 pt_entry_t *pte; 378 379 pte = (pt_entry_t *)*pde; 380 return (&pte[pmap_pte_index(va)]); 381} 382 383pt_entry_t * 384pmap_pte(pmap_t pmap, vm_offset_t va) 385{ 386 pd_entry_t *pde; 387 388 pde = pmap_pde(pmap, va); 389 if (pde == NULL || *pde == NULL) 390 return (NULL); 391 392 return (pmap_pde_to_pte(pde, va)); 393} 394 395vm_offset_t 396pmap_steal_memory(vm_size_t size) 397{ 398 vm_size_t bank_size; 399 vm_offset_t pa, va; 400 401 size = round_page(size); 402 403 bank_size = phys_avail[1] - phys_avail[0]; 404 while (size > bank_size) { 405 int i; 406 407 for (i = 0; phys_avail[i + 2]; i += 2) { 408 phys_avail[i] = phys_avail[i + 2]; 409 phys_avail[i + 1] = phys_avail[i + 3]; 410 } 411 phys_avail[i] = 0; 412 phys_avail[i + 1] = 0; 413 if (!phys_avail[0]) 414 panic("pmap_steal_memory: out of memory"); 415 bank_size = phys_avail[1] - phys_avail[0]; 416 } 417 418 pa = phys_avail[0]; 419 phys_avail[0] += size; 420 if (MIPS_DIRECT_MAPPABLE(pa) == 0) 421 panic("Out of memory below 512Meg?"); 422 va = MIPS_PHYS_TO_DIRECT(pa); 423 bzero((caddr_t)va, size); 424 return (va); 425} 426 427/* 428 * Bootstrap the system enough to run with virtual memory. This 429 * assumes that the phys_avail array has been initialized. 430 */ 431static void 432pmap_create_kernel_pagetable(void) 433{ 434 int i, j; 435 vm_offset_t ptaddr; 436 pt_entry_t *pte; 437#ifdef __mips_n64 438 pd_entry_t *pde; 439 vm_offset_t pdaddr; 440 int npt, npde; 441#endif 442 443 /* 444 * Allocate segment table for the kernel 445 */ 446 kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE); 447 448 /* 449 * Allocate second level page tables for the kernel 450 */ 451#ifdef __mips_n64 452 npde = howmany(NKPT, NPDEPG); 453 pdaddr = pmap_steal_memory(PAGE_SIZE * npde); 454#endif 455 nkpt = NKPT; 456 ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt); 457 458 /* 459 * The R[4-7]?00 stores only one copy of the Global bit in the 460 * translation lookaside buffer for each 2 page entry. 
Thus invalid 461 * entrys must have the Global bit set so when Entry LO and Entry HI 462 * G bits are anded together they will produce a global bit to store 463 * in the tlb. 464 */ 465 for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++) 466 *pte = PTE_G; 467 468#ifdef __mips_n64 469 for (i = 0, npt = nkpt; npt > 0; i++) { 470 kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE); 471 pde = (pd_entry_t *)kernel_segmap[i]; 472 473 for (j = 0; j < NPDEPG && npt > 0; j++, npt--) 474 pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE); 475 } 476#else 477 for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++) 478 kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE)); 479#endif 480 481 PMAP_LOCK_INIT(kernel_pmap); 482 kernel_pmap->pm_segtab = kernel_segmap; 483 kernel_pmap->pm_active = ~0; 484 TAILQ_INIT(&kernel_pmap->pm_pvlist); 485 kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED; 486 kernel_pmap->pm_asid[0].gen = 0; 487 kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE; 488} 489 490void 491pmap_bootstrap(void) 492{ 493 int i; 494 int need_local_mappings = 0; 495 496 /* Sort. */ 497again: 498 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 499 /* 500 * Keep the memory aligned on page boundary. 501 */ 502 phys_avail[i] = round_page(phys_avail[i]); 503 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 504 505 if (i < 2) 506 continue; 507 if (phys_avail[i - 2] > phys_avail[i]) { 508 vm_paddr_t ptemp[2]; 509 510 ptemp[0] = phys_avail[i + 0]; 511 ptemp[1] = phys_avail[i + 1]; 512 513 phys_avail[i + 0] = phys_avail[i - 2]; 514 phys_avail[i + 1] = phys_avail[i - 1]; 515 516 phys_avail[i - 2] = ptemp[0]; 517 phys_avail[i - 1] = ptemp[1]; 518 goto again; 519 } 520 } 521 522 /* 523 * In 32 bit, we may have memory which cannot be mapped directly 524 * this memory will need temporary mapping before it can be 525 * accessed. 526 */ 527 if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1])) 528 need_local_mappings = 1; 529 530 /* 531 * Copy the phys_avail[] array before we start stealing memory from it. 532 */ 533 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 534 physmem_desc[i] = phys_avail[i]; 535 physmem_desc[i + 1] = phys_avail[i + 1]; 536 } 537 538 Maxmem = atop(phys_avail[i - 1]); 539 540 if (bootverbose) { 541 printf("Physical memory chunk(s):\n"); 542 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 543 vm_paddr_t size; 544 545 size = phys_avail[i + 1] - phys_avail[i]; 546 printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n", 547 (uintmax_t) phys_avail[i], 548 (uintmax_t) phys_avail[i + 1] - 1, 549 (uintmax_t) size, (uintmax_t) size / PAGE_SIZE); 550 } 551 printf("Maxmem is 0x%0lx\n", ptoa(Maxmem)); 552 } 553 /* 554 * Steal the message buffer from the beginning of memory. 555 */ 556 msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE); 557 msgbufinit(msgbufp, MSGBUF_SIZE); 558 559 /* 560 * Steal thread0 kstack. 561 */ 562 kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT); 563 564 virtual_avail = VM_MIN_KERNEL_ADDRESS; 565 virtual_end = VM_MAX_KERNEL_ADDRESS; 566 567#ifdef SMP 568 /* 569 * Steal some virtual address space to map the pcpu area. 570 */ 571 virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2); 572 pcpup = (struct pcpu *)virtual_avail; 573 virtual_avail += PAGE_SIZE * 2; 574 575 /* 576 * Initialize the wired TLB entry mapping the pcpu region for 577 * the BSP at 'pcpup'. Up until this point we were operating 578 * with the 'pcpup' for the BSP pointing to a virtual address 579 * in KSEG0 so there was no need for a TLB mapping. 
580 */ 581 mips_pcpu_tlb_init(PCPU_ADDR(0)); 582 583 if (bootverbose) 584 printf("pcpu is available at virtual address %p.\n", pcpup); 585#endif 586 587 if (need_local_mappings) 588 pmap_alloc_lmem_map(); 589 pmap_create_kernel_pagetable(); 590 pmap_max_asid = VMNUM_PIDS; 591 mips_wr_entryhi(0); 592 mips_wr_pagemask(0); 593} 594 595/* 596 * Initialize a vm_page's machine-dependent fields. 597 */ 598void 599pmap_page_init(vm_page_t m) 600{ 601 602 TAILQ_INIT(&m->md.pv_list); 603 m->md.pv_list_count = 0; 604 m->md.pv_flags = 0; 605} 606 607/* 608 * Initialize the pmap module. 609 * Called by vm_init, to initialize any structures that the pmap 610 * system needs to map virtual memory. 611 * pmap_init has been enhanced to support in a fairly consistant 612 * way, discontiguous physical memory. 613 */ 614void 615pmap_init(void) 616{ 617 618 /* 619 * Initialize the address space (zone) for the pv entries. Set a 620 * high water mark so that the system can recover from excessive 621 * numbers of pv entries. 622 */ 623 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 624 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 625 pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count; 626 pv_entry_high_water = 9 * (pv_entry_max / 10); 627 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); 628} 629 630/*************************************************** 631 * Low level helper routines..... 632 ***************************************************/ 633 634static __inline void 635pmap_invalidate_all_local(pmap_t pmap) 636{ 637 638 if (pmap == kernel_pmap) { 639 tlb_invalidate_all(); 640 return; 641 } 642 if (pmap->pm_active & PCPU_GET(cpumask)) 643 tlb_invalidate_all_user(pmap); 644 else 645 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 646} 647 648#ifdef SMP 649static void 650pmap_invalidate_all(pmap_t pmap) 651{ 652 653 smp_rendezvous(0, pmap_invalidate_all_action, 0, pmap); 654} 655 656static void 657pmap_invalidate_all_action(void *arg) 658{ 659 660 pmap_invalidate_all_local((pmap_t)arg); 661} 662#else 663static void 664pmap_invalidate_all(pmap_t pmap) 665{ 666 667 pmap_invalidate_all_local(pmap); 668} 669#endif 670 671static __inline void 672pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va) 673{ 674 675 if (is_kernel_pmap(pmap)) { 676 tlb_invalidate_address(pmap, va); 677 return; 678 } 679 if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) 680 return; 681 else if (!(pmap->pm_active & PCPU_GET(cpumask))) { 682 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 683 return; 684 } 685 tlb_invalidate_address(pmap, va); 686} 687 688#ifdef SMP 689struct pmap_invalidate_page_arg { 690 pmap_t pmap; 691 vm_offset_t va; 692}; 693 694static void 695pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 696{ 697 struct pmap_invalidate_page_arg arg; 698 699 arg.pmap = pmap; 700 arg.va = va; 701 smp_rendezvous(0, pmap_invalidate_page_action, 0, &arg); 702} 703 704static void 705pmap_invalidate_page_action(void *arg) 706{ 707 struct pmap_invalidate_page_arg *p = arg; 708 709 pmap_invalidate_page_local(p->pmap, p->va); 710} 711#else 712static void 713pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 714{ 715 716 pmap_invalidate_page_local(pmap, va); 717} 718#endif 719 720static __inline void 721pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte) 722{ 723 724 if (is_kernel_pmap(pmap)) { 725 tlb_update(pmap, va, pte); 726 return; 727 } 728 if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) 729 return; 730 else if (!(pmap->pm_active & 
PCPU_GET(cpumask))) { 731 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 732 return; 733 } 734 tlb_update(pmap, va, pte); 735} 736 737#ifdef SMP 738struct pmap_update_page_arg { 739 pmap_t pmap; 740 vm_offset_t va; 741 pt_entry_t pte; 742}; 743 744static void 745pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) 746{ 747 struct pmap_update_page_arg arg; 748 749 arg.pmap = pmap; 750 arg.va = va; 751 arg.pte = pte; 752 smp_rendezvous(0, pmap_update_page_action, 0, &arg); 753} 754 755static void 756pmap_update_page_action(void *arg) 757{ 758 struct pmap_update_page_arg *p = arg; 759 760 pmap_update_page_local(p->pmap, p->va, p->pte); 761} 762#else 763static void 764pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) 765{ 766 767 pmap_update_page_local(pmap, va, pte); 768} 769#endif 770 771/* 772 * Routine: pmap_extract 773 * Function: 774 * Extract the physical page address associated 775 * with the given map/virtual_address pair. 776 */ 777vm_paddr_t 778pmap_extract(pmap_t pmap, vm_offset_t va) 779{ 780 pt_entry_t *pte; 781 vm_offset_t retval = 0; 782 783 PMAP_LOCK(pmap); 784 pte = pmap_pte(pmap, va); 785 if (pte) { 786 retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK); 787 } 788 PMAP_UNLOCK(pmap); 789 return (retval); 790} 791 792/* 793 * Routine: pmap_extract_and_hold 794 * Function: 795 * Atomically extract and hold the physical page 796 * with the given pmap and virtual address pair 797 * if that mapping permits the given protection. 798 */ 799vm_page_t 800pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 801{ 802 pt_entry_t pte; 803 vm_page_t m; 804 vm_paddr_t pa; 805 806 m = NULL; 807 pa = 0; 808 PMAP_LOCK(pmap); 809retry: 810 pte = *pmap_pte(pmap, va); 811 if (pte != 0 && pte_test(&pte, PTE_V) && 812 (pte_test(&pte, PTE_D) || (prot & VM_PROT_WRITE) == 0)) { 813 if (vm_page_pa_tryrelock(pmap, TLBLO_PTE_TO_PA(pte), &pa)) 814 goto retry; 815 816 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(pte)); 817 vm_page_hold(m); 818 } 819 PA_UNLOCK_COND(pa); 820 PMAP_UNLOCK(pmap); 821 return (m); 822} 823 824/*************************************************** 825 * Low level mapping routines..... 826 ***************************************************/ 827 828/* 829 * add a wired page to the kva 830 */ 831 /* PMAP_INLINE */ void 832pmap_kenter(vm_offset_t va, vm_paddr_t pa) 833{ 834 pt_entry_t *pte; 835 pt_entry_t opte, npte; 836 837#ifdef PMAP_DEBUG 838 printf("pmap_kenter: va: %p -> pa: %p\n", (void *)va, (void *)pa); 839#endif 840 npte = TLBLO_PA_TO_PFN(pa) | PTE_D | PTE_V | PTE_G | PTE_W; 841 842 if (is_cacheable_mem(pa)) 843 npte |= PTE_C_CACHE; 844 else 845 npte |= PTE_C_UNCACHED; 846 847 pte = pmap_pte(kernel_pmap, va); 848 opte = *pte; 849 *pte = npte; 850 if (pte_test(&opte, PTE_V) && opte != npte) 851 pmap_update_page(kernel_pmap, va, npte); 852} 853 854/* 855 * remove a page from the kernel pagetables 856 */ 857 /* PMAP_INLINE */ void 858pmap_kremove(vm_offset_t va) 859{ 860 pt_entry_t *pte; 861 862 /* 863 * Write back all caches from the page being destroyed 864 */ 865 mips_dcache_wbinv_range_index(va, PAGE_SIZE); 866 867 pte = pmap_pte(kernel_pmap, va); 868 *pte = PTE_G; 869 pmap_invalidate_page(kernel_pmap, va); 870} 871 872/* 873 * Used to map a range of physical addresses into kernel 874 * virtual address space. 875 * 876 * The value passed in '*virt' is a suggested virtual address for 877 * the mapping. 
Architectures which can support a direct-mapped 878 * physical to virtual region can return the appropriate address 879 * within that region, leaving '*virt' unchanged. Other 880 * architectures should map the pages starting at '*virt' and 881 * update '*virt' with the first usable address after the mapped 882 * region. 883 * 884 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 885 */ 886vm_offset_t 887pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) 888{ 889 vm_offset_t va, sva; 890 891 if (MIPS_DIRECT_MAPPABLE(end)) 892 return (MIPS_PHYS_TO_DIRECT(start)); 893 894 va = sva = *virt; 895 while (start < end) { 896 pmap_kenter(va, start); 897 va += PAGE_SIZE; 898 start += PAGE_SIZE; 899 } 900 *virt = va; 901 return (sva); 902} 903 904/* 905 * Add a list of wired pages to the kva 906 * this routine is only used for temporary 907 * kernel mappings that do not need to have 908 * page modification or references recorded. 909 * Note that old mappings are simply written 910 * over. The page *must* be wired. 911 */ 912void 913pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 914{ 915 int i; 916 vm_offset_t origva = va; 917 918 for (i = 0; i < count; i++) { 919 pmap_flush_pvcache(m[i]); 920 pmap_kenter(va, VM_PAGE_TO_PHYS(m[i])); 921 va += PAGE_SIZE; 922 } 923 924 mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count); 925} 926 927/* 928 * this routine jerks page mappings from the 929 * kernel -- it is meant only for temporary mappings. 930 */ 931void 932pmap_qremove(vm_offset_t va, int count) 933{ 934 /* 935 * No need to wb/inv caches here, 936 * pmap_kremove will do it for us 937 */ 938 939 while (count-- > 0) { 940 pmap_kremove(va); 941 va += PAGE_SIZE; 942 } 943} 944 945/*************************************************** 946 * Page table page management routines..... 947 ***************************************************/ 948 949/* Revision 1.507 950 * 951 * Simplify the reference counting of page table pages. Specifically, use 952 * the page table page's wired count rather than its hold count to contain 953 * the reference count. 954 */ 955 956/* 957 * This routine unholds page table pages, and if the hold count 958 * drops to zero, then it decrements the wire count. 959 */ 960static PMAP_INLINE int 961pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 962{ 963 --m->wire_count; 964 if (m->wire_count == 0) 965 return (_pmap_unwire_pte_hold(pmap, va, m)); 966 else 967 return (0); 968} 969 970static int 971_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 972{ 973 pd_entry_t *pde; 974 975 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 976 /* 977 * unmap the page table page 978 */ 979#ifdef __mips_n64 980 if (m->pindex < NUPDE) 981 pde = pmap_pde(pmap, va); 982 else 983 pde = pmap_segmap(pmap, va); 984#else 985 pde = pmap_pde(pmap, va); 986#endif 987 *pde = 0; 988 pmap->pm_stats.resident_count--; 989 990#ifdef __mips_n64 991 if (m->pindex < NUPDE) { 992 pd_entry_t *pdp; 993 vm_page_t pdpg; 994 995 /* 996 * Recursively decrement next level pagetable refcount 997 */ 998 pdp = (pd_entry_t *)*pmap_segmap(pmap, va); 999 pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp)); 1000 pmap_unwire_pte_hold(pmap, va, pdpg); 1001 } 1002#endif 1003 if (pmap->pm_ptphint == m) 1004 pmap->pm_ptphint = NULL; 1005 1006 /* 1007 * If the page is finally unwired, simply free it. 
1008 */ 1009 vm_page_free_zero(m); 1010 atomic_subtract_int(&cnt.v_wire_count, 1); 1011 return (1); 1012} 1013 1014/* 1015 * After removing a page table entry, this routine is used to 1016 * conditionally free the page, and manage the hold/wire counts. 1017 */ 1018static int 1019pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1020{ 1021 unsigned ptepindex; 1022 pd_entry_t pteva; 1023 1024 if (va >= VM_MAXUSER_ADDRESS) 1025 return (0); 1026 1027 if (mpte == NULL) { 1028 ptepindex = pmap_pde_pindex(va); 1029 if (pmap->pm_ptphint && 1030 (pmap->pm_ptphint->pindex == ptepindex)) { 1031 mpte = pmap->pm_ptphint; 1032 } else { 1033 pteva = *pmap_pde(pmap, va); 1034 mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pteva)); 1035 pmap->pm_ptphint = mpte; 1036 } 1037 } 1038 return (pmap_unwire_pte_hold(pmap, va, mpte)); 1039} 1040 1041void 1042pmap_pinit0(pmap_t pmap) 1043{ 1044 int i; 1045 1046 PMAP_LOCK_INIT(pmap); 1047 pmap->pm_segtab = kernel_segmap; 1048 pmap->pm_active = 0; 1049 pmap->pm_ptphint = NULL; 1050 for (i = 0; i < MAXCPU; i++) { 1051 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1052 pmap->pm_asid[i].gen = 0; 1053 } 1054 PCPU_SET(curpmap, pmap); 1055 TAILQ_INIT(&pmap->pm_pvlist); 1056 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1057} 1058 1059static void 1060pmap_grow_pte_page_cache() 1061{ 1062 1063#ifdef __mips_n64 1064 vm_contig_grow_cache(3, 0, MIPS_XKPHYS_LARGEST_PHYS); 1065#else 1066 vm_contig_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS); 1067#endif 1068} 1069 1070static vm_page_t 1071pmap_alloc_pte_page(unsigned int index, int req) 1072{ 1073 vm_page_t m; 1074 1075 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, 0, req); 1076 if (m == NULL) 1077 return (NULL); 1078 1079 if ((m->flags & PG_ZERO) == 0) 1080 pmap_zero_page(m); 1081 1082 m->pindex = index; 1083 atomic_add_int(&cnt.v_wire_count, 1); 1084 m->wire_count = 1; 1085 return (m); 1086} 1087 1088/* 1089 * Initialize a preallocated and zeroed pmap structure, 1090 * such as one in a vmspace structure. 1091 */ 1092int 1093pmap_pinit(pmap_t pmap) 1094{ 1095 vm_offset_t ptdva; 1096 vm_page_t ptdpg; 1097 int i; 1098 1099 PMAP_LOCK_INIT(pmap); 1100 1101 /* 1102 * allocate the page directory page 1103 */ 1104 while ((ptdpg = pmap_alloc_pte_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL) 1105 pmap_grow_pte_page_cache(); 1106 1107 ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg)); 1108 pmap->pm_segtab = (pd_entry_t *)ptdva; 1109 pmap->pm_active = 0; 1110 pmap->pm_ptphint = NULL; 1111 for (i = 0; i < MAXCPU; i++) { 1112 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1113 pmap->pm_asid[i].gen = 0; 1114 } 1115 TAILQ_INIT(&pmap->pm_pvlist); 1116 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1117 1118 return (1); 1119} 1120 1121/* 1122 * this routine is called if the page table page is not 1123 * mapped correctly. 1124 */ 1125static vm_page_t 1126_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) 1127{ 1128 vm_offset_t pageva; 1129 vm_page_t m; 1130 1131 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1132 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1133 ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1134 1135 /* 1136 * Find or fabricate a new pagetable page 1137 */ 1138 if ((m = pmap_alloc_pte_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) { 1139 if (flags & M_WAITOK) { 1140 PMAP_UNLOCK(pmap); 1141 vm_page_unlock_queues(); 1142 pmap_grow_pte_page_cache(); 1143 vm_page_lock_queues(); 1144 PMAP_LOCK(pmap); 1145 } 1146 1147 /* 1148 * Indicate the need to retry. 
While waiting, the page 1149 * table page may have been allocated. 1150 */ 1151 return (NULL); 1152 } 1153 1154 /* 1155 * Map the pagetable page into the process address space, if it 1156 * isn't already there. 1157 */ 1158 pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)); 1159 1160#ifdef __mips_n64 1161 if (ptepindex >= NUPDE) { 1162 pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva; 1163 } else { 1164 pd_entry_t *pdep, *pde; 1165 int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT); 1166 int pdeindex = ptepindex & (NPDEPG - 1); 1167 vm_page_t pg; 1168 1169 pdep = &pmap->pm_segtab[segindex]; 1170 if (*pdep == NULL) { 1171 /* recurse for allocating page dir */ 1172 if (_pmap_allocpte(pmap, NUPDE + segindex, 1173 flags) == NULL) { 1174 /* alloc failed, release current */ 1175 --m->wire_count; 1176 atomic_subtract_int(&cnt.v_wire_count, 1); 1177 vm_page_free_zero(m); 1178 return (NULL); 1179 } 1180 } else { 1181 pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep)); 1182 pg->wire_count++; 1183 } 1184 /* Next level entry */ 1185 pde = (pd_entry_t *)*pdep; 1186 pde[pdeindex] = (pd_entry_t)pageva; 1187 pmap->pm_ptphint = m; 1188 } 1189#else 1190 pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva; 1191#endif 1192 pmap->pm_stats.resident_count++; 1193 1194 /* 1195 * Set the page table hint 1196 */ 1197 pmap->pm_ptphint = m; 1198 return (m); 1199} 1200 1201static vm_page_t 1202pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1203{ 1204 unsigned ptepindex; 1205 pd_entry_t *pde; 1206 vm_page_t m; 1207 1208 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1209 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1210 ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1211 1212 /* 1213 * Calculate pagetable page index 1214 */ 1215 ptepindex = pmap_pde_pindex(va); 1216retry: 1217 /* 1218 * Get the page directory entry 1219 */ 1220 pde = pmap_pde(pmap, va); 1221 1222 /* 1223 * If the page table page is mapped, we just increment the hold 1224 * count, and activate it. 1225 */ 1226 if (pde != NULL && *pde != NULL) { 1227 /* 1228 * In order to get the page table page, try the hint first. 1229 */ 1230 if (pmap->pm_ptphint && 1231 (pmap->pm_ptphint->pindex == ptepindex)) { 1232 m = pmap->pm_ptphint; 1233 } else { 1234 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde)); 1235 pmap->pm_ptphint = m; 1236 } 1237 m->wire_count++; 1238 } else { 1239 /* 1240 * Here if the pte page isn't mapped, or if it has been 1241 * deallocated. 1242 */ 1243 m = _pmap_allocpte(pmap, ptepindex, flags); 1244 if (m == NULL && (flags & M_WAITOK)) 1245 goto retry; 1246 } 1247 return (m); 1248} 1249 1250 1251/*************************************************** 1252* Pmap allocation/deallocation routines. 1253 ***************************************************/ 1254/* 1255 * Revision 1.397 1256 * - Merged pmap_release and pmap_release_free_page. When pmap_release is 1257 * called only the page directory page(s) can be left in the pmap pte 1258 * object, since all page table pages will have been freed by 1259 * pmap_remove_pages and pmap_remove. In addition, there can only be one 1260 * reference to the pmap and the page directory is wired, so the page(s) 1261 * can never be busy. So all there is to do is clear the magic mappings 1262 * from the page directory and free the page(s). 1263 */ 1264 1265 1266/* 1267 * Release any resources held by the given physical map. 1268 * Called when a pmap initialized by pmap_pinit is being released. 1269 * Should only be called if the map contains no valid mappings. 
1270 */ 1271void 1272pmap_release(pmap_t pmap) 1273{ 1274 vm_offset_t ptdva; 1275 vm_page_t ptdpg; 1276 1277 KASSERT(pmap->pm_stats.resident_count == 0, 1278 ("pmap_release: pmap resident count %ld != 0", 1279 pmap->pm_stats.resident_count)); 1280 1281 ptdva = (vm_offset_t)pmap->pm_segtab; 1282 ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva)); 1283 1284 ptdpg->wire_count--; 1285 atomic_subtract_int(&cnt.v_wire_count, 1); 1286 vm_page_free_zero(ptdpg); 1287 PMAP_LOCK_DESTROY(pmap); 1288} 1289 1290/* 1291 * grow the number of kernel page table entries, if needed 1292 */ 1293void 1294pmap_growkernel(vm_offset_t addr) 1295{ 1296 vm_page_t nkpg; 1297 pd_entry_t *pde, *pdpe; 1298 pt_entry_t *pte; 1299 int i; 1300 1301 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1302 addr = roundup2(addr, NBSEG); 1303 if (addr - 1 >= kernel_map->max_offset) 1304 addr = kernel_map->max_offset; 1305 while (kernel_vm_end < addr) { 1306 pdpe = pmap_segmap(kernel_pmap, kernel_vm_end); 1307#ifdef __mips_n64 1308 if (*pdpe == 0) { 1309 /* new intermediate page table entry */ 1310 nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT); 1311 if (nkpg == NULL) 1312 panic("pmap_growkernel: no memory to grow kernel"); 1313 *pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg)); 1314 continue; /* try again */ 1315 } 1316#endif 1317 pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); 1318 if (*pde != 0) { 1319 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; 1320 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1321 kernel_vm_end = kernel_map->max_offset; 1322 break; 1323 } 1324 continue; 1325 } 1326 1327 /* 1328 * This index is bogus, but out of the way 1329 */ 1330 nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT); 1331 if (!nkpg) 1332 panic("pmap_growkernel: no memory to grow kernel"); 1333 nkpt++; 1334 *pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg)); 1335 1336 /* 1337 * The R[4-7]?00 stores only one copy of the Global bit in 1338 * the translation lookaside buffer for each 2 page entry. 1339 * Thus invalid entrys must have the Global bit set so when 1340 * Entry LO and Entry HI G bits are anded together they will 1341 * produce a global bit to store in the tlb. 1342 */ 1343 pte = (pt_entry_t *)*pde; 1344 for (i = 0; i < NPTEPG; i++) 1345 pte[i] = PTE_G; 1346 1347 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; 1348 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1349 kernel_vm_end = kernel_map->max_offset; 1350 break; 1351 } 1352 } 1353} 1354 1355/*************************************************** 1356* page management routines. 1357 ***************************************************/ 1358 1359/* 1360 * free the pv_entry back to the free list 1361 */ 1362static PMAP_INLINE void 1363free_pv_entry(pv_entry_t pv) 1364{ 1365 1366 pv_entry_count--; 1367 uma_zfree(pvzone, pv); 1368} 1369 1370/* 1371 * get a new pv_entry, allocating a block from the system 1372 * when needed. 1373 * the memory allocation is performed bypassing the malloc code 1374 * because of the possibility of allocations at interrupt time. 
1375 */ 1376static pv_entry_t 1377get_pv_entry(pmap_t locked_pmap) 1378{ 1379 static const struct timeval printinterval = { 60, 0 }; 1380 static struct timeval lastprint; 1381 struct vpgqueues *vpq; 1382 pt_entry_t *pte, oldpte; 1383 pmap_t pmap; 1384 pv_entry_t allocated_pv, next_pv, pv; 1385 vm_offset_t va; 1386 vm_page_t m; 1387 1388 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 1389 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1390 allocated_pv = uma_zalloc(pvzone, M_NOWAIT); 1391 if (allocated_pv != NULL) { 1392 pv_entry_count++; 1393 if (pv_entry_count > pv_entry_high_water) 1394 pagedaemon_wakeup(); 1395 else 1396 return (allocated_pv); 1397 } 1398 /* 1399 * Reclaim pv entries: At first, destroy mappings to inactive 1400 * pages. After that, if a pv entry is still needed, destroy 1401 * mappings to active pages. 1402 */ 1403 if (ratecheck(&lastprint, &printinterval)) 1404 printf("Approaching the limit on PV entries, " 1405 "increase the vm.pmap.shpgperproc tunable.\n"); 1406 vpq = &vm_page_queues[PQ_INACTIVE]; 1407retry: 1408 TAILQ_FOREACH(m, &vpq->pl, pageq) { 1409 if (m->hold_count || m->busy) 1410 continue; 1411 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { 1412 va = pv->pv_va; 1413 pmap = pv->pv_pmap; 1414 /* Avoid deadlock and lock recursion. */ 1415 if (pmap > locked_pmap) 1416 PMAP_LOCK(pmap); 1417 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) 1418 continue; 1419 pmap->pm_stats.resident_count--; 1420 pte = pmap_pte(pmap, va); 1421 KASSERT(pte != NULL, ("pte")); 1422 oldpte = *pte; 1423 if (is_kernel_pmap(pmap)) 1424 *pte = PTE_G; 1425 else 1426 *pte = 0; 1427 KASSERT(!pte_test(&oldpte, PTE_W), 1428 ("wired pte for unwired page")); 1429 if (m->md.pv_flags & PV_TABLE_REF) 1430 vm_page_flag_set(m, PG_REFERENCED); 1431 if (pte_test(&oldpte, PTE_D)) 1432 vm_page_dirty(m); 1433 pmap_invalidate_page(pmap, va); 1434 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1435 m->md.pv_list_count--; 1436 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1437 pmap_unuse_pt(pmap, va, pv->pv_ptem); 1438 if (pmap != locked_pmap) 1439 PMAP_UNLOCK(pmap); 1440 if (allocated_pv == NULL) 1441 allocated_pv = pv; 1442 else 1443 free_pv_entry(pv); 1444 } 1445 if (TAILQ_EMPTY(&m->md.pv_list)) { 1446 vm_page_flag_clear(m, PG_WRITEABLE); 1447 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); 1448 } 1449 } 1450 if (allocated_pv == NULL) { 1451 if (vpq == &vm_page_queues[PQ_INACTIVE]) { 1452 vpq = &vm_page_queues[PQ_ACTIVE]; 1453 goto retry; 1454 } 1455 panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable"); 1456 } 1457 return (allocated_pv); 1458} 1459 1460/* 1461 * Revision 1.370 1462 * 1463 * Move pmap_collect() out of the machine-dependent code, rename it 1464 * to reflect its new location, and add page queue and flag locking. 1465 * 1466 * Notes: (1) alpha, i386, and ia64 had identical implementations 1467 * of pmap_collect() in terms of machine-independent interfaces; 1468 * (2) sparc64 doesn't require it; (3) powerpc had it as a TODO. 1469 * 1470 * MIPS implementation was identical to alpha [Junos 8.2] 1471 */ 1472 1473/* 1474 * If it is the first entry on the list, it is actually 1475 * in the header and we must copy the following entry up 1476 * to the header. Otherwise we must search the list for 1477 * the entry. In either case we free the now unused entry. 
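 *
 * To bound the cost of the lookup, the shorter of the two candidate
 * lists is walked: the page's pv list when it holds fewer entries than
 * the pmap has resident pages, otherwise the pmap's own pv list.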
1478 */ 1479 1480static pv_entry_t 1481pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1482{ 1483 pv_entry_t pv; 1484 1485 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1486 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1487 if (pvh->pv_list_count < pmap->pm_stats.resident_count) { 1488 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 1489 if (pmap == pv->pv_pmap && va == pv->pv_va) 1490 break; 1491 } 1492 } else { 1493 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1494 if (va == pv->pv_va) 1495 break; 1496 } 1497 } 1498 if (pv != NULL) { 1499 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 1500 pvh->pv_list_count--; 1501 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1502 } 1503 return (pv); 1504} 1505 1506static void 1507pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1508{ 1509 pv_entry_t pv; 1510 1511 pv = pmap_pvh_remove(pvh, pmap, va); 1512 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx", 1513 (u_long)VM_PAGE_TO_PHYS(member2struct(vm_page, md, pvh)), 1514 (u_long)va)); 1515 free_pv_entry(pv); 1516} 1517 1518static void 1519pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1520{ 1521 1522 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1523 pmap_pvh_free(&m->md, pmap, va); 1524 if (TAILQ_EMPTY(&m->md.pv_list)) 1525 vm_page_flag_clear(m, PG_WRITEABLE); 1526} 1527 1528/* 1529 * Conditionally create a pv entry. 1530 */ 1531static boolean_t 1532pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va, 1533 vm_page_t m) 1534{ 1535 pv_entry_t pv; 1536 1537 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1538 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1539 if (pv_entry_count < pv_entry_high_water && 1540 (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) { 1541 pv_entry_count++; 1542 pv->pv_va = va; 1543 pv->pv_pmap = pmap; 1544 pv->pv_ptem = mpte; 1545 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1546 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1547 m->md.pv_list_count++; 1548 return (TRUE); 1549 } else 1550 return (FALSE); 1551} 1552 1553/* 1554 * pmap_remove_pte: do the things to unmap a page in a process 1555 */ 1556static int 1557pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va) 1558{ 1559 pt_entry_t oldpte; 1560 vm_page_t m; 1561 vm_offset_t pa; 1562 1563 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1564 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1565 1566 oldpte = *ptq; 1567 if (is_kernel_pmap(pmap)) 1568 *ptq = PTE_G; 1569 else 1570 *ptq = 0; 1571 1572 if (pte_test(&oldpte, PTE_W)) 1573 pmap->pm_stats.wired_count -= 1; 1574 1575 pmap->pm_stats.resident_count -= 1; 1576 pa = TLBLO_PTE_TO_PA(oldpte); 1577 1578 if (page_is_managed(pa)) { 1579 m = PHYS_TO_VM_PAGE(pa); 1580 if (pte_test(&oldpte, PTE_D)) { 1581 KASSERT(!pte_test(&oldpte, PTE_RO), 1582 ("%s: modified page not writable: va: %p, pte: 0x%x", 1583 __func__, (void *)va, oldpte)); 1584 vm_page_dirty(m); 1585 } 1586 if (m->md.pv_flags & PV_TABLE_REF) 1587 vm_page_flag_set(m, PG_REFERENCED); 1588 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); 1589 1590 pmap_remove_entry(pmap, m, va); 1591 } 1592 return (pmap_unuse_pt(pmap, va, NULL)); 1593} 1594 1595/* 1596 * Remove a single page from a process address space 1597 */ 1598static void 1599pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1600{ 1601 pt_entry_t *ptq; 1602 1603 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1604 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1605 ptq = pmap_pte(pmap, va); 1606 1607 /* 1608 * if there is no pte for this address, just skip it!!! 
1609 */ 1610 if (!ptq || !pte_test(ptq, PTE_V)) { 1611 return; 1612 } 1613 1614 /* 1615 * Write back all caches from the page being destroyed 1616 */ 1617 mips_dcache_wbinv_range_index(va, PAGE_SIZE); 1618 1619 /* 1620 * get a local va for mappings for this pmap. 1621 */ 1622 (void)pmap_remove_pte(pmap, ptq, va); 1623 pmap_invalidate_page(pmap, va); 1624 1625 return; 1626} 1627 1628/* 1629 * Remove the given range of addresses from the specified map. 1630 * 1631 * It is assumed that the start and end are properly 1632 * rounded to the page size. 1633 */ 1634void 1635pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 1636{ 1637 vm_offset_t va_next; 1638 pd_entry_t *pde, *pdpe; 1639 pt_entry_t *pte; 1640 1641 if (pmap == NULL) 1642 return; 1643 1644 if (pmap->pm_stats.resident_count == 0) 1645 return; 1646 1647 vm_page_lock_queues(); 1648 PMAP_LOCK(pmap); 1649 1650 /* 1651 * special handling of removing one page. a very common operation 1652 * and easy to short circuit some code. 1653 */ 1654 if ((sva + PAGE_SIZE) == eva) { 1655 pmap_remove_page(pmap, sva); 1656 goto out; 1657 } 1658 for (; sva < eva; sva = va_next) { 1659 pdpe = pmap_segmap(pmap, sva); 1660#ifdef __mips_n64 1661 if (*pdpe == 0) { 1662 va_next = (sva + NBSEG) & ~SEGMASK; 1663 if (va_next < sva) 1664 va_next = eva; 1665 continue; 1666 } 1667#endif 1668 va_next = (sva + NBPDR) & ~PDRMASK; 1669 if (va_next < sva) 1670 va_next = eva; 1671 1672 pde = pmap_pdpe_to_pde(pdpe, sva); 1673 if (*pde == 0) 1674 continue; 1675 if (va_next > eva) 1676 va_next = eva; 1677 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; 1678 pte++, sva += PAGE_SIZE) { 1679 pmap_remove_page(pmap, sva); 1680 } 1681 } 1682out: 1683 vm_page_unlock_queues(); 1684 PMAP_UNLOCK(pmap); 1685} 1686 1687/* 1688 * Routine: pmap_remove_all 1689 * Function: 1690 * Removes this physical page from 1691 * all physical maps in which it resides. 1692 * Reflects back modify bits to the pager. 1693 * 1694 * Notes: 1695 * Original versions of this routine were very 1696 * inefficient because they iteratively called 1697 * pmap_remove (slow...) 1698 */ 1699 1700void 1701pmap_remove_all(vm_page_t m) 1702{ 1703 pv_entry_t pv; 1704 pt_entry_t *pte, tpte; 1705 1706 KASSERT((m->flags & PG_FICTITIOUS) == 0, 1707 ("pmap_remove_all: page %p is fictitious", m)); 1708 vm_page_lock_queues(); 1709 1710 if (m->md.pv_flags & PV_TABLE_REF) 1711 vm_page_flag_set(m, PG_REFERENCED); 1712 1713 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1714 PMAP_LOCK(pv->pv_pmap); 1715 1716 /* 1717 * If it's last mapping writeback all caches from 1718 * the page being destroyed 1719 */ 1720 if (m->md.pv_list_count == 1) 1721 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); 1722 1723 pv->pv_pmap->pm_stats.resident_count--; 1724 1725 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 1726 1727 tpte = *pte; 1728 if (is_kernel_pmap(pv->pv_pmap)) 1729 *pte = PTE_G; 1730 else 1731 *pte = 0; 1732 1733 if (pte_test(&tpte, PTE_W)) 1734 pv->pv_pmap->pm_stats.wired_count--; 1735 1736 /* 1737 * Update the vm_page_t clean and reference bits. 
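 * A mapping with PTE_D set was modified through this mapping, so
 * transfer that modification to the vm_page before the PTE is lost.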
1738 */ 1739 if (pte_test(&tpte, PTE_D)) { 1740 KASSERT(!pte_test(&tpte, PTE_RO), 1741 ("%s: modified page not writable: va: %p, pte: 0x%x", 1742 __func__, (void *)pv->pv_va, tpte)); 1743 vm_page_dirty(m); 1744 } 1745 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 1746 1747 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 1748 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1749 m->md.pv_list_count--; 1750 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 1751 PMAP_UNLOCK(pv->pv_pmap); 1752 free_pv_entry(pv); 1753 } 1754 1755 vm_page_flag_clear(m, PG_WRITEABLE); 1756 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); 1757 vm_page_unlock_queues(); 1758} 1759 1760/* 1761 * Set the physical protection on the 1762 * specified range of this map as requested. 1763 */ 1764void 1765pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1766{ 1767 pt_entry_t *pte; 1768 pd_entry_t *pde, *pdpe; 1769 vm_offset_t va_next; 1770 1771 if (pmap == NULL) 1772 return; 1773 1774 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1775 pmap_remove(pmap, sva, eva); 1776 return; 1777 } 1778 if (prot & VM_PROT_WRITE) 1779 return; 1780 1781 vm_page_lock_queues(); 1782 PMAP_LOCK(pmap); 1783 for (; sva < eva; sva = va_next) { 1784 pt_entry_t pbits; 1785 vm_page_t m; 1786 vm_paddr_t pa; 1787 1788 pdpe = pmap_segmap(pmap, sva); 1789#ifdef __mips_n64 1790 if (*pdpe == 0) { 1791 va_next = (sva + NBSEG) & ~SEGMASK; 1792 if (va_next < sva) 1793 va_next = eva; 1794 continue; 1795 } 1796#endif 1797 va_next = (sva + NBPDR) & ~PDRMASK; 1798 if (va_next < sva) 1799 va_next = eva; 1800 1801 pde = pmap_pdpe_to_pde(pdpe, sva); 1802 if (pde == NULL || *pde == NULL) 1803 continue; 1804 if (va_next > eva) 1805 va_next = eva; 1806 1807 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 1808 sva += PAGE_SIZE) { 1809 1810 /* Skip invalid PTEs */ 1811 if (!pte_test(pte, PTE_V)) 1812 continue; 1813 pbits = *pte; 1814 pa = TLBLO_PTE_TO_PA(pbits); 1815 if (page_is_managed(pa) && pte_test(&pbits, PTE_D)) { 1816 m = PHYS_TO_VM_PAGE(pa); 1817 vm_page_dirty(m); 1818 m->md.pv_flags &= ~PV_TABLE_MOD; 1819 } 1820 pte_clear(&pbits, PTE_D); 1821 pte_set(&pbits, PTE_RO); 1822 1823 if (pbits != *pte) { 1824 *pte = pbits; 1825 pmap_update_page(pmap, sva, pbits); 1826 } 1827 } 1828 } 1829 vm_page_unlock_queues(); 1830 PMAP_UNLOCK(pmap); 1831} 1832 1833/* 1834 * Insert the given physical page (p) at 1835 * the specified virtual address (v) in the 1836 * target physical map with the protection requested. 1837 * 1838 * If specified, the page will be wired down, meaning 1839 * that the related pte can not be reclaimed. 1840 * 1841 * NB: This is the only routine which MAY NOT lazy-evaluate 1842 * or lose information. That is, this routine must actually 1843 * insert this page into the given map NOW. 1844 */ 1845void 1846pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 1847 vm_prot_t prot, boolean_t wired) 1848{ 1849 vm_offset_t pa, opa; 1850 pt_entry_t *pte; 1851 pt_entry_t origpte, newpte; 1852 pv_entry_t pv; 1853 vm_page_t mpte, om; 1854 int rw = 0; 1855 1856 if (pmap == NULL) 1857 return; 1858 1859 va &= ~PAGE_MASK; 1860 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 1861 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || 1862 (m->oflags & VPO_BUSY) != 0, 1863 ("pmap_enter: page %p is not busy", m)); 1864 1865 mpte = NULL; 1866 1867 vm_page_lock_queues(); 1868 PMAP_LOCK(pmap); 1869 1870 /* 1871 * In the case that a page table page is not resident, we are 1872 * creating it here. 
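 * This only applies to user addresses; kernel page table pages are
 * preallocated at boot and grown as needed by pmap_growkernel().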
1873 */ 1874 if (va < VM_MAXUSER_ADDRESS) { 1875 mpte = pmap_allocpte(pmap, va, M_WAITOK); 1876 } 1877 pte = pmap_pte(pmap, va); 1878 1879 /* 1880 * Page Directory table entry not valid, we need a new PT page 1881 */ 1882 if (pte == NULL) { 1883 panic("pmap_enter: invalid page directory, pdir=%p, va=%p", 1884 (void *)pmap->pm_segtab, (void *)va); 1885 } 1886 pa = VM_PAGE_TO_PHYS(m); 1887 om = NULL; 1888 origpte = *pte; 1889 opa = TLBLO_PTE_TO_PA(origpte); 1890 1891 /* 1892 * Mapping has not changed, must be protection or wiring change. 1893 */ 1894 if (pte_test(&origpte, PTE_V) && opa == pa) { 1895 /* 1896 * Wiring change, just update stats. We don't worry about 1897 * wiring PT pages as they remain resident as long as there 1898 * are valid mappings in them. Hence, if a user page is 1899 * wired, the PT page will be also. 1900 */ 1901 if (wired && !pte_test(&origpte, PTE_W)) 1902 pmap->pm_stats.wired_count++; 1903 else if (!wired && pte_test(&origpte, PTE_W)) 1904 pmap->pm_stats.wired_count--; 1905 1906 KASSERT(!pte_test(&origpte, PTE_D | PTE_RO), 1907 ("%s: modified page not writable: va: %p, pte: 0x%x", 1908 __func__, (void *)va, origpte)); 1909 1910 /* 1911 * Remove extra pte reference 1912 */ 1913 if (mpte) 1914 mpte->wire_count--; 1915 1916 if (page_is_managed(opa)) { 1917 om = m; 1918 } 1919 goto validate; 1920 } 1921 1922 pv = NULL; 1923 1924 /* 1925 * Mapping has changed, invalidate old range and fall through to 1926 * handle validating new mapping. 1927 */ 1928 if (opa) { 1929 if (pte_test(&origpte, PTE_W)) 1930 pmap->pm_stats.wired_count--; 1931 1932 if (page_is_managed(opa)) { 1933 om = PHYS_TO_VM_PAGE(opa); 1934 pv = pmap_pvh_remove(&om->md, pmap, va); 1935 } 1936 if (mpte != NULL) { 1937 mpte->wire_count--; 1938 KASSERT(mpte->wire_count > 0, 1939 ("pmap_enter: missing reference to page table page," 1940 " va: %p", (void *)va)); 1941 } 1942 } else 1943 pmap->pm_stats.resident_count++; 1944 1945 /* 1946 * Enter on the PV list if part of our managed memory. Note that we 1947 * raise IPL while manipulating pv_table since pmap_enter can be 1948 * called at interrupt time. 1949 */ 1950 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 1951 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 1952 ("pmap_enter: managed mapping within the clean submap")); 1953 if (pv == NULL) 1954 pv = get_pv_entry(pmap); 1955 pv->pv_va = va; 1956 pv->pv_pmap = pmap; 1957 pv->pv_ptem = mpte; 1958 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1959 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1960 m->md.pv_list_count++; 1961 } else if (pv != NULL) 1962 free_pv_entry(pv); 1963 1964 /* 1965 * Increment counters 1966 */ 1967 if (wired) 1968 pmap->pm_stats.wired_count++; 1969 1970validate: 1971 if ((access & VM_PROT_WRITE) != 0) 1972 m->md.pv_flags |= PV_TABLE_MOD | PV_TABLE_REF; 1973 rw = init_pte_prot(va, m, prot); 1974 1975#ifdef PMAP_DEBUG 1976 printf("pmap_enter: va: %p -> pa: %p\n", (void *)va, (void *)pa); 1977#endif 1978 /* 1979 * Now validate mapping with desired protection/wiring. 1980 */ 1981 newpte = TLBLO_PA_TO_PFN(pa) | rw | PTE_V; 1982 1983 if (is_cacheable_mem(pa)) 1984 newpte |= PTE_C_CACHE; 1985 else 1986 newpte |= PTE_C_UNCACHED; 1987 1988 if (wired) 1989 newpte |= PTE_W; 1990 1991 if (is_kernel_pmap(pmap)) 1992 newpte |= PTE_G; 1993 1994 /* 1995 * if the mapping or permission bits are different, we need to 1996 * update the pte. 
1997 */ 1998 if (origpte != newpte) { 1999 if (pte_test(&origpte, PTE_V)) { 2000 *pte = newpte; 2001 if (page_is_managed(opa) && (opa != pa)) { 2002 if (om->md.pv_flags & PV_TABLE_REF) 2003 vm_page_flag_set(om, PG_REFERENCED); 2004 om->md.pv_flags &= 2005 ~(PV_TABLE_REF | PV_TABLE_MOD); 2006 } 2007 if (pte_test(&origpte, PTE_D)) { 2008 KASSERT(!pte_test(&origpte, PTE_RO), 2009 ("pmap_enter: modified page not writable:" 2010 " va: %p, pte: 0x%x", (void *)va, origpte)); 2011 if (page_is_managed(opa)) 2012 vm_page_dirty(om); 2013 } 2014 if (page_is_managed(opa) && 2015 TAILQ_EMPTY(&om->md.pv_list)) 2016 vm_page_flag_clear(om, PG_WRITEABLE); 2017 } else { 2018 *pte = newpte; 2019 } 2020 } 2021 pmap_update_page(pmap, va, newpte); 2022 2023 /* 2024 * Sync I & D caches for executable pages. Do this only if the the 2025 * target pmap belongs to the current process. Otherwise, an 2026 * unresolvable TLB miss may occur. 2027 */ 2028 if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) && 2029 (prot & VM_PROT_EXECUTE)) { 2030 mips_icache_sync_range(va, PAGE_SIZE); 2031 mips_dcache_wbinv_range(va, PAGE_SIZE); 2032 } 2033 vm_page_unlock_queues(); 2034 PMAP_UNLOCK(pmap); 2035} 2036 2037/* 2038 * this code makes some *MAJOR* assumptions: 2039 * 1. Current pmap & pmap exists. 2040 * 2. Not wired. 2041 * 3. Read access. 2042 * 4. No page table pages. 2043 * but is *MUCH* faster than pmap_enter... 2044 */ 2045 2046void 2047pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2048{ 2049 2050 vm_page_lock_queues(); 2051 PMAP_LOCK(pmap); 2052 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); 2053 vm_page_unlock_queues(); 2054 PMAP_UNLOCK(pmap); 2055} 2056 2057static vm_page_t 2058pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2059 vm_prot_t prot, vm_page_t mpte) 2060{ 2061 pt_entry_t *pte; 2062 vm_offset_t pa; 2063 2064 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2065 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 2066 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2067 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2068 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2069 2070 /* 2071 * In the case that a page table page is not resident, we are 2072 * creating it here. 2073 */ 2074 if (va < VM_MAXUSER_ADDRESS) { 2075 pd_entry_t *pde; 2076 unsigned ptepindex; 2077 2078 /* 2079 * Calculate pagetable page index 2080 */ 2081 ptepindex = pmap_pde_pindex(va); 2082 if (mpte && (mpte->pindex == ptepindex)) { 2083 mpte->wire_count++; 2084 } else { 2085 /* 2086 * Get the page directory entry 2087 */ 2088 pde = pmap_pde(pmap, va); 2089 2090 /* 2091 * If the page table page is mapped, we just 2092 * increment the hold count, and activate it. 2093 */ 2094 if (pde && *pde != 0) { 2095 if (pmap->pm_ptphint && 2096 (pmap->pm_ptphint->pindex == ptepindex)) { 2097 mpte = pmap->pm_ptphint; 2098 } else { 2099 mpte = PHYS_TO_VM_PAGE( 2100 MIPS_DIRECT_TO_PHYS(*pde)); 2101 pmap->pm_ptphint = mpte; 2102 } 2103 mpte->wire_count++; 2104 } else { 2105 mpte = _pmap_allocpte(pmap, ptepindex, 2106 M_NOWAIT); 2107 if (mpte == NULL) 2108 return (mpte); 2109 } 2110 } 2111 } else { 2112 mpte = NULL; 2113 } 2114 2115 pte = pmap_pte(pmap, va); 2116 if (pte_test(pte, PTE_V)) { 2117 if (mpte != NULL) { 2118 mpte->wire_count--; 2119 mpte = NULL; 2120 } 2121 return (mpte); 2122 } 2123 2124 /* 2125 * Enter on the PV list if part of our managed memory. 
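 * Unlike pmap_enter(), a failure to allocate a pv entry here is not
 * fatal; the mapping is simply not created and a later access takes
 * a normal page fault instead.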
2126 */
2127 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
2128 !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2129 if (mpte != NULL) {
2130 pmap_unwire_pte_hold(pmap, va, mpte);
2131 mpte = NULL;
2132 }
2133 return (mpte);
2134 }
2135
2136 /*
2137 * Increment counters
2138 */
2139 pmap->pm_stats.resident_count++;
2140
2141 pa = VM_PAGE_TO_PHYS(m);
2142
2143 /*
2144 * Now validate mapping with RO protection
2145 */
2146 *pte = TLBLO_PA_TO_PFN(pa) | PTE_V;
2147
2148 if (is_cacheable_mem(pa))
2149 *pte |= PTE_C_CACHE;
2150 else
2151 *pte |= PTE_C_UNCACHED;
2152
2153 if (is_kernel_pmap(pmap))
2154 *pte |= PTE_G;
2155 else {
2156 *pte |= PTE_RO;
2157 /*
2158 * Sync I & D caches. Do this only if the target pmap
2159 * belongs to the current process. Otherwise, an
2160 * unresolvable TLB miss may occur. */
2161 if (pmap == &curproc->p_vmspace->vm_pmap) {
2162 va &= ~PAGE_MASK;
2163 mips_icache_sync_range(va, PAGE_SIZE);
2164 mips_dcache_wbinv_range(va, PAGE_SIZE);
2165 }
2166 }
2167 return (mpte);
2168 }
2169
2170 /*
2171 * Make a temporary mapping for a physical address. This is only intended
2172 * to be used for panic dumps.
2173 *
2174 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2175 */
2176 void *
2177 pmap_kenter_temporary(vm_paddr_t pa, int i)
2178 {
2179 vm_offset_t va;
2180
2181 if (i != 0)
2182 printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
2183 __func__);
2184
2185 if (MIPS_DIRECT_MAPPABLE(pa)) {
2186 va = MIPS_PHYS_TO_DIRECT(pa);
2187 } else {
2188 #ifndef __mips_n64 /* XXX : to be converted to new style */
2189 int cpu;
2190 register_t intr;
2191 struct local_sysmaps *sysm;
2192 pt_entry_t *pte, npte;
2193
2194 /* If this is used other than for dumps, we may need to leave
2195 * interrupts disabled on return. If crash dumps don't work when
2196 * we get to this point, we might want to consider this (leaving things
2197 * disabled as a starting point ;-)
2198 */
2199 intr = intr_disable();
2200 cpu = PCPU_GET(cpuid);
2201 sysm = &sysmap_lmem[cpu];
2202 /* Since this is for the debugger, no locks or any other fun */
2203 npte = TLBLO_PA_TO_PFN(pa) | PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
2204 pte = pmap_pte(kernel_pmap, sysm->base);
2205 *pte = npte;
2206 sysm->valid1 = 1;
2207 pmap_update_page(kernel_pmap, sysm->base, npte);
2208 va = sysm->base;
2209 intr_restore(intr);
2210 #endif
2211 }
2212 return ((void *)va);
2213 }
2214
2215 void
2216 pmap_kenter_temporary_free(vm_paddr_t pa)
2217 {
2218 #ifndef __mips_n64 /* XXX : to be converted to new style */
2219 int cpu;
2220 register_t intr;
2221 struct local_sysmaps *sysm;
2222 #endif
2223
2224 if (MIPS_DIRECT_MAPPABLE(pa)) {
2225 /* nothing to do for this case */
2226 return;
2227 }
2228 #ifndef __mips_n64 /* XXX : to be converted to new style */
2229 cpu = PCPU_GET(cpuid);
2230 sysm = &sysmap_lmem[cpu];
2231 if (sysm->valid1) {
2232 pt_entry_t *pte;
2233
2234 intr = intr_disable();
2235 pte = pmap_pte(kernel_pmap, sysm->base);
2236 *pte = PTE_G;
2237 pmap_invalidate_page(kernel_pmap, sysm->base);
2238 intr_restore(intr);
2239 sysm->valid1 = 0;
2240 }
2241 #endif
2242 }
2243
2244 /*
2245 * This code was moved to the machine-independent
2246 * vm_map_pmap_enter().
2247 */
2248
2249 /*
2250 * Maps a sequence of resident pages belonging to the same object.
2251 * The sequence begins with the given page m_start. This page is
2252 * mapped at the given virtual address start.
Each subsequent page is 2253 * mapped at a virtual address that is offset from start by the same 2254 * amount as the page is offset from m_start within the object. The 2255 * last page in the sequence is the page with the largest offset from 2256 * m_start that can be mapped at a virtual address less than the given 2257 * virtual address end. Not every virtual page between start and end 2258 * is mapped; only those for which a resident page exists with the 2259 * corresponding offset from m_start are mapped. 2260 */ 2261void 2262pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2263 vm_page_t m_start, vm_prot_t prot) 2264{ 2265 vm_page_t m, mpte; 2266 vm_pindex_t diff, psize; 2267 2268 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 2269 psize = atop(end - start); 2270 mpte = NULL; 2271 m = m_start; 2272 vm_page_lock_queues(); 2273 PMAP_LOCK(pmap); 2274 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2275 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m, 2276 prot, mpte); 2277 m = TAILQ_NEXT(m, listq); 2278 } 2279 vm_page_unlock_queues(); 2280 PMAP_UNLOCK(pmap); 2281} 2282 2283/* 2284 * pmap_object_init_pt preloads the ptes for a given object 2285 * into the specified pmap. This eliminates the blast of soft 2286 * faults on process startup and immediately after an mmap. 2287 */ 2288void 2289pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2290 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 2291{ 2292 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2293 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2294 ("pmap_object_init_pt: non-device object")); 2295} 2296 2297/* 2298 * Routine: pmap_change_wiring 2299 * Function: Change the wiring attribute for a map/virtual-address 2300 * pair. 2301 * In/out conditions: 2302 * The mapping must already exist in the pmap. 2303 */ 2304void 2305pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 2306{ 2307 pt_entry_t *pte; 2308 2309 if (pmap == NULL) 2310 return; 2311 2312 PMAP_LOCK(pmap); 2313 pte = pmap_pte(pmap, va); 2314 2315 if (wired && !pte_test(pte, PTE_W)) 2316 pmap->pm_stats.wired_count++; 2317 else if (!wired && pte_test(pte, PTE_W)) 2318 pmap->pm_stats.wired_count--; 2319 2320 /* 2321 * Wiring is not a hardware characteristic so there is no need to 2322 * invalidate TLB. 2323 */ 2324 if (wired) 2325 pte_set(pte, PTE_W); 2326 else 2327 pte_clear(pte, PTE_W); 2328 PMAP_UNLOCK(pmap); 2329} 2330 2331/* 2332 * Copy the range specified by src_addr/len 2333 * from the source map to the range dst_addr/len 2334 * in the destination map. 2335 * 2336 * This routine is only advisory and need not do anything. 2337 */ 2338 2339void 2340pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 2341 vm_size_t len, vm_offset_t src_addr) 2342{ 2343} 2344 2345/* 2346 * pmap_zero_page zeros the specified hardware page by mapping 2347 * the page into KVM and using bzero to clear its contents. 2348 * 2349 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 
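 *
 * (For example, on a 32-bit kernel a direct-mappable page at physical
 * address 0x01000000 is simply addressed through KSEG0 at virtual
 * address 0x81000000 and needs no temporary mapping; only pages outside
 * the 512MB direct-mapped window fall back to pmap_lmem_map1().)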
2350 */ 2351void 2352pmap_zero_page(vm_page_t m) 2353{ 2354 vm_offset_t va; 2355 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2356 2357 if (MIPS_DIRECT_MAPPABLE(phys)) { 2358 va = MIPS_PHYS_TO_DIRECT(phys); 2359 bzero((caddr_t)va, PAGE_SIZE); 2360 mips_dcache_wbinv_range(va, PAGE_SIZE); 2361 } else { 2362 va = pmap_lmem_map1(phys); 2363 bzero((caddr_t)va, PAGE_SIZE); 2364 mips_dcache_wbinv_range(va, PAGE_SIZE); 2365 pmap_lmem_unmap(); 2366 } 2367} 2368 2369/* 2370 * pmap_zero_page_area zeros the specified hardware page by mapping 2371 * the page into KVM and using bzero to clear its contents. 2372 * 2373 * off and size may not cover an area beyond a single hardware page. 2374 */ 2375void 2376pmap_zero_page_area(vm_page_t m, int off, int size) 2377{ 2378 vm_offset_t va; 2379 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2380 2381 if (MIPS_DIRECT_MAPPABLE(phys)) { 2382 va = MIPS_PHYS_TO_DIRECT(phys); 2383 bzero((char *)(caddr_t)va + off, size); 2384 mips_dcache_wbinv_range(va + off, size); 2385 } else { 2386 va = pmap_lmem_map1(phys); 2387 bzero((char *)va + off, size); 2388 mips_dcache_wbinv_range(va + off, size); 2389 pmap_lmem_unmap(); 2390 } 2391} 2392 2393void 2394pmap_zero_page_idle(vm_page_t m) 2395{ 2396 vm_offset_t va; 2397 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2398 2399 if (MIPS_DIRECT_MAPPABLE(phys)) { 2400 va = MIPS_PHYS_TO_DIRECT(phys); 2401 bzero((caddr_t)va, PAGE_SIZE); 2402 mips_dcache_wbinv_range(va, PAGE_SIZE); 2403 } else { 2404 va = pmap_lmem_map1(phys); 2405 bzero((caddr_t)va, PAGE_SIZE); 2406 mips_dcache_wbinv_range(va, PAGE_SIZE); 2407 pmap_lmem_unmap(); 2408 } 2409} 2410 2411/* 2412 * pmap_copy_page copies the specified (machine independent) 2413 * page by mapping the page into virtual memory and using 2414 * bcopy to copy the page, one machine dependent page at a 2415 * time. 2416 * 2417 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2418 */ 2419void 2420pmap_copy_page(vm_page_t src, vm_page_t dst) 2421{ 2422 vm_offset_t va_src, va_dst; 2423 vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src); 2424 vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst); 2425 2426 if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) { 2427 /* easy case, all can be accessed via KSEG0 */ 2428 /* 2429 * Flush all caches for VA that are mapped to this page 2430 * to make sure that data in SDRAM is up to date 2431 */ 2432 pmap_flush_pvcache(src); 2433 mips_dcache_wbinv_range_index( 2434 MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE); 2435 va_src = MIPS_PHYS_TO_DIRECT(phys_src); 2436 va_dst = MIPS_PHYS_TO_DIRECT(phys_dst); 2437 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); 2438 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2439 } else { 2440 va_src = pmap_lmem_map2(phys_src, phys_dst); 2441 va_dst = va_src + PAGE_SIZE; 2442 bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE); 2443 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2444 pmap_lmem_unmap(); 2445 } 2446} 2447 2448/* 2449 * Returns true if the pmap's pv is one of the first 2450 * 16 pvs linked to from this page. This count may 2451 * be changed upwards or downwards in the future; it 2452 * is only necessary that true be returned for a small 2453 * subset of pmaps for proper page aging. 
2454 */ 2455boolean_t 2456pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2457{ 2458 pv_entry_t pv; 2459 int loops = 0; 2460 boolean_t rv; 2461 2462 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2463 ("pmap_page_exists_quick: page %p is not managed", m)); 2464 rv = FALSE; 2465 vm_page_lock_queues(); 2466 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2467 if (pv->pv_pmap == pmap) { 2468 rv = TRUE; 2469 break; 2470 } 2471 loops++; 2472 if (loops >= 16) 2473 break; 2474 } 2475 vm_page_unlock_queues(); 2476 return (rv); 2477} 2478 2479/* 2480 * Remove all pages from specified address space 2481 * this aids process exit speeds. Also, this code 2482 * is special cased for current process only, but 2483 * can have the more generic (and slightly slower) 2484 * mode enabled. This is much faster than pmap_remove 2485 * in the case of running down an entire address space. 2486 */ 2487void 2488pmap_remove_pages(pmap_t pmap) 2489{ 2490 pt_entry_t *pte, tpte; 2491 pv_entry_t pv, npv; 2492 vm_page_t m; 2493 2494 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 2495 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2496 return; 2497 } 2498 vm_page_lock_queues(); 2499 PMAP_LOCK(pmap); 2500 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv != NULL; pv = npv) { 2501 2502 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2503 if (!pte_test(pte, PTE_V)) 2504 panic("pmap_remove_pages: page on pm_pvlist has no pte"); 2505 tpte = *pte; 2506 2507/* 2508 * We cannot remove wired pages from a process' mapping at this time 2509 */ 2510 if (pte_test(&tpte, PTE_W)) { 2511 npv = TAILQ_NEXT(pv, pv_plist); 2512 continue; 2513 } 2514 *pte = is_kernel_pmap(pmap) ? PTE_G : 0; 2515 2516 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte)); 2517 KASSERT(m != NULL, 2518 ("pmap_remove_pages: bad tpte %x", tpte)); 2519 2520 pv->pv_pmap->pm_stats.resident_count--; 2521 2522 /* 2523 * Update the vm_page_t clean and reference bits. 2524 */ 2525 if (pte_test(&tpte, PTE_D)) { 2526 vm_page_dirty(m); 2527 } 2528 npv = TAILQ_NEXT(pv, pv_plist); 2529 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2530 2531 m->md.pv_list_count--; 2532 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2533 if (TAILQ_FIRST(&m->md.pv_list) == NULL) { 2534 vm_page_flag_clear(m, PG_WRITEABLE); 2535 } 2536 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2537 free_pv_entry(pv); 2538 } 2539 pmap_invalidate_all(pmap); 2540 PMAP_UNLOCK(pmap); 2541 vm_page_unlock_queues(); 2542} 2543 2544/* 2545 * pmap_testbit tests bits in pte's 2546 * note that the testbit/changebit routines are inline, 2547 * and a lot of things compile-time evaluate. 
2548 */ 2549static boolean_t 2550pmap_testbit(vm_page_t m, int bit) 2551{ 2552 pv_entry_t pv; 2553 pt_entry_t *pte; 2554 boolean_t rv = FALSE; 2555 2556 if (m->flags & PG_FICTITIOUS) 2557 return (rv); 2558 2559 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 2560 return (rv); 2561 2562 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2563 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2564 PMAP_LOCK(pv->pv_pmap); 2565 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2566 rv = pte_test(pte, bit); 2567 PMAP_UNLOCK(pv->pv_pmap); 2568 if (rv) 2569 break; 2570 } 2571 return (rv); 2572} 2573 2574/* 2575 * this routine is used to clear dirty bits in ptes 2576 */ 2577static __inline void 2578pmap_changebit(vm_page_t m, int bit, boolean_t setem) 2579{ 2580 pv_entry_t pv; 2581 pt_entry_t *pte; 2582 2583 if (m->flags & PG_FICTITIOUS) 2584 return; 2585 2586 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2587 /* 2588 * Loop over all current mappings setting/clearing as appropos If 2589 * setting RO do we need to clear the VAC? 2590 */ 2591 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2592 PMAP_LOCK(pv->pv_pmap); 2593 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2594 if (setem) { 2595 *pte |= bit; 2596 pmap_update_page(pv->pv_pmap, pv->pv_va, *pte); 2597 } else { 2598 pt_entry_t pbits = *pte; 2599 2600 if (pbits & bit) { 2601 if (bit == PTE_D) { 2602 if (pbits & PTE_D) 2603 vm_page_dirty(m); 2604 *pte = (pbits & ~PTE_D) | PTE_RO; 2605 } else { 2606 *pte = pbits & ~bit; 2607 } 2608 pmap_update_page(pv->pv_pmap, pv->pv_va, *pte); 2609 } 2610 } 2611 PMAP_UNLOCK(pv->pv_pmap); 2612 } 2613 if (!setem && bit == PTE_D) 2614 vm_page_flag_clear(m, PG_WRITEABLE); 2615} 2616 2617/* 2618 * pmap_page_wired_mappings: 2619 * 2620 * Return the number of managed mappings to the given physical page 2621 * that are wired. 2622 */ 2623int 2624pmap_page_wired_mappings(vm_page_t m) 2625{ 2626 pv_entry_t pv; 2627 pmap_t pmap; 2628 pt_entry_t *pte; 2629 int count; 2630 2631 count = 0; 2632 if ((m->flags & PG_FICTITIOUS) != 0) 2633 return (count); 2634 vm_page_lock_queues(); 2635 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2636 pmap = pv->pv_pmap; 2637 PMAP_LOCK(pmap); 2638 pte = pmap_pte(pmap, pv->pv_va); 2639 if (pte_test(pte, PTE_W)) 2640 count++; 2641 PMAP_UNLOCK(pmap); 2642 } 2643 vm_page_unlock_queues(); 2644 return (count); 2645} 2646 2647/* 2648 * Clear the write and modified bits in each of the given page's mappings. 2649 */ 2650void 2651pmap_remove_write(vm_page_t m) 2652{ 2653 pv_entry_t pv, npv; 2654 vm_offset_t va; 2655 pt_entry_t *pte; 2656 2657 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2658 ("pmap_remove_write: page %p is not managed", m)); 2659 2660 /* 2661 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by 2662 * another thread while the object is locked. Thus, if PG_WRITEABLE 2663 * is clear, no page table entries need updating. 2664 */ 2665 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2666 if ((m->oflags & VPO_BUSY) == 0 && 2667 (m->flags & PG_WRITEABLE) == 0) 2668 return; 2669 2670 /* 2671 * Loop over all current mappings setting/clearing as appropos. 
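 *
 * (The downgrade itself is delegated to pmap_protect(), called below on
 * each mapping with VM_PROT_READ | VM_PROT_EXECUTE; once every mapping
 * has been downgraded, PG_WRITEABLE is cleared on the page.)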
2672 */
2673 vm_page_lock_queues();
2674 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) {
2675 npv = TAILQ_NEXT(pv, pv_list);
2676 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2677 if (pte == NULL || !pte_test(pte, PTE_V))
2678 panic("page on pm_pvlist has no pte");
2679
2680 va = pv->pv_va;
2681 pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE,
2682 VM_PROT_READ | VM_PROT_EXECUTE);
2683 }
2684 vm_page_flag_clear(m, PG_WRITEABLE);
2685 vm_page_unlock_queues();
2686 }
2687
2688 /*
2689 * pmap_ts_referenced:
2690 *
2691 * Return the count of reference bits for a page, clearing all of them.
2692 */
2693 int
2694 pmap_ts_referenced(vm_page_t m)
2695 {
2696
2697 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2698 ("pmap_ts_referenced: page %p is not managed", m));
2699 if (m->md.pv_flags & PV_TABLE_REF) {
2700 vm_page_lock_queues();
2701 m->md.pv_flags &= ~PV_TABLE_REF;
2702 vm_page_unlock_queues();
2703 return (1);
2704 }
2705 return (0);
2706 }
2707
2708 /*
2709 * pmap_is_modified:
2710 *
2711 * Return whether or not the specified physical page was modified
2712 * in any physical maps.
2713 */
2714 boolean_t
2715 pmap_is_modified(vm_page_t m)
2716 {
2717 boolean_t rv;
2718
2719 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2720 ("pmap_is_modified: page %p is not managed", m));
2721
2722 /*
2723 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
2724 * concurrently set while the object is locked. Thus, if PG_WRITEABLE
2725 * is clear, no PTEs can have PTE_D set.
2726 */
2727 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2728 if ((m->oflags & VPO_BUSY) == 0 &&
2729 (m->flags & PG_WRITEABLE) == 0)
2730 return (FALSE);
2731 vm_page_lock_queues();
2732 if (m->md.pv_flags & PV_TABLE_MOD)
2733 rv = TRUE;
2734 else
2735 rv = pmap_testbit(m, PTE_D);
2736 vm_page_unlock_queues();
2737 return (rv);
2738 }
2739
2740 /* N/C */
2741
2742 /*
2743 * pmap_is_prefaultable:
2744 *
2745 * Return whether or not the specified virtual address is eligible
2746 * for prefault.
2747 */
2748 boolean_t
2749 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2750 {
2751 pd_entry_t *pde;
2752 pt_entry_t *pte;
2753 boolean_t rv;
2754
2755 rv = FALSE;
2756 PMAP_LOCK(pmap);
2757 pde = pmap_pde(pmap, addr);
2758 if (pde != NULL && *pde != 0) {
2759 pte = pmap_pde_to_pte(pde, addr);
2760 rv = (*pte == 0);
2761 }
2762 PMAP_UNLOCK(pmap);
2763 return (rv);
2764 }
2765
2766 /*
2767 * Clear the modify bits on the specified physical page.
2768 */
2769 void
2770 pmap_clear_modify(vm_page_t m)
2771 {
2772
2773 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2774 ("pmap_clear_modify: page %p is not managed", m));
2775 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2776 KASSERT((m->oflags & VPO_BUSY) == 0,
2777 ("pmap_clear_modify: page %p is busy", m));
2778
2779 /*
2780 * If the page is not PG_WRITEABLE, then no PTEs can have PTE_D set.
2781 * If the object containing the page is locked and the page is not
2782 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
2783 */
2784 if ((m->flags & PG_WRITEABLE) == 0)
2785 return;
2786 vm_page_lock_queues();
2787 if (m->md.pv_flags & PV_TABLE_MOD) {
2788 pmap_changebit(m, PTE_D, FALSE);
2789 m->md.pv_flags &= ~PV_TABLE_MOD;
2790 }
2791 vm_page_unlock_queues();
2792 }
2793
2794 /*
2795 * pmap_is_referenced:
2796 *
2797 * Return whether or not the specified physical page was referenced
2798 * in any physical maps.
2799 */
2800 boolean_t
2801 pmap_is_referenced(vm_page_t m)
2802 {
2803
2804 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2805 ("pmap_is_referenced: page %p is not managed", m));
2806 return ((m->md.pv_flags & PV_TABLE_REF) != 0);
2807 }
2808
2809 /*
2810 * pmap_clear_reference:
2811 *
2812 * Clear the reference bit on the specified physical page.
2813 */
2814 void
2815 pmap_clear_reference(vm_page_t m)
2816 {
2817
2818 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2819 ("pmap_clear_reference: page %p is not managed", m));
2820 vm_page_lock_queues();
2821 if (m->md.pv_flags & PV_TABLE_REF) {
2822 m->md.pv_flags &= ~PV_TABLE_REF;
2823 }
2824 vm_page_unlock_queues();
2825 }
2826
2827 /*
2828 * Miscellaneous support routines follow
2829 */
2830
2838 /*
2839 * Map a set of physical memory pages into the kernel virtual
2840 * address space. Return a pointer to where it is mapped. This
2841 * routine is intended to be used for mapping device memory,
2842 * NOT real memory.
2843 *
2844 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit.
2845 */
2846 void *
2847 pmap_mapdev(vm_offset_t pa, vm_size_t size)
2848 {
2849 vm_offset_t va, tmpva, offset;
2850
2851 /*
2852 * KSEG1 maps only the first 512M of the physical address space. For
2853 * pa > 0x20000000 we must make a proper mapping using pmap_kenter.
2854 */
2855 if (MIPS_DIRECT_MAPPABLE(pa + size - 1))
2856 return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa));
2857 else {
2858 offset = pa & PAGE_MASK;
2859 size = roundup(size + offset, PAGE_SIZE);
2860
2861 va = kmem_alloc_nofault(kernel_map, size);
2862 if (!va)
2863 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2864 pa = trunc_page(pa);
2865 for (tmpva = va; size > 0;) {
2866 pmap_kenter(tmpva, pa);
2867 size -= PAGE_SIZE;
2868 tmpva += PAGE_SIZE;
2869 pa += PAGE_SIZE;
2870 }
2871 }
2872
2873 return ((void *)(va + offset));
2874 }
2875
2876 void
2877 pmap_unmapdev(vm_offset_t va, vm_size_t size)
2878 {
2879 #ifndef __mips_n64
2880 vm_offset_t base, offset, tmpva;
2881
2882 /* If the address is within KSEG1 then there is nothing to do */
2883 if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END)
2884 return;
2885
2886 base = trunc_page(va);
2887 offset = va & PAGE_MASK;
2888 size = roundup(size + offset, PAGE_SIZE);
2889 for (tmpva = base; tmpva < base + size; tmpva += PAGE_SIZE)
2890 pmap_kremove(tmpva);
2891 kmem_free(kernel_map, base, size);
2892 #endif
2893 }
2894
2895 /*
2896 * Perform the pmap work for mincore().
2897 */
2898 int
2899 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2900 {
2901 pt_entry_t *ptep, pte;
2902 vm_offset_t pa;
2903 vm_page_t m;
2904 int val;
2905 boolean_t managed;
2906
2907 PMAP_LOCK(pmap);
2908 retry:
2909 ptep = pmap_pte(pmap, addr);
2910 pte = (ptep != NULL) ? *ptep : 0;
2911 if (!pte_test(&pte, PTE_V)) {
2912 val = 0;
2913 goto out;
2914 }
2915 val = MINCORE_INCORE;
2916 if (pte_test(&pte, PTE_D))
2917 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2918 pa = TLBLO_PTE_TO_PA(pte);
2919 managed = page_is_managed(pa);
2920 if (managed) {
2921 /*
2922 * This may falsely report the given address as
2923 * MINCORE_REFERENCED. Unfortunately, due to the lack of
2924 * per-PTE reference information, it is impossible to
2925 * determine if the address is MINCORE_REFERENCED.
2926 */ 2927 m = PHYS_TO_VM_PAGE(pa); 2928 if ((m->flags & PG_REFERENCED) != 0) 2929 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 2930 } 2931 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 2932 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { 2933 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ 2934 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 2935 goto retry; 2936 } else 2937out: 2938 PA_UNLOCK_COND(*locked_pa); 2939 PMAP_UNLOCK(pmap); 2940 return (val); 2941} 2942 2943void 2944pmap_activate(struct thread *td) 2945{ 2946 pmap_t pmap, oldpmap; 2947 struct proc *p = td->td_proc; 2948 2949 critical_enter(); 2950 2951 pmap = vmspace_pmap(p->p_vmspace); 2952 oldpmap = PCPU_GET(curpmap); 2953 2954 if (oldpmap) 2955 atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask)); 2956 atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask)); 2957 pmap_asid_alloc(pmap); 2958 if (td == curthread) { 2959 PCPU_SET(segbase, pmap->pm_segtab); 2960 mips_wr_entryhi(pmap->pm_asid[PCPU_GET(cpuid)].asid); 2961 } 2962 2963 PCPU_SET(curpmap, pmap); 2964 critical_exit(); 2965} 2966 2967void 2968pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 2969{ 2970} 2971 2972/* 2973 * Increase the starting virtual address of the given mapping if a 2974 * different alignment might result in more superpage mappings. 2975 */ 2976void 2977pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 2978 vm_offset_t *addr, vm_size_t size) 2979{ 2980 vm_offset_t superpage_offset; 2981 2982 if (size < NBSEG) 2983 return; 2984 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 2985 offset += ptoa(object->pg_color); 2986 superpage_offset = offset & SEGMASK; 2987 if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG || 2988 (*addr & SEGMASK) == superpage_offset) 2989 return; 2990 if ((*addr & SEGMASK) < superpage_offset) 2991 *addr = (*addr & ~SEGMASK) + superpage_offset; 2992 else 2993 *addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset; 2994} 2995 2996/* 2997 * Increase the starting virtual address of the given mapping so 2998 * that it is aligned to not be the second page in a TLB entry. 2999 * This routine assumes that the length is appropriately-sized so 3000 * that the allocation does not share a TLB entry at all if required. 
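 *
 * (A MIPS TLB entry maps an even/odd pair of virtual pages. With 4KB
 * pages, for example, a mapping starting at VA 0x1000 sits on an odd
 * page and would share the entry covering 0x0000-0x1fff; bumping the
 * start to 0x2000 lets the mapping begin on an even page and avoid
 * sharing a TLB entry with whatever precedes it.)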
3001 */ 3002void 3003pmap_align_tlb(vm_offset_t *addr) 3004{ 3005 if ((*addr & PAGE_SIZE) == 0) 3006 return; 3007 *addr += PAGE_SIZE; 3008 return; 3009} 3010 3011#ifdef DDB 3012DB_SHOW_COMMAND(ptable, ddb_pid_dump) 3013{ 3014 pmap_t pmap; 3015 struct thread *td = NULL; 3016 struct proc *p; 3017 int i, j, k; 3018 vm_paddr_t pa; 3019 vm_offset_t va; 3020 3021 if (have_addr) { 3022 td = db_lookup_thread(addr, TRUE); 3023 if (td == NULL) { 3024 db_printf("Invalid pid or tid"); 3025 return; 3026 } 3027 p = td->td_proc; 3028 if (p->p_vmspace == NULL) { 3029 db_printf("No vmspace for process"); 3030 return; 3031 } 3032 pmap = vmspace_pmap(p->p_vmspace); 3033 } else 3034 pmap = kernel_pmap; 3035 3036 db_printf("pmap:%p segtab:%p asid:%x generation:%x\n", 3037 pmap, pmap->pm_segtab, pmap->pm_asid[0].asid, 3038 pmap->pm_asid[0].gen); 3039 for (i = 0; i < NPDEPG; i++) { 3040 pd_entry_t *pdpe; 3041 pt_entry_t *pde; 3042 pt_entry_t pte; 3043 3044 pdpe = (pd_entry_t *)pmap->pm_segtab[i]; 3045 if (pdpe == NULL) 3046 continue; 3047 db_printf("[%4d] %p\n", i, pdpe); 3048#ifdef __mips_n64 3049 for (j = 0; j < NPDEPG; j++) { 3050 pde = (pt_entry_t *)pdpe[j]; 3051 if (pde == NULL) 3052 continue; 3053 db_printf("\t[%4d] %p\n", j, pde); 3054#else 3055 { 3056 j = 0; 3057 pde = (pt_entry_t *)pdpe; 3058#endif 3059 for (k = 0; k < NPTEPG; k++) { 3060 pte = pde[k]; 3061 if (pte == 0 || !pte_test(&pte, PTE_V)) 3062 continue; 3063 pa = TLBLO_PTE_TO_PA(pte); 3064 va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT); 3065 db_printf("\t\t[%04d] va: %p pte: %8x pa:%lx\n", 3066 k, (void *)va, pte, (u_long)pa); 3067 } 3068 } 3069 } 3070} 3071#endif 3072 3073#if defined(DEBUG) 3074 3075static void pads(pmap_t pm); 3076void pmap_pvdump(vm_offset_t pa); 3077 3078/* print address space of pmap*/ 3079static void 3080pads(pmap_t pm) 3081{ 3082 unsigned va, i, j; 3083 pt_entry_t *ptep; 3084 3085 if (pm == kernel_pmap) 3086 return; 3087 for (i = 0; i < NPTEPG; i++) 3088 if (pm->pm_segtab[i]) 3089 for (j = 0; j < NPTEPG; j++) { 3090 va = (i << SEGSHIFT) + (j << PAGE_SHIFT); 3091 if (pm == kernel_pmap && va < KERNBASE) 3092 continue; 3093 if (pm != kernel_pmap && 3094 va >= VM_MAXUSER_ADDRESS) 3095 continue; 3096 ptep = pmap_pte(pm, va); 3097 if (pmap_pte_v(ptep)) 3098 printf("%x:%x ", va, *(int *)ptep); 3099 } 3100 3101} 3102 3103void 3104pmap_pvdump(vm_offset_t pa) 3105{ 3106 register pv_entry_t pv; 3107 vm_page_t m; 3108 3109 printf("pa %x", pa); 3110 m = PHYS_TO_VM_PAGE(pa); 3111 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3112 pv = TAILQ_NEXT(pv, pv_list)) { 3113 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3114 pads(pv->pv_pmap); 3115 } 3116 printf(" "); 3117} 3118 3119/* N/C */ 3120#endif 3121 3122 3123/* 3124 * Allocate TLB address space tag (called ASID or TLBPID) and return it. 3125 * It takes almost as much or more time to search the TLB for a 3126 * specific ASID and flush those entries as it does to flush the entire TLB. 3127 * Therefore, when we allocate a new ASID, we just take the next number. When 3128 * we run out of numbers, we flush the TLB, increment the generation count 3129 * and start over. ASID zero is reserved for kernel use. 
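 *
 * (For illustration, if pmap_max_asid were 256: ASIDs 1 through 255 are
 * handed out sequentially as pmaps are activated; when next_asid reaches
 * 256 the user TLB entries are flushed, the per-CPU generation counter
 * is bumped, and numbering restarts at 1. A pmap whose saved generation
 * no longer matches simply takes a fresh ASID the next time it is
 * activated.)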
3130 */
3131 static void
3132 pmap_asid_alloc(pmap)
3133 pmap_t pmap;
3134 {
3135 if (pmap->pm_asid[PCPU_GET(cpuid)].asid != PMAP_ASID_RESERVED &&
3136 pmap->pm_asid[PCPU_GET(cpuid)].gen == PCPU_GET(asid_generation)); /* ASID still valid, nothing to do */
3137 else {
3138 if (PCPU_GET(next_asid) == pmap_max_asid) {
3139 tlb_invalidate_all_user(NULL);
3140 PCPU_SET(asid_generation,
3141 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
3142 if (PCPU_GET(asid_generation) == 0) {
3143 PCPU_SET(asid_generation, 1);
3144 }
3145 PCPU_SET(next_asid, 1); /* 0 means invalid */
3146 }
3147 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3148 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3149 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3150 }
3151 }
3152
3153 int
3154 page_is_managed(vm_offset_t pa)
3155 {
3156 vm_offset_t pgnum = mips_btop(pa);
3157
3158 if (pgnum >= first_page) {
3159 vm_page_t m;
3160
3161 m = PHYS_TO_VM_PAGE(pa);
3162 if (m == NULL)
3163 return (0);
3164 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
3165 return (1);
3166 }
3167 return (0);
3168 }
3169
3170 static int
3171 init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot)
3172 {
3173 int rw;
3174
3175 if (!(prot & VM_PROT_WRITE))
3176 rw = PTE_V | PTE_RO | PTE_C_CACHE;
3177 else if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
3178 if ((m->md.pv_flags & PV_TABLE_MOD) != 0)
3179 rw = PTE_V | PTE_D | PTE_C_CACHE;
3180 else
3181 rw = PTE_V | PTE_C_CACHE;
3182 vm_page_flag_set(m, PG_WRITEABLE);
3183 } else
3184 /* Needn't emulate a modified bit for unmanaged pages. */
3185 rw = PTE_V | PTE_D | PTE_C_CACHE;
3186 return (rw);
3187 }
3188
3189 /*
3190 * pmap_emulate_modified: do dirty bit emulation
3191 *
3192 * On SMP, update just the local TLB; other CPUs will update their
3193 * TLBs from the PTE lazily if they take the exception.
3194 * Returns 0 in case of success, 1 if the page is read-only and we
3195 * need to fault.
3196 */
3197 int
3198 pmap_emulate_modified(pmap_t pmap, vm_offset_t va)
3199 {
3200 vm_page_t m;
3201 pt_entry_t *pte;
3202 vm_offset_t pa;
3203
3204 PMAP_LOCK(pmap);
3205 pte = pmap_pte(pmap, va);
3206 if (pte == NULL)
3207 panic("pmap_emulate_modified: can't find PTE");
3208 #ifdef SMP
3209 /* It is possible that some other CPU changed m-bit */
3210 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) {
3211 pmap_update_page_local(pmap, va, *pte);
3212 PMAP_UNLOCK(pmap);
3213 return (0);
3214 }
3215 #else
3216 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D))
3217 panic("pmap_emulate_modified: invalid pte");
3218 #endif
3219 if (pte_test(pte, PTE_RO)) {
3220 /* write to read only page in the kernel */
3221 PMAP_UNLOCK(pmap);
3222 return (1);
3223 }
3224 pte_set(pte, PTE_D);
3225 pmap_update_page_local(pmap, va, *pte);
3226 pa = TLBLO_PTE_TO_PA(*pte);
3227 if (!page_is_managed(pa))
3228 panic("pmap_emulate_modified: unmanaged page");
3229 m = PHYS_TO_VM_PAGE(pa);
3230 m->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD);
3231 PMAP_UNLOCK(pmap);
3232 return (0);
3233 }
3234
3235 /*
3236 * Routine: pmap_kextract
3237 * Function:
3238 * Extract the physical page address associated with the given
3239 * virtual address.
3240 */
3241 /* PMAP_INLINE */ vm_offset_t
3242 pmap_kextract(vm_offset_t va)
3243 {
3244 int mapped;
3245
3246 /*
3247 * First, the direct-mapped regions.
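 *
 * (On 32-bit kernels these are KSEG0, 0x80000000-0x9fffffff cached, and
 * KSEG1, 0xa0000000-0xbfffffff uncached, each a fixed window onto the
 * low 512MB of physical memory; 64-bit kernels add XKPHYS. For these
 * ranges the physical address follows from simple arithmetic, with no
 * page table walk.)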
3248 */
3249 #if defined(__mips_n64)
3250 if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END)
3251 return (MIPS_XKPHYS_TO_PHYS(va));
3252 #endif
3253 if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END)
3254 return (MIPS_KSEG0_TO_PHYS(va));
3255
3256 if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END)
3257 return (MIPS_KSEG1_TO_PHYS(va));
3258
3259 /*
3260 * User virtual addresses.
3261 */
3262 if (va < VM_MAXUSER_ADDRESS) {
3263 pt_entry_t *ptep;
3264
3265 if (curproc && curproc->p_vmspace) {
3266 ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3267 if (ptep) {
3268 return (TLBLO_PTE_TO_PA(*ptep) |
3269 (va & PAGE_MASK));
3270 }
3271 return (0);
3272 }
3273 }
3274
3275 /*
3276 * Should be a kernel virtual address here, otherwise fail
3277 */
3278 mapped = (va >= MIPS_KSEG2_START && va < MIPS_KSEG2_END);
3279 #if defined(__mips_n64)
3280 mapped = mapped || (va >= MIPS_XKSEG_START && va < MIPS_XKSEG_END);
3281 #endif
3282 /*
3283 * Kernel virtual.
3284 */
3285
3286 if (mapped) {
3287 pt_entry_t *ptep;
3288
3289 /* Is the kernel pmap initialized? */
3290 if (kernel_pmap->pm_active) {
3291 /* It's inside the virtual address range */
3292 ptep = pmap_pte(kernel_pmap, va);
3293 if (ptep) {
3294 return (TLBLO_PTE_TO_PA(*ptep) |
3295 (va & PAGE_MASK));
3296 }
3297 }
3298 return (0);
3299 }
3300
3301 panic("%s for unknown address space %p.", __func__, (void *)va);
3302 }
3303
3304
3305 void
3306 pmap_flush_pvcache(vm_page_t m)
3307 {
3308 pv_entry_t pv;
3309
3310 if (m != NULL) {
3311 for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3312 pv = TAILQ_NEXT(pv, pv_list)) {
3313 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
3314 }
3315 }
3316 }
3317