/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
 */

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 212989 2010-09-22 02:26:07Z neel $");

#include "opt_msgbuf.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/smp.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_phys.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <sys/pcpu.h>
#include <sys/sched.h>
#ifdef SMP
#include <sys/smp.h>
#endif

#include <machine/cache.h>
#include <machine/md_var.h>
#include <machine/tlb.h>

#undef PMAP_DEBUG

#ifndef PMAP_SHPGPERPROC
#define	PMAP_SHPGPERPROC 200
#endif

#if !defined(DIAGNOSTIC)
#define	PMAP_INLINE __inline
#else
#define	PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 *
 * XXX The & for pmap_segshift() is wrong, as is the fact that it doesn't
 * trim off gratuitous bits of the address space.  By having the &
 * there, we break defining NUSERPGTBLS below because the address space
 * is defined such that it ends immediately after NPDEPG*NPTEPG*PAGE_SIZE,
 * so we end up getting NUSERPGTBLS of 0.
 */
#define	pmap_seg_index(v)	(((v) >> SEGSHIFT) & (NPDEPG - 1))
#define	pmap_pde_index(v)	(((v) >> PDRSHIFT) & (NPDEPG - 1))
#define	pmap_pte_index(v)	(((v) >> PAGE_SHIFT) & (NPTEPG - 1))
#define	pmap_pde_pindex(v)	((v) >> PDRSHIFT)

#ifdef __mips_n64
#define	NUPDE			(NPDEPG * NPDEPG)
#define	NUSERPGTBLS		(NUPDE + NPDEPG)
#else
#define	NUPDE			(NPDEPG)
#define	NUSERPGTBLS		(NUPDE)
#endif

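/*
 * Illustrative note (not part of the original file): the macros above
 * simply slice a virtual address into per-level table indices.  With a
 * 4K page size (PAGE_SHIFT == 12) a user VA decomposes roughly as
 *
 *	pmap_seg_index(va)	selects the pm_segtab slot
 *	pmap_pde_index(va)	selects the page directory slot (n64 only;
 *				on 32-bit the segmap entry is the directory)
 *	pmap_pte_index(va)	selects the final PTE within the PT page
 *	va & PAGE_MASK		is the byte offset within the page
 *
 * The exact SEGSHIFT/PDRSHIFT values differ between the o32 and n64
 * configurations, so the widths of these fields differ as well.
 */
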
#define	is_kernel_pmap(x)	((x) == kernel_pmap)

struct pmap kernel_pmap_store;
pd_entry_t *kernel_segmap;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

static int nkpt;
unsigned pmap_max_asid;		/* max ASID supported by the system */

#define	PMAP_ASID_RESERVED	0

vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;

static void pmap_asid_alloc(pmap_t pmap);

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;

static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
    vm_offset_t va);
static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
    vm_offset_t va, vm_page_t m);
static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte);
static void pmap_invalidate_all(pmap_t pmap);
static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
static vm_page_t pmap_alloc_pte_page(unsigned int index, int req);
static void pmap_grow_pte_page_cache(void);

#ifdef SMP
static void pmap_invalidate_page_action(void *arg);
static void pmap_invalidate_all_action(void *arg);
static void pmap_update_page_action(void *arg);
#endif

#ifndef __mips_n64
/*
 * This structure is for high memory (memory above 512Meg in 32 bit).
 * This memory area does not have a direct mapping, so we need a
 * mechanism to do temporary per-CPU mappings to access these addresses.
 *
 * At bootup we reserve 2 virtual pages per CPU for mapping highmem pages.
 * To access a highmem physical address on a CPU, we disable interrupts
 * and add the mapping from the reserved virtual address for the CPU to
 * the physical address in the kernel pagetable.
 */
struct local_sysmaps {
	vm_offset_t	base;
	uint32_t	saved_intr;
	uint16_t	valid1, valid2;
};
static struct local_sysmaps sysmap_lmem[MAXCPU];

static __inline void
pmap_alloc_lmem_map(void)
{
	int i;

	for (i = 0; i < MAXCPU; i++) {
		sysmap_lmem[i].base = virtual_avail;
		virtual_avail += PAGE_SIZE * 2;
		sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
	}
}

static __inline vm_offset_t
pmap_lmem_map1(vm_paddr_t phys)
{
	struct local_sysmaps *sysm;
	pt_entry_t *pte, npte;
	vm_offset_t va;
	uint32_t intr;
	int cpu;

	intr = intr_disable();
	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	sysm->saved_intr = intr;
	va = sysm->base;
	npte = TLBLO_PA_TO_PFN(phys) |
	    PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
	pte = pmap_pte(kernel_pmap, va);
	*pte = npte;
	sysm->valid1 = 1;
	return (va);
}

static __inline vm_offset_t
pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
{
	struct local_sysmaps *sysm;
	pt_entry_t *pte, npte;
	vm_offset_t va1, va2;
	uint32_t intr;
	int cpu;

	intr = intr_disable();
	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	sysm->saved_intr = intr;
	va1 = sysm->base;
	va2 = sysm->base + PAGE_SIZE;
	npte = TLBLO_PA_TO_PFN(phys1) |
	    PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
	pte = pmap_pte(kernel_pmap, va1);
	*pte = npte;
	npte = TLBLO_PA_TO_PFN(phys2) |
	    PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
	pte = pmap_pte(kernel_pmap, va2);
	*pte = npte;
	sysm->valid1 = 1;
	sysm->valid2 = 1;
	return (va1);
}

static __inline void
pmap_lmem_unmap(void)
{
	struct local_sysmaps *sysm;
	pt_entry_t *pte;
	int cpu;

	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	pte = pmap_pte(kernel_pmap, sysm->base);
	*pte = PTE_G;
	tlb_invalidate_address(kernel_pmap, sysm->base);
	sysm->valid1 = 0;
	if (sysm->valid2) {
		pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);
		*pte = PTE_G;
		tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE);
		sysm->valid2 = 0;
	}
	intr_restore(sysm->saved_intr);
}
#else  /* __mips_n64 */

static __inline void
pmap_alloc_lmem_map(void)
{
}

static __inline vm_offset_t
pmap_lmem_map1(vm_paddr_t phys)
{

	return (0);
}

static __inline vm_offset_t
pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
{

	return (0);
}

static __inline vm_offset_t
pmap_lmem_unmap(void)
{

	return (0);
}
#endif /* !__mips_n64 */

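/*
 * Illustrative usage sketch (not part of the original file): a caller
 * that must touch a non-direct-mappable physical page brackets the
 * access with the helpers above.  Interrupts stay disabled from map to
 * unmap, so the per-CPU window cannot be reused underneath the caller:
 *
 *	vm_offset_t va;
 *
 *	va = pmap_lmem_map1(pa);	// disables interrupts
 *	bzero((void *)va, PAGE_SIZE);	// access the page via the window
 *	pmap_lmem_unmap();		// tears down, restores interrupts
 */
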
/*
 * Page table entry lookup routines.
 */
static __inline pd_entry_t *
pmap_segmap(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_segtab[pmap_seg_index(va)]);
}

#ifdef __mips_n64
static __inline pd_entry_t *
pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
{
	pd_entry_t *pde;

	pde = (pd_entry_t *)*pdpe;
	return (&pde[pmap_pde_index(va)]);
}

static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *pdpe;

	pdpe = pmap_segmap(pmap, va);
	if (pdpe == NULL || *pdpe == NULL)
		return (NULL);

	return (pmap_pdpe_to_pde(pdpe, va));
}
#else
static __inline pd_entry_t *
pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
{

	return (pdpe);
}

static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va)
{

	return (pmap_segmap(pmap, va));
}
#endif

static __inline pt_entry_t *
pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
{
	pt_entry_t *pte;

	pte = (pt_entry_t *)*pde;
	return (&pte[pmap_pte_index(va)]);
}

pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (pde == NULL || *pde == NULL)
		return (NULL);

	return (pmap_pde_to_pte(pde, va));
}

vm_offset_t
pmap_steal_memory(vm_size_t size)
{
	vm_size_t bank_size;
	vm_offset_t pa, va;

	size = round_page(size);

	bank_size = phys_avail[1] - phys_avail[0];
	while (size > bank_size) {
		int i;

		for (i = 0; phys_avail[i + 2]; i += 2) {
			phys_avail[i] = phys_avail[i + 2];
			phys_avail[i + 1] = phys_avail[i + 3];
		}
		phys_avail[i] = 0;
		phys_avail[i + 1] = 0;
		if (!phys_avail[0])
			panic("pmap_steal_memory: out of memory");
		bank_size = phys_avail[1] - phys_avail[0];
	}

	pa = phys_avail[0];
	phys_avail[0] += size;
	if (MIPS_DIRECT_MAPPABLE(pa) == 0)
		panic("Out of memory below 512Meg?");
	va = MIPS_PHYS_TO_DIRECT(pa);
	bzero((caddr_t)va, size);
	return (va);
}

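/*
 * Illustrative note (not part of the original file): pmap_steal_memory()
 * is only usable before the VM system is initialized.  It discards
 * leading phys_avail[] banks that are too small, then carves the
 * request off the front of the first bank that fits, returning a
 * zeroed, direct-mapped (KSEG0/XKPHYS) virtual address; the stolen
 * pages are never represented by vm_page structures.  The bootstrap
 * code below uses it this way, e.g.:
 *
 *	msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE);
 */
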
/*
 * Bootstrap the system enough to run with virtual memory.  This
 * assumes that the phys_avail array has been initialized.
 */
static void
pmap_create_kernel_pagetable(void)
{
	int i, j;
	vm_offset_t ptaddr;
	pt_entry_t *pte;
#ifdef __mips_n64
	pd_entry_t *pde;
	vm_offset_t pdaddr;
	int npt, npde;
#endif

	/*
	 * Allocate segment table for the kernel
	 */
	kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);

	/*
	 * Allocate second level page tables for the kernel
	 */
#ifdef __mips_n64
	npde = howmany(NKPT, NPDEPG);
	pdaddr = pmap_steal_memory(PAGE_SIZE * npde);
#endif
	nkpt = NKPT;
	ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt);

	/*
	 * The R[4-7]?00 stores only one copy of the Global bit in the
	 * translation lookaside buffer for each 2 page entry.  Thus invalid
	 * entries must have the Global bit set so that when the Entry LO and
	 * Entry HI G bits are ANDed together they will produce a global bit
	 * to store in the TLB.
	 */
	for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++)
		*pte = PTE_G;

#ifdef __mips_n64
	for (i = 0, npt = nkpt; npt > 0; i++) {
		kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE);
		pde = (pd_entry_t *)kernel_segmap[i];

		for (j = 0; j < NPDEPG && npt > 0; j++, npt--)
			pde[j] = (pd_entry_t)(ptaddr +
			    (i * NPDEPG + j) * PAGE_SIZE);
	}
#else
	for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt;
	    i++, j++)
		kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE));
#endif

	PMAP_LOCK_INIT(kernel_pmap);
	kernel_pmap->pm_segtab = kernel_segmap;
	kernel_pmap->pm_active = ~0;
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
	kernel_pmap->pm_asid[0].gen = 0;
	kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE;
}

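/*
 * Illustrative note (not part of the original file): the TLB maps
 * virtual pages in even/odd pairs (EntryLo0/EntryLo1) under a single
 * EntryHi, and the hardware keeps one G bit per pair, effectively the
 * AND of the two PTEs' G bits.  If an invalid kernel PTE were left as
 * 0 (G clear), it would strip the global bit from a valid neighbor,
 * wrongly making that mapping ASID-specific.  Hence every invalid
 * kernel PTE is initialized to PTE_G:
 *
 *	valid PTE:	pfn | PTE_V | PTE_G | ...	(G = 1)
 *	invalid PTE:	PTE_G				(G = 1)
 *	pair's TLB G bit = 1 AND 1 = 1  ->  stays global
 */
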
void
pmap_bootstrap(void)
{
	int i;
	int need_local_mappings = 0;

	/* Sort. */
again:
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		/*
		 * Keep the memory aligned on page boundary.
		 */
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);

		if (i < 2)
			continue;
		if (phys_avail[i - 2] > phys_avail[i]) {
			vm_paddr_t ptemp[2];

			ptemp[0] = phys_avail[i + 0];
			ptemp[1] = phys_avail[i + 1];

			phys_avail[i + 0] = phys_avail[i - 2];
			phys_avail[i + 1] = phys_avail[i - 1];

			phys_avail[i - 2] = ptemp[0];
			phys_avail[i - 1] = ptemp[1];
			goto again;
		}
	}

	/*
	 * In 32 bit, we may have memory which cannot be mapped directly;
	 * this memory will need a temporary mapping before it can be
	 * accessed.
	 */
	if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1]))
		need_local_mappings = 1;

	/*
	 * Copy the phys_avail[] array before we start stealing memory from it.
	 */
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		physmem_desc[i] = phys_avail[i];
		physmem_desc[i + 1] = phys_avail[i + 1];
	}

	Maxmem = atop(phys_avail[i - 1]);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			vm_paddr_t size;

			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
			    (uintmax_t) phys_avail[i],
			    (uintmax_t) phys_avail[i + 1] - 1,
			    (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
		}
		printf("Maxmem is 0x%0lx\n", ptoa(Maxmem));
	}
	/*
	 * Steal the message buffer from the beginning of memory.
	 */
	msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE);
	msgbufinit(msgbufp, MSGBUF_SIZE);

	/*
	 * Steal thread0 kstack.
	 */
	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);

	virtual_avail = VM_MIN_KERNEL_ADDRESS;
	virtual_end = VM_MAX_KERNEL_ADDRESS;

#ifdef SMP
	/*
	 * Steal some virtual address space to map the pcpu area.
	 */
	virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2);
	pcpup = (struct pcpu *)virtual_avail;
	virtual_avail += PAGE_SIZE * 2;

	/*
	 * Initialize the wired TLB entry mapping the pcpu region for
	 * the BSP at 'pcpup'.  Up until this point we were operating
	 * with the 'pcpup' for the BSP pointing to a virtual address
	 * in KSEG0, so there was no need for a TLB mapping.
	 */
	mips_pcpu_tlb_init(PCPU_ADDR(0));

	if (bootverbose)
		printf("pcpu is available at virtual address %p.\n", pcpup);
#endif

	if (need_local_mappings)
		pmap_alloc_lmem_map();
	pmap_create_kernel_pagetable();
	pmap_max_asid = VMNUM_PIDS;
	mips_wr_entryhi(0);
	mips_wr_pagemask(0);
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_list_count = 0;
	m->md.pv_flags = 0;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 * pmap_init has been enhanced to support discontiguous physical
 * memory in a fairly consistent way.
 */
void
pmap_init(void)
{

	/*
	 * Initialize the address space (zone) for the pv entries.  Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
	pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
	pv_entry_high_water = 9 * (pv_entry_max / 10);
	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
}

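/*
 * Illustrative arithmetic (not part of the original file): with the
 * default PMAP_SHPGPERPROC of 200, a hypothetical maxproc of 1000 and
 * 65536 physical pages, the limits above work out to
 *
 *	pv_entry_max        = 200 * 1000 + 65536 = 265536
 *	pv_entry_high_water = 9 * (265536 / 10)  = 238977
 *
 * so the pagedaemon is woken once pv entry usage passes roughly 90% of
 * the maximum, before get_pv_entry() has to reclaim or panic.
 */
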
/***************************************************
 * Low level helper routines.....
 ***************************************************/

static __inline void
pmap_invalidate_all_local(pmap_t pmap)
{

	if (pmap == kernel_pmap) {
		tlb_invalidate_all();
		return;
	}
	if (pmap->pm_active & PCPU_GET(cpumask))
		tlb_invalidate_all_user(pmap);
	else
		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
}

#ifdef SMP
static void
pmap_invalidate_all(pmap_t pmap)
{

	smp_rendezvous(0, pmap_invalidate_all_action, 0, pmap);
}

static void
pmap_invalidate_all_action(void *arg)
{

	pmap_invalidate_all_local((pmap_t)arg);
}
#else
static void
pmap_invalidate_all(pmap_t pmap)
{

	pmap_invalidate_all_local(pmap);
}
#endif

static __inline void
pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va)
{

	if (is_kernel_pmap(pmap)) {
		tlb_invalidate_address(pmap, va);
		return;
	}
	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
		return;
	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
		return;
	}
	tlb_invalidate_address(pmap, va);
}

#ifdef SMP
struct pmap_invalidate_page_arg {
	pmap_t pmap;
	vm_offset_t va;
};

static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	struct pmap_invalidate_page_arg arg;

	arg.pmap = pmap;
	arg.va = va;
	smp_rendezvous(0, pmap_invalidate_page_action, 0, &arg);
}

static void
pmap_invalidate_page_action(void *arg)
{
	struct pmap_invalidate_page_arg *p = arg;

	pmap_invalidate_page_local(p->pmap, p->va);
}
#else
static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	pmap_invalidate_page_local(pmap, va);
}
#endif

static __inline void
pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
{

	if (is_kernel_pmap(pmap)) {
		tlb_update(pmap, va, pte);
		return;
	}
	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
		return;
	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
		return;
	}
	tlb_update(pmap, va, pte);
}

#ifdef SMP
struct pmap_update_page_arg {
	pmap_t pmap;
	vm_offset_t va;
	pt_entry_t pte;
};

static void
pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
{
	struct pmap_update_page_arg arg;

	arg.pmap = pmap;
	arg.va = va;
	arg.pte = pte;
	smp_rendezvous(0, pmap_update_page_action, 0, &arg);
}

static void
pmap_update_page_action(void *arg)
{
	struct pmap_update_page_arg *p = arg;

	pmap_update_page_local(p->pmap, p->va, p->pte);
}
#else
static void
pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
{

	pmap_update_page_local(pmap, va, pte);
}
#endif

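/*
 * Illustrative note (not part of the original file): the per-CPU
 * invalidation helpers above exploit the ASID scheme to stay cheap.
 * Each pmap records, per CPU, the ASID it was assigned and the
 * "generation" of that CPU's ASID space at assignment time.  On a
 * shootdown there are three cases for a user pmap:
 *
 *	gen != asid_generation	the pmap's ASID already expired; its
 *				entries cannot match, nothing to do.
 *	pmap not active here	zap gen so a fresh ASID (and thus an
 *				implicit full invalidation) is forced
 *				the next time this pmap is activated.
 *	otherwise		really flush/update the TLB entry.
 */
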
/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte;
	vm_offset_t retval = 0;

	PMAP_LOCK(pmap);
	pte = pmap_pte(pmap, va);
	if (pte) {
		retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK);
	}
	PMAP_UNLOCK(pmap);
	return (retval);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t pte;
	vm_page_t m;
	vm_paddr_t pa;

	m = NULL;
	pa = 0;
	PMAP_LOCK(pmap);
retry:
	pte = *pmap_pte(pmap, va);
	if (pte != 0 && pte_test(&pte, PTE_V) &&
	    (pte_test(&pte, PTE_D) || (prot & VM_PROT_WRITE) == 0)) {
		if (vm_page_pa_tryrelock(pmap, TLBLO_PTE_TO_PA(pte), &pa))
			goto retry;

		m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(pte));
		vm_page_hold(m);
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * add a wired page to the kva
 */
void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr)
{
	pt_entry_t *pte;
	pt_entry_t opte, npte;

#ifdef PMAP_DEBUG
	printf("pmap_kenter: va: %p -> pa: %p\n", (void *)va, (void *)pa);
#endif
	npte = TLBLO_PA_TO_PFN(pa) | PTE_D | PTE_V | PTE_G | PTE_W | attr;

	pte = pmap_pte(kernel_pmap, va);
	opte = *pte;
	*pte = npte;
	if (pte_test(&opte, PTE_V) && opte != npte)
		pmap_update_page(kernel_pmap, va, npte);
}

void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{

	KASSERT(is_cacheable_mem(pa),
	    ("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa));

	pmap_kenter_attr(va, pa, PTE_C_CACHE);
}

/*
 * remove a page from the kernel pagetables
 */
 /* PMAP_INLINE */ void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	/*
	 * Write back all caches from the page being destroyed
	 */
	mips_dcache_wbinv_range_index(va, PAGE_SIZE);

	pte = pmap_pte(kernel_pmap, va);
	*pte = PTE_G;
	pmap_invalidate_page(kernel_pmap, va);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping.  Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged.  Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 *
 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
	vm_offset_t va, sva;

	if (MIPS_DIRECT_MAPPABLE(end))
		return (MIPS_PHYS_TO_DIRECT(start));

	va = sva = *virt;
	while (start < end) {
		pmap_kenter(va, start);
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	*virt = va;
	return (sva);
}

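/*
 * Illustrative usage sketch (not part of the original file): a caller
 * wiring a physically contiguous range into the kernel map might do
 * the following; on MIPS the direct-map shortcut above usually means
 * the suggested VA is left untouched:
 *
 *	vm_offset_t va = virtual_avail;
 *	vm_offset_t mapped;
 *
 *	mapped = pmap_map(&va, pa_start, pa_start + 4 * PAGE_SIZE,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *	virtual_avail = va;	// only advanced if real KVA was consumed
 */
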
/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	int i;
	vm_offset_t origva = va;

	for (i = 0; i < count; i++) {
		pmap_flush_pvcache(m[i]);
		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
		va += PAGE_SIZE;
	}

	mips_dcache_wbinv_range_index(origva, PAGE_SIZE * count);
}

/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	/*
	 * No need to wb/inv caches here,
	 * pmap_kremove will do it for us
	 */

	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

/*
 * Revision 1.507
 *
 * Simplify the reference counting of page table pages.  Specifically, use
 * the page table page's wired count rather than its hold count to contain
 * the reference count.
 */

/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
 */
static PMAP_INLINE int
pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	--m->wire_count;
	if (m->wire_count == 0)
		return (_pmap_unwire_pte_hold(pmap, va, m));
	else
		return (0);
}

static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pd_entry_t *pde;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
#ifdef __mips_n64
	if (m->pindex < NUPDE)
		pde = pmap_pde(pmap, va);
	else
		pde = pmap_segmap(pmap, va);
#else
	pde = pmap_pde(pmap, va);
#endif
	*pde = 0;
	pmap->pm_stats.resident_count--;

#ifdef __mips_n64
	if (m->pindex < NUPDE) {
		pd_entry_t *pdp;
		vm_page_t pdpg;

		/*
		 * Recursively decrement next level pagetable refcount
		 */
		pdp = (pd_entry_t *)*pmap_segmap(pmap, va);
		pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp));
		pmap_unwire_pte_hold(pmap, va, pdpg);
	}
#endif
	if (pmap->pm_ptphint == m)
		pmap->pm_ptphint = NULL;

	/*
	 * If the page is finally unwired, simply free it.
	 */
	vm_page_free_zero(m);
	atomic_subtract_int(&cnt.v_wire_count, 1);
	return (1);
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{
	unsigned ptepindex;
	pd_entry_t pteva;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);

	if (mpte == NULL) {
		ptepindex = pmap_pde_pindex(va);
		if (pmap->pm_ptphint &&
		    (pmap->pm_ptphint->pindex == ptepindex)) {
			mpte = pmap->pm_ptphint;
		} else {
			pteva = *pmap_pde(pmap, va);
			mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pteva));
			pmap->pm_ptphint = mpte;
		}
	}
	return (pmap_unwire_pte_hold(pmap, va, mpte));
}

void
pmap_pinit0(pmap_t pmap)
{
	int i;

	PMAP_LOCK_INIT(pmap);
	pmap->pm_segtab = kernel_segmap;
	pmap->pm_active = 0;
	pmap->pm_ptphint = NULL;
	for (i = 0; i < MAXCPU; i++) {
		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
		pmap->pm_asid[i].gen = 0;
	}
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

static void
pmap_grow_pte_page_cache(void)
{

#ifdef __mips_n64
	vm_contig_grow_cache(3, 0, MIPS_XKPHYS_LARGEST_PHYS);
#else
	vm_contig_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS);
#endif
}

static vm_page_t
pmap_alloc_pte_page(unsigned int index, int req)
{
	vm_page_t m;

	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, 0, req);
	if (m == NULL)
		return (NULL);

	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	m->pindex = index;
	atomic_add_int(&cnt.v_wire_count, 1);
	m->wire_count = 1;
	return (m);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(pmap_t pmap)
{
	vm_offset_t ptdva;
	vm_page_t ptdpg;
	int i;

	PMAP_LOCK_INIT(pmap);

	/*
	 * allocate the page directory page
	 */
	while ((ptdpg = pmap_alloc_pte_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL)
		pmap_grow_pte_page_cache();

	ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
	pmap->pm_segtab = (pd_entry_t *)ptdva;
	pmap->pm_active = 0;
	pmap->pm_ptphint = NULL;
	for (i = 0; i < MAXCPU; i++) {
		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
		pmap->pm_asid[i].gen = 0;
	}
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);

	return (1);
}

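/*
 * Illustrative note (not part of the original file): per the
 * "Revision 1.507" note above, page table pages are reference-counted
 * through their vm_page wire_count.  The lifecycle is roughly:
 *
 *	pmap_alloc_pte_page()	wire_count = 1 (held by the slot in the
 *				directory level above)
 *	pmap_allocpte() reuse	wire_count++ for each additional consumer
 *	pmap_unwire_pte_hold()	wire_count--; on reaching 0 the page is
 *				unmapped from its directory, any parent
 *				directory page is unwired recursively
 *				(n64), and the page is freed.
 */
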
/*
 * this routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
{
	vm_offset_t pageva;
	vm_page_t m;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	/*
	 * Find or fabricate a new pagetable page
	 */
	if ((m = pmap_alloc_pte_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) {
		if (flags & M_WAITOK) {
			PMAP_UNLOCK(pmap);
			vm_page_unlock_queues();
			pmap_grow_pte_page_cache();
			vm_page_lock_queues();
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page
		 * table page may have been allocated.
		 */
		return (NULL);
	}

	/*
	 * Map the pagetable page into the process address space, if it
	 * isn't already there.
	 */
	pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));

#ifdef __mips_n64
	if (ptepindex >= NUPDE) {
		pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva;
	} else {
		pd_entry_t *pdep, *pde;
		int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT);
		int pdeindex = ptepindex & (NPDEPG - 1);
		vm_page_t pg;

		pdep = &pmap->pm_segtab[segindex];
		if (*pdep == NULL) {
			/* recurse for allocating page dir */
			if (_pmap_allocpte(pmap, NUPDE + segindex,
			    flags) == NULL) {
				/* alloc failed, release current */
				--m->wire_count;
				atomic_subtract_int(&cnt.v_wire_count, 1);
				vm_page_free_zero(m);
				return (NULL);
			}
		} else {
			pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep));
			pg->wire_count++;
		}
		/* Next level entry */
		pde = (pd_entry_t *)*pdep;
		pde[pdeindex] = (pd_entry_t)pageva;
		pmap->pm_ptphint = m;
	}
#else
	pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva;
#endif
	pmap->pm_stats.resident_count++;

	/*
	 * Set the page table hint
	 */
	pmap->pm_ptphint = m;
	return (m);
}

static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
{
	unsigned ptepindex;
	pd_entry_t *pde;
	vm_page_t m;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = pmap_pde_pindex(va);
retry:
	/*
	 * Get the page directory entry
	 */
	pde = pmap_pde(pmap, va);

	/*
	 * If the page table page is mapped, we just increment the hold
	 * count, and activate it.
	 */
	if (pde != NULL && *pde != NULL) {
		/*
		 * In order to get the page table page, try the hint first.
		 */
		if (pmap->pm_ptphint &&
		    (pmap->pm_ptphint->pindex == ptepindex)) {
			m = pmap->pm_ptphint;
		} else {
			m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde));
			pmap->pm_ptphint = m;
		}
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has been
		 * deallocated.
		 */
		m = _pmap_allocpte(pmap, ptepindex, flags);
		if (m == NULL && (flags & M_WAITOK))
			goto retry;
	}
	return (m);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Revision 1.397
 * - Merged pmap_release and pmap_release_free_page.  When pmap_release is
 *   called only the page directory page(s) can be left in the pmap pte
 *   object, since all page table pages will have been freed by
 *   pmap_remove_pages and pmap_remove.  In addition, there can only be one
 *   reference to the pmap and the page directory is wired, so the page(s)
 *   can never be busy.  So all there is to do is clear the magic mappings
 *   from the page directory and free the page(s).
 */

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_offset_t ptdva;
	vm_page_t ptdpg;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));

	ptdva = (vm_offset_t)pmap->pm_segtab;
	ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva));

	ptdpg->wire_count--;
	atomic_subtract_int(&cnt.v_wire_count, 1);
	vm_page_free_zero(ptdpg);
	PMAP_LOCK_DESTROY(pmap);
}

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_page_t nkpg;
	pd_entry_t *pde, *pdpe;
	pt_entry_t *pte;
	int i;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	addr = roundup2(addr, NBSEG);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		pdpe = pmap_segmap(kernel_pmap, kernel_vm_end);
#ifdef __mips_n64
		if (*pdpe == 0) {
			/* new intermediate page table entry */
			nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT);
			if (nkpg == NULL)
				panic("pmap_growkernel: no memory to grow kernel");
			*pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
			continue;	/* try again */
		}
#endif
		pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
		if (*pde != 0) {
			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		/*
		 * This index is bogus, but out of the way
		 */
		nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT);
		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");
		nkpt++;
		*pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));

		/*
		 * The R[4-7]?00 stores only one copy of the Global bit in
		 * the translation lookaside buffer for each 2 page entry.
		 * Thus invalid entries must have the Global bit set so that
		 * when the Entry LO and Entry HI G bits are ANDed together
		 * they will produce a global bit to store in the TLB.
		 */
		pte = (pt_entry_t *)*pde;
		for (i = 0; i < NPTEPG; i++)
			pte[i] = PTE_G;

		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{

	pv_entry_count--;
	uma_zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 * the memory allocation is performed bypassing the malloc code
 * because of the possibility of allocations at interrupt time.
 */
static pv_entry_t
get_pv_entry(pmap_t locked_pmap)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	struct vpgqueues *vpq;
	pt_entry_t *pte, oldpte;
	pmap_t pmap;
	pv_entry_t allocated_pv, next_pv, pv;
	vm_offset_t va;
	vm_page_t m;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
	if (allocated_pv != NULL) {
		pv_entry_count++;
		if (pv_entry_count > pv_entry_high_water)
			pagedaemon_wakeup();
		else
			return (allocated_pv);
	}
	/*
	 * Reclaim pv entries: At first, destroy mappings to inactive
	 * pages.  After that, if a pv entry is still needed, destroy
	 * mappings to active pages.
	 */
	if (ratecheck(&lastprint, &printinterval))
		printf("Approaching the limit on PV entries, "
		    "increase the vm.pmap.shpgperproc tunable.\n");
	vpq = &vm_page_queues[PQ_INACTIVE];
retry:
	TAILQ_FOREACH(m, &vpq->pl, pageq) {
		if (m->hold_count || m->busy)
			continue;
		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
			va = pv->pv_va;
			pmap = pv->pv_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
				continue;
			pmap->pm_stats.resident_count--;
			pte = pmap_pte(pmap, va);
			KASSERT(pte != NULL, ("pte"));
			oldpte = *pte;
			if (is_kernel_pmap(pmap))
				*pte = PTE_G;
			else
				*pte = 0;
			KASSERT(!pte_test(&oldpte, PTE_W),
			    ("wired pte for unwired page"));
			if (m->md.pv_flags & PV_TABLE_REF)
				vm_page_flag_set(m, PG_REFERENCED);
			if (pte_test(&oldpte, PTE_D))
				vm_page_dirty(m);
			pmap_invalidate_page(pmap, va);
			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
			m->md.pv_list_count--;
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
			pmap_unuse_pt(pmap, va, pv->pv_ptem);
			if (pmap != locked_pmap)
				PMAP_UNLOCK(pmap);
			if (allocated_pv == NULL)
				allocated_pv = pv;
			else
				free_pv_entry(pv);
		}
		if (TAILQ_EMPTY(&m->md.pv_list)) {
			vm_page_flag_clear(m, PG_WRITEABLE);
			m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
		}
	}
	if (allocated_pv == NULL) {
		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
			vpq = &vm_page_queues[PQ_ACTIVE];
			goto retry;
		}
		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
	}
	return (allocated_pv);
}

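/*
 * Illustrative note (not part of the original file): the messages above
 * refer to vm.pmap.shpgperproc, which scales pv_entry_max in
 * pmap_init().  On platforms where it is exposed as a loader tunable it
 * can be raised at boot, e.g. in /boot/loader.conf (hypothetical value):
 *
 *	vm.pmap.shpgperproc="400"
 *
 * Otherwise the compile-time PMAP_SHPGPERPROC default of 200 applies.
 */
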
/*
 * Revision 1.370
 *
 * Move pmap_collect() out of the machine-dependent code, rename it
 * to reflect its new location, and add page queue and flag locking.
 *
 * Notes: (1) alpha, i386, and ia64 had identical implementations
 * of pmap_collect() in terms of machine-independent interfaces;
 * (2) sparc64 doesn't require it; (3) powerpc had it as a TODO.
 *
 * MIPS implementation was identical to alpha [Junos 8.2]
 */

/*
 * If it is the first entry on the list, it is actually
 * in the header and we must copy the following entry up
 * to the header.  Otherwise we must search the list for
 * the entry.  In either case we free the now unused entry.
 */
static pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (pvh->pv_list_count < pmap->pm_stats.resident_count) {
		TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
			if (pmap == pv->pv_pmap && va == pv->pv_va)
				break;
		}
	} else {
		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
			if (va == pv->pv_va)
				break;
		}
	}
	if (pv != NULL) {
		TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
		pvh->pv_list_count--;
		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
	}
	return (pv);
}

static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	pv = pmap_pvh_remove(pvh, pmap, va);
	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx",
	    (u_long)VM_PAGE_TO_PHYS(member2struct(vm_page, md, pvh)),
	    (u_long)va));
	free_pv_entry(pv);
}

static void
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	pmap_pvh_free(&m->md, pmap, va);
	if (TAILQ_EMPTY(&m->md.pv_list))
		vm_page_flag_clear(m, PG_WRITEABLE);
}

/*
 * Conditionally create a pv entry.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
    vm_page_t m)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (pv_entry_count < pv_entry_high_water &&
	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
		pv_entry_count++;
		pv->pv_va = va;
		pv->pv_pmap = pmap;
		pv->pv_ptem = mpte;
		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count++;
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * pmap_remove_pte: do the things to unmap a page in a process
 */
static int
pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
{
	pt_entry_t oldpte;
	vm_page_t m;
	vm_offset_t pa;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	oldpte = *ptq;
	if (is_kernel_pmap(pmap))
		*ptq = PTE_G;
	else
		*ptq = 0;

	if (pte_test(&oldpte, PTE_W))
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;
	pa = TLBLO_PTE_TO_PA(oldpte);

	if (page_is_managed(pa)) {
		m = PHYS_TO_VM_PAGE(pa);
		if (pte_test(&oldpte, PTE_D)) {
			KASSERT(!pte_test(&oldpte, PTE_RO),
			    ("%s: modified page not writable: va: %p, pte: 0x%x",
			    __func__, (void *)va, oldpte));
			vm_page_dirty(m);
		}
		if (m->md.pv_flags & PV_TABLE_REF)
			vm_page_flag_set(m, PG_REFERENCED);
		m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);

		pmap_remove_entry(pmap, m, va);
	}
	return (pmap_unuse_pt(pmap, va, NULL));
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(struct pmap *pmap, vm_offset_t va)
{
	pt_entry_t *ptq;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	ptq = pmap_pte(pmap, va);

	/*
	 * if there is no pte for this address, just skip it!!!
	 */
	if (!ptq || !pte_test(ptq, PTE_V)) {
		return;
	}

	/*
	 * Write back all caches from the page being destroyed
	 */
	mips_dcache_wbinv_range_index(va, PAGE_SIZE);

	/*
	 * get a local va for mappings for this pmap.
	 */
	(void)pmap_remove_pte(pmap, ptq, va);
	pmap_invalidate_page(pmap, va);

	return;
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va_next;
	pd_entry_t *pde, *pdpe;
	pt_entry_t *pte;

	if (pmap == NULL)
		return;

	if (pmap->pm_stats.resident_count == 0)
		return;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);

	/*
	 * special handling of removing one page.  a very common operation
	 * and easy to short circuit some code.
	 */
	if ((sva + PAGE_SIZE) == eva) {
		pmap_remove_page(pmap, sva);
		goto out;
	}
	for (; sva < eva; sva = va_next) {
		pdpe = pmap_segmap(pmap, sva);
#ifdef __mips_n64
		if (*pdpe == 0) {
			va_next = (sva + NBSEG) & ~SEGMASK;
			if (va_next < sva)
				va_next = eva;
			continue;
		}
#endif
		va_next = (sva + NBPDR) & ~PDRMASK;
		if (va_next < sva)
			va_next = eva;

		pde = pmap_pdpe_to_pde(pdpe, sva);
		if (*pde == 0)
			continue;
		if (va_next > eva)
			va_next = eva;
		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next;
		    pte++, sva += PAGE_SIZE) {
			pmap_remove_page(pmap, sva);
		}
	}
out:
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}

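/*
 * Illustrative arithmetic (not part of the original file): the stepping
 * expression used in the loop above,
 *
 *	va_next = (sva + NBPDR) & ~PDRMASK;
 *
 * rounds sva up to the start of the next page-directory-sized region,
 * letting an absent directory be skipped in one step.  E.g., with a
 * hypothetical NBPDR of 0x400000 (4MB) and sva = 0x00401234:
 *
 *	0x00401234 + 0x00400000 = 0x00801234
 *	0x00801234 & ~0x003fffff = 0x00800000
 *
 * The "va_next < sva" test catches wraparound at the very top of the
 * address space and clamps the scan to eva.
 */
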
/*
 *	Routine:	pmap_remove_all
 *	Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 *	Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */
void
pmap_remove_all(vm_page_t m)
{
	pv_entry_t pv;
	pt_entry_t *pte, tpte;

	KASSERT((m->flags & PG_FICTITIOUS) == 0,
	    ("pmap_remove_all: page %p is fictitious", m));
	vm_page_lock_queues();

	if (m->md.pv_flags & PV_TABLE_REF)
		vm_page_flag_set(m, PG_REFERENCED);

	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		PMAP_LOCK(pv->pv_pmap);

		/*
		 * If it's the last mapping, write back all caches from
		 * the page being destroyed.
		 */
		if (m->md.pv_list_count == 1)
			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);

		pv->pv_pmap->pm_stats.resident_count--;

		pte = pmap_pte(pv->pv_pmap, pv->pv_va);

		tpte = *pte;
		if (is_kernel_pmap(pv->pv_pmap))
			*pte = PTE_G;
		else
			*pte = 0;

		if (pte_test(&tpte, PTE_W))
			pv->pv_pmap->pm_stats.wired_count--;

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (pte_test(&tpte, PTE_D)) {
			KASSERT(!pte_test(&tpte, PTE_RO),
			    ("%s: modified page not writable: va: %p, pte: 0x%x",
			    __func__, (void *)pv->pv_va, tpte));
			vm_page_dirty(m);
		}
		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);

		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
		PMAP_UNLOCK(pv->pv_pmap);
		free_pv_entry(pv);
	}

	vm_page_flag_clear(m, PG_WRITEABLE);
	m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
	vm_page_unlock_queues();
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	pt_entry_t *pte;
	pd_entry_t *pde, *pdpe;
	vm_offset_t va_next;

	if (pmap == NULL)
		return;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}
	if (prot & VM_PROT_WRITE)
		return;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {
		pt_entry_t pbits;
		vm_page_t m;
		vm_paddr_t pa;

		pdpe = pmap_segmap(pmap, sva);
#ifdef __mips_n64
		if (*pdpe == 0) {
			va_next = (sva + NBSEG) & ~SEGMASK;
			if (va_next < sva)
				va_next = eva;
			continue;
		}
#endif
		va_next = (sva + NBPDR) & ~PDRMASK;
		if (va_next < sva)
			va_next = eva;

		pde = pmap_pdpe_to_pde(pdpe, sva);
		if (pde == NULL || *pde == NULL)
			continue;
		if (va_next > eva)
			va_next = eva;

		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
		    sva += PAGE_SIZE) {

			/* Skip invalid PTEs */
			if (!pte_test(pte, PTE_V))
				continue;
			pbits = *pte;
			pa = TLBLO_PTE_TO_PA(pbits);
			if (page_is_managed(pa) && pte_test(&pbits, PTE_D)) {
				m = PHYS_TO_VM_PAGE(pa);
				vm_page_dirty(m);
				m->md.pv_flags &= ~PV_TABLE_MOD;
			}
			pte_clear(&pbits, PTE_D);
			pte_set(&pbits, PTE_RO);

			if (pbits != *pte) {
				*pte = pbits;
				pmap_update_page(pmap, sva, pbits);
			}
		}
	}
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}

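/*
 * Illustrative note (not part of the original file): MIPS has no
 * hardware-managed dirty bit, so this pmap emulates one with the TLB D
 * ("dirty", i.e. writable) bit.  Write-protecting a mapping is
 * therefore two PTE changes, as in the loop above:
 *
 *	pte_clear(&pbits, PTE_D);	// next write takes a TLB
 *					// modified exception
 *	pte_set(&pbits, PTE_RO);	// tells the exception handler
 *					// the page really is read-only
 *
 * Any modification recorded so far is pushed to the vm_page with
 * vm_page_dirty() before PTE_D is cleared, so no history is lost.
 */
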
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
    vm_prot_t prot, boolean_t wired)
{
	vm_offset_t pa, opa;
	pt_entry_t *pte;
	pt_entry_t origpte, newpte;
	pv_entry_t pv;
	vm_page_t mpte, om;
	int rw = 0;

	if (pmap == NULL)
		return;

	va &= ~PAGE_MASK;
	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
	    (m->oflags & VPO_BUSY) != 0,
	    ("pmap_enter: page %p is not busy", m));

	mpte = NULL;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);

	/*
	 * In the case that a page table page is not resident, we are
	 * creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		mpte = pmap_allocpte(pmap, va, M_WAITOK);
	}
	pte = pmap_pte(pmap, va);

	/*
	 * Page Directory table entry not valid, we need a new PT page
	 */
	if (pte == NULL) {
		panic("pmap_enter: invalid page directory, pdir=%p, va=%p",
		    (void *)pmap->pm_segtab, (void *)va);
	}
	pa = VM_PAGE_TO_PHYS(m);
	om = NULL;
	origpte = *pte;
	opa = TLBLO_PTE_TO_PA(origpte);

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (pte_test(&origpte, PTE_V) && opa == pa) {
		/*
		 * Wiring change, just update stats.  We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is
		 * wired, the PT page will be also.
		 */
		if (wired && !pte_test(&origpte, PTE_W))
			pmap->pm_stats.wired_count++;
		else if (!wired && pte_test(&origpte, PTE_W))
			pmap->pm_stats.wired_count--;

		KASSERT(!pte_test(&origpte, PTE_D | PTE_RO),
		    ("%s: modified page not writable: va: %p, pte: 0x%x",
		    __func__, (void *)va, origpte));

		/*
		 * Remove extra pte reference
		 */
		if (mpte)
			mpte->wire_count--;

		if (page_is_managed(opa)) {
			om = m;
		}
		goto validate;
	}

	pv = NULL;

	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		if (pte_test(&origpte, PTE_W))
			pmap->pm_stats.wired_count--;

		if (page_is_managed(opa)) {
			om = PHYS_TO_VM_PAGE(opa);
			pv = pmap_pvh_remove(&om->md, pmap, va);
		}
		if (mpte != NULL) {
			mpte->wire_count--;
			KASSERT(mpte->wire_count > 0,
			    ("pmap_enter: missing reference to page table page,"
			    " va: %p", (void *)va));
		}
	} else
		pmap->pm_stats.resident_count++;

	/*
	 * Enter on the PV list if part of our managed memory.  Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */
	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("pmap_enter: managed mapping within the clean submap"));
		if (pv == NULL)
			pv = get_pv_entry(pmap);
		pv->pv_va = va;
		pv->pv_pmap = pmap;
		pv->pv_ptem = mpte;
		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count++;
	} else if (pv != NULL)
		free_pv_entry(pv);

	/*
	 * Increment counters
	 */
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	if ((access & VM_PROT_WRITE) != 0)
		m->md.pv_flags |= PV_TABLE_MOD | PV_TABLE_REF;
	rw = init_pte_prot(va, m, prot);

#ifdef PMAP_DEBUG
	printf("pmap_enter: va: %p -> pa: %p\n", (void *)va, (void *)pa);
#endif
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	newpte = TLBLO_PA_TO_PFN(pa) | rw | PTE_V;

	if (is_cacheable_mem(pa))
		newpte |= PTE_C_CACHE;
	else
		newpte |= PTE_C_UNCACHED;

	if (wired)
		newpte |= PTE_W;

	if (is_kernel_pmap(pmap))
		newpte |= PTE_G;

	/*
	 * if the mapping or permission bits are different, we need to
	 * update the pte.
	 */
	if (origpte != newpte) {
		if (pte_test(&origpte, PTE_V)) {
			*pte = newpte;
			if (page_is_managed(opa) && (opa != pa)) {
				if (om->md.pv_flags & PV_TABLE_REF)
					vm_page_flag_set(om, PG_REFERENCED);
				om->md.pv_flags &=
				    ~(PV_TABLE_REF | PV_TABLE_MOD);
			}
			if (pte_test(&origpte, PTE_D)) {
				KASSERT(!pte_test(&origpte, PTE_RO),
				    ("pmap_enter: modified page not writable:"
				    " va: %p, pte: 0x%x", (void *)va, origpte));
				if (page_is_managed(opa))
					vm_page_dirty(om);
			}
			if (page_is_managed(opa) &&
			    TAILQ_EMPTY(&om->md.pv_list))
				vm_page_flag_clear(om, PG_WRITEABLE);
		} else {
			*pte = newpte;
		}
	}
	pmap_update_page(pmap, va, newpte);

	/*
	 * Sync I & D caches for executable pages.  Do this only if the
	 * target pmap belongs to the current process.  Otherwise, an
	 * unresolvable TLB miss may occur.
	 */
	if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
	    (prot & VM_PROT_EXECUTE)) {
		mips_icache_sync_range(va, PAGE_SIZE);
		mips_dcache_wbinv_range(va, PAGE_SIZE);
	}
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
 */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}

static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, vm_page_t mpte)
{
	pt_entry_t *pte;
	vm_offset_t pa;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * In the case that a page table page is not resident, we are
	 * creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		pd_entry_t *pde;
		unsigned ptepindex;

		/*
		 * Calculate pagetable page index
		 */
		ptepindex = pmap_pde_pindex(va);
		if (mpte && (mpte->pindex == ptepindex)) {
			mpte->wire_count++;
		} else {
			/*
			 * Get the page directory entry
			 */
			pde = pmap_pde(pmap, va);

			/*
			 * If the page table page is mapped, we just
			 * increment the hold count, and activate it.
			 */
			if (pde && *pde != 0) {
				if (pmap->pm_ptphint &&
				    (pmap->pm_ptphint->pindex == ptepindex)) {
					mpte = pmap->pm_ptphint;
				} else {
					mpte = PHYS_TO_VM_PAGE(
					    MIPS_DIRECT_TO_PHYS(*pde));
					pmap->pm_ptphint = mpte;
				}
				mpte->wire_count++;
			} else {
				mpte = _pmap_allocpte(pmap, ptepindex,
				    M_NOWAIT);
				if (mpte == NULL)
					return (mpte);
			}
		}
	} else {
		mpte = NULL;
	}

	pte = pmap_pte(pmap, va);
	if (pte_test(pte, PTE_V)) {
		if (mpte != NULL) {
			mpte->wire_count--;
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
	    !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
		if (mpte != NULL) {
			pmap_unwire_pte_hold(pmap, va, mpte);
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;

	pa = VM_PAGE_TO_PHYS(m);

	/*
	 * Now validate mapping with RO protection
	 */
	*pte = TLBLO_PA_TO_PFN(pa) | PTE_V;

	if (is_cacheable_mem(pa))
		*pte |= PTE_C_CACHE;
	else
		*pte |= PTE_C_UNCACHED;

	if (is_kernel_pmap(pmap))
		*pte |= PTE_G;
	else {
		*pte |= PTE_RO;
		/*
		 * Sync I & D caches.  Do this only if the target pmap
		 * belongs to the current process.  Otherwise, an
		 * unresolvable TLB miss may occur.
		 */
		if (pmap == &curproc->p_vmspace->vm_pmap) {
			va &= ~PAGE_MASK;
			mips_icache_sync_range(va, PAGE_SIZE);
			mips_dcache_wbinv_range(va, PAGE_SIZE);
		}
	}
	return (mpte);
}

 */ 2132 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && 2133 !pmap_try_insert_pv_entry(pmap, mpte, va, m)) { 2134 if (mpte != NULL) { 2135 pmap_unwire_pte_hold(pmap, va, mpte); 2136 mpte = NULL; 2137 } 2138 return (mpte); 2139 } 2140 2141 /* 2142 * Increment counters 2143 */ 2144 pmap->pm_stats.resident_count++; 2145 2146 pa = VM_PAGE_TO_PHYS(m); 2147 2148 /* 2149 * Now validate mapping with RO protection 2150 */ 2151 *pte = TLBLO_PA_TO_PFN(pa) | PTE_V; 2152 2153 if (is_cacheable_mem(pa)) 2154 *pte |= PTE_C_CACHE; 2155 else 2156 *pte |= PTE_C_UNCACHED; 2157 2158 if (is_kernel_pmap(pmap)) 2159 *pte |= PTE_G; 2160 else { 2161 *pte |= PTE_RO; 2162 /* 2163 * Sync I & D caches. Do this only if the target pmap 2164 * belongs to the current process. Otherwise, an 2165 * unresolvable TLB miss may occur. */ 2166 if (pmap == &curproc->p_vmspace->vm_pmap) { 2167 va &= ~PAGE_MASK; 2168 mips_icache_sync_range(va, PAGE_SIZE); 2169 mips_dcache_wbinv_range(va, PAGE_SIZE); 2170 } 2171 } 2172 return (mpte); 2173} 2174 2175/* 2176 * Make a temporary mapping for a physical address. This is only intended 2177 * to be used for panic dumps. 2178 * 2179 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2180 */ 2181void * 2182pmap_kenter_temporary(vm_paddr_t pa, int i) 2183{ 2184 vm_offset_t va; 2185 2186 if (i != 0) 2187 printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n", 2188 __func__); 2189 2190 if (MIPS_DIRECT_MAPPABLE(pa)) { 2191 va = MIPS_PHYS_TO_DIRECT(pa); 2192 } else { 2193#ifndef __mips_n64 /* XXX : to be converted to new style */ 2194 int cpu; 2195 register_t intr; 2196 struct local_sysmaps *sysm; 2197 pt_entry_t *pte, npte; 2198 2199 /* If this is used other than for dumps, we may need to leave 2200 * interrupts disabled on return. If crash dumps don't work when 2201 * we get to this point, we might want to consider this (leaving things 2202 * disabled as a starting point ;-) 2203 */ 2204 intr = intr_disable(); 2205 cpu = PCPU_GET(cpuid); 2206 sysm = &sysmap_lmem[cpu]; 2207 /* Since this is for the debugger, no locks or any other fun */ 2208 npte = TLBLO_PA_TO_PFN(pa) | PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE; 2209 pte = pmap_pte(kernel_pmap, sysm->base); 2210 *pte = npte; 2211 sysm->valid1 = 1; 2212 pmap_update_page(kernel_pmap, sysm->base, npte); 2213 va = sysm->base; 2214 intr_restore(intr); 2215#endif 2216 } 2217 return ((void *)va); 2218} 2219 2220void 2221pmap_kenter_temporary_free(vm_paddr_t pa) 2222{ 2223#ifndef __mips_n64 /* XXX : to be converted to new style */ 2224 int cpu; 2225 register_t intr; 2226 struct local_sysmaps *sysm; 2227#endif 2228 2229 if (MIPS_DIRECT_MAPPABLE(pa)) { 2230 /* nothing to do for this case */ 2231 return; 2232 } 2233#ifndef __mips_n64 /* XXX : to be converted to new style */ 2234 cpu = PCPU_GET(cpuid); 2235 sysm = &sysmap_lmem[cpu]; 2236 if (sysm->valid1) { 2237 pt_entry_t *pte; 2238 2239 intr = intr_disable(); 2240 pte = pmap_pte(kernel_pmap, sysm->base); 2241 *pte = PTE_G; 2242 pmap_invalidate_page(kernel_pmap, sysm->base); 2243 intr_restore(intr); 2244 sysm->valid1 = 0; 2245 } 2246#endif 2247} 2248
2249/* 2250 * This code was moved to the machine-independent 2251 * vm_map_pmap_enter(). 2252 */ 2253 2254/* 2255 * Maps a sequence of resident pages belonging to the same object. 2256 * The sequence begins with the given page m_start. This page is 2257 * mapped at the given virtual address start. Each subsequent page is 2258 * mapped at a virtual address that is offset from start by the same 2259 * amount as the page is offset from m_start within the object. The 2260 * last page in the sequence is the page with the largest offset from 2261 * m_start that can be mapped at a virtual address less than the given 2262 * virtual address end. Not every virtual page between start and end 2263 * is mapped; only those for which a resident page exists with the 2264 * corresponding offset from m_start are mapped. 2265 */ 2266void 2267pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2268 vm_page_t m_start, vm_prot_t prot) 2269{ 2270 vm_page_t m, mpte; 2271 vm_pindex_t diff, psize; 2272 2273 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 2274 psize = atop(end - start); 2275 mpte = NULL; 2276 m = m_start; 2277 vm_page_lock_queues(); 2278 PMAP_LOCK(pmap); 2279 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2280 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m, 2281 prot, mpte); 2282 m = TAILQ_NEXT(m, listq); 2283 } 2284 vm_page_unlock_queues(); 2285 PMAP_UNLOCK(pmap); 2286} 2287 2288/* 2289 * pmap_object_init_pt preloads the PTEs for a given object 2290 * into the specified pmap. This eliminates the blast of soft 2291 * faults on process startup and immediately after an mmap. 2292 */ 2293void 2294pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2295 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 2296{ 2297 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2298 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2299 ("pmap_object_init_pt: non-device object")); 2300} 2301 2302/* 2303 * Routine: pmap_change_wiring 2304 * Function: Change the wiring attribute for a map/virtual-address 2305 * pair. 2306 * In/out conditions: 2307 * The mapping must already exist in the pmap. 2308 */ 2309void 2310pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 2311{ 2312 pt_entry_t *pte; 2313 2314 if (pmap == NULL) 2315 return; 2316 2317 PMAP_LOCK(pmap); 2318 pte = pmap_pte(pmap, va); 2319 2320 if (wired && !pte_test(pte, PTE_W)) 2321 pmap->pm_stats.wired_count++; 2322 else if (!wired && pte_test(pte, PTE_W)) 2323 pmap->pm_stats.wired_count--; 2324 2325 /* 2326 * Wiring is not a hardware characteristic, so there is no need to 2327 * invalidate the TLB. 2328 */ 2329 if (wired) 2330 pte_set(pte, PTE_W); 2331 else 2332 pte_clear(pte, PTE_W); 2333 PMAP_UNLOCK(pmap); 2334} 2335 2336/* 2337 * Copy the range specified by src_addr/len 2338 * from the source map to the range dst_addr/len 2339 * in the destination map. 2340 * 2341 * This routine is only advisory and need not do anything. 2342 */ 2343 2344void 2345pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 2346 vm_size_t len, vm_offset_t src_addr) 2347{ 2348} 2349 2350/* 2351 * pmap_zero_page zeros the specified hardware page by mapping 2352 * the page into KVM and using bzero to clear its contents. 2353 * 2354 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
 */ 2356void 2357pmap_zero_page(vm_page_t m) 2358{ 2359 vm_offset_t va; 2360 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2361 2362 if (MIPS_DIRECT_MAPPABLE(phys)) { 2363 va = MIPS_PHYS_TO_DIRECT(phys); 2364 bzero((caddr_t)va, PAGE_SIZE); 2365 mips_dcache_wbinv_range(va, PAGE_SIZE); 2366 } else { 2367 va = pmap_lmem_map1(phys); 2368 bzero((caddr_t)va, PAGE_SIZE); 2369 mips_dcache_wbinv_range(va, PAGE_SIZE); 2370 pmap_lmem_unmap(); 2371 } 2372} 2373 2374/* 2375 * pmap_zero_page_area zeros the specified hardware page by mapping 2376 * the page into KVM and using bzero to clear its contents. 2377 * 2378 * off and size may not cover an area beyond a single hardware page. 2379 */ 2380void 2381pmap_zero_page_area(vm_page_t m, int off, int size) 2382{ 2383 vm_offset_t va; 2384 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2385 2386 if (MIPS_DIRECT_MAPPABLE(phys)) { 2387 va = MIPS_PHYS_TO_DIRECT(phys); 2388 bzero((char *)va + off, size); 2389 mips_dcache_wbinv_range(va + off, size); 2390 } else { 2391 va = pmap_lmem_map1(phys); 2392 bzero((char *)va + off, size); 2393 mips_dcache_wbinv_range(va + off, size); 2394 pmap_lmem_unmap(); 2395 } 2396} 2397 2398void 2399pmap_zero_page_idle(vm_page_t m) 2400{ 2401 vm_offset_t va; 2402 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2403 2404 if (MIPS_DIRECT_MAPPABLE(phys)) { 2405 va = MIPS_PHYS_TO_DIRECT(phys); 2406 bzero((caddr_t)va, PAGE_SIZE); 2407 mips_dcache_wbinv_range(va, PAGE_SIZE); 2408 } else { 2409 va = pmap_lmem_map1(phys); 2410 bzero((caddr_t)va, PAGE_SIZE); 2411 mips_dcache_wbinv_range(va, PAGE_SIZE); 2412 pmap_lmem_unmap(); 2413 } 2414} 2415 2416/* 2417 * pmap_copy_page copies the specified (machine independent) 2418 * page by mapping the page into virtual memory and using 2419 * bcopy to copy the page, one machine dependent page at a 2420 * time. 2421 * 2422 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2423 */ 2424void 2425pmap_copy_page(vm_page_t src, vm_page_t dst) 2426{ 2427 vm_offset_t va_src, va_dst; 2428 vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src); 2429 vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst); 2430 2431 if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) { 2432 /* easy case, both can be accessed via KSEG0 */ 2433 /* 2434 * Flush all caches for any VAs that map the source page 2435 * to make sure that the data in SDRAM is up to date. 2436 */ 2437 pmap_flush_pvcache(src); 2438 mips_dcache_wbinv_range_index( 2439 MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE); 2440 va_src = MIPS_PHYS_TO_DIRECT(phys_src); 2441 va_dst = MIPS_PHYS_TO_DIRECT(phys_dst); 2442 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); 2443 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2444 } else { 2445 va_src = pmap_lmem_map2(phys_src, phys_dst); 2446 va_dst = va_src + PAGE_SIZE; 2447 bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE); 2448 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2449 pmap_lmem_unmap(); 2450 } 2451} 2452
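/*
 * Illustrative sketch (not part of the pmap interface): the
 * address-selection pattern shared by the zero/copy routines above.
 * example_map_for_access() is hypothetical; MIPS_DIRECT_MAPPABLE(),
 * MIPS_PHYS_TO_DIRECT(), pmap_lmem_map1() and pmap_lmem_unmap() are the
 * file's own primitives.
 */
#if 0
static void *
example_map_for_access(vm_paddr_t pa)
{
	/*
	 * Prefer the direct map (XKPHYS/KSEG0); fall back to a per-CPU
	 * low-memory window, which the caller must release with
	 * pmap_lmem_unmap() when done.
	 */
	if (MIPS_DIRECT_MAPPABLE(pa))
		return ((void *)MIPS_PHYS_TO_DIRECT(pa));
	return ((void *)pmap_lmem_map1(pa));
}
#endif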
2453/* 2454 * Returns true if the pmap's pv is one of the first 2455 * 16 pvs linked to from this page. This count may 2456 * be changed upwards or downwards in the future; it 2457 * is only necessary that true be returned for a small 2458 * subset of pmaps for proper page aging. 2459 */ 2460boolean_t 2461pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2462{ 2463 pv_entry_t pv; 2464 int loops = 0; 2465 boolean_t rv; 2466 2467 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2468 ("pmap_page_exists_quick: page %p is not managed", m)); 2469 rv = FALSE; 2470 vm_page_lock_queues(); 2471 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2472 if (pv->pv_pmap == pmap) { 2473 rv = TRUE; 2474 break; 2475 } 2476 loops++; 2477 if (loops >= 16) 2478 break; 2479 } 2480 vm_page_unlock_queues(); 2481 return (rv); 2482} 2483 2484/* 2485 * Remove all pages from the specified address space; this aids 2486 * process exit speed. Also, this code is special-cased for the 2487 * current process only, but can have the more generic (and slightly 2488 * slower) mode enabled. This is much faster than pmap_remove 2489 * in the case of running down an entire address space. 2490 */ 2492void 2493pmap_remove_pages(pmap_t pmap) 2494{ 2495 pt_entry_t *pte, tpte; 2496 pv_entry_t pv, npv; 2497 vm_page_t m; 2498 2499 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 2500 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2501 return; 2502 } 2503 vm_page_lock_queues(); 2504 PMAP_LOCK(pmap); 2505 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv != NULL; pv = npv) { 2506 2507 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2508 if (!pte_test(pte, PTE_V)) 2509 panic("pmap_remove_pages: page on pm_pvlist has no pte"); 2510 tpte = *pte; 2511 2512/* 2513 * We cannot remove wired pages from a process' mapping at this time 2514 */ 2515 if (pte_test(&tpte, PTE_W)) { 2516 npv = TAILQ_NEXT(pv, pv_plist); 2517 continue; 2518 } 2519 *pte = is_kernel_pmap(pmap) ? PTE_G : 0; 2520 2521 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte)); 2522 KASSERT(m != NULL, 2523 ("pmap_remove_pages: bad tpte %x", tpte)); 2524 2525 pv->pv_pmap->pm_stats.resident_count--; 2526 2527 /* 2528 * Update the vm_page_t clean and reference bits. 2529 */ 2530 if (pte_test(&tpte, PTE_D)) { 2531 vm_page_dirty(m); 2532 } 2533 npv = TAILQ_NEXT(pv, pv_plist); 2534 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2535 2536 m->md.pv_list_count--; 2537 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2538 if (TAILQ_FIRST(&m->md.pv_list) == NULL) { 2539 vm_page_flag_clear(m, PG_WRITEABLE); 2540 } 2541 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2542 free_pv_entry(pv); 2543 } 2544 pmap_invalidate_all(pmap); 2545 PMAP_UNLOCK(pmap); 2546 vm_page_unlock_queues(); 2547} 2548
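/*
 * Illustrative sketch (hypothetical helper): how the testbit/changebit
 * pair below is typically consumed; the real callers are
 * pmap_is_modified() and pmap_clear_modify() further down.
 */
#if 0
static boolean_t
example_page_dirty(vm_page_t m)
{
	/*
	 * Scan every mapping of m until a PTE with PTE_D set is found;
	 * the caller must hold the page queues lock, as pmap_testbit()
	 * asserts.
	 */
	return (pmap_testbit(m, PTE_D));
}
#endif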
2549/* 2550 * pmap_testbit tests bits in PTEs. 2551 * Note that the testbit/changebit routines are inline, 2552 * and a lot of things compile-time evaluate. 2553 */ 2554static boolean_t 2555pmap_testbit(vm_page_t m, int bit) 2556{ 2557 pv_entry_t pv; 2558 pt_entry_t *pte; 2559 boolean_t rv = FALSE; 2560 2561 if (m->flags & PG_FICTITIOUS) 2562 return (rv); 2563 2564 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 2565 return (rv); 2566 2567 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2568 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2569 PMAP_LOCK(pv->pv_pmap); 2570 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2571 rv = pte_test(pte, bit); 2572 PMAP_UNLOCK(pv->pv_pmap); 2573 if (rv) 2574 break; 2575 } 2576 return (rv); 2577} 2578 2579/* 2580 * This routine is used to clear dirty bits in PTEs. 2581 */ 2582static __inline void 2583pmap_changebit(vm_page_t m, int bit, boolean_t setem) 2584{ 2585 pv_entry_t pv; 2586 pt_entry_t *pte; 2587 2588 if (m->flags & PG_FICTITIOUS) 2589 return; 2590 2591 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2592 /* 2593 * Loop over all current mappings, setting/clearing as appropriate. 2594 * If setting RO, do we need to clear the VAC? 2595 */ 2596 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2597 PMAP_LOCK(pv->pv_pmap); 2598 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2599 if (setem) { 2600 *pte |= bit; 2601 pmap_update_page(pv->pv_pmap, pv->pv_va, *pte); 2602 } else { 2603 pt_entry_t pbits = *pte; 2604 2605 if (pbits & bit) { 2606 if (bit == PTE_D) { 2607 if (pbits & PTE_D) 2608 vm_page_dirty(m); 2609 *pte = (pbits & ~PTE_D) | PTE_RO; 2610 } else { 2611 *pte = pbits & ~bit; 2612 } 2613 pmap_update_page(pv->pv_pmap, pv->pv_va, *pte); 2614 } 2615 } 2616 PMAP_UNLOCK(pv->pv_pmap); 2617 } 2618 if (!setem && bit == PTE_D) 2619 vm_page_flag_clear(m, PG_WRITEABLE); 2620} 2621 2622/* 2623 * pmap_page_wired_mappings: 2624 * 2625 * Return the number of managed mappings to the given physical page 2626 * that are wired. 2627 */ 2628int 2629pmap_page_wired_mappings(vm_page_t m) 2630{ 2631 pv_entry_t pv; 2632 pmap_t pmap; 2633 pt_entry_t *pte; 2634 int count; 2635 2636 count = 0; 2637 if ((m->flags & PG_FICTITIOUS) != 0) 2638 return (count); 2639 vm_page_lock_queues(); 2640 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2641 pmap = pv->pv_pmap; 2642 PMAP_LOCK(pmap); 2643 pte = pmap_pte(pmap, pv->pv_va); 2644 if (pte_test(pte, PTE_W)) 2645 count++; 2646 PMAP_UNLOCK(pmap); 2647 } 2648 vm_page_unlock_queues(); 2649 return (count); 2650} 2651 2652/* 2653 * Clear the write and modified bits in each of the given page's mappings. 2654 */ 2655void 2656pmap_remove_write(vm_page_t m) 2657{ 2658 pv_entry_t pv, npv; 2659 vm_offset_t va; 2660 pt_entry_t *pte; 2661 2662 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2663 ("pmap_remove_write: page %p is not managed", m)); 2664 2665 /* 2666 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by 2667 * another thread while the object is locked. Thus, if PG_WRITEABLE 2668 * is clear, no page table entries need updating. 2669 */ 2670 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2671 if ((m->oflags & VPO_BUSY) == 0 && 2672 (m->flags & PG_WRITEABLE) == 0) 2673 return; 2674 2675 /* 2676 * Loop over all current mappings, setting/clearing as appropriate.
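 * Each writable mapping is downgraded to read/execute with
 * pmap_protect(); once no writable mapping can remain, PG_WRITEABLE is
 * cleared below.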
 */ 2678 vm_page_lock_queues(); 2679 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) { 2680 npv = TAILQ_NEXT(pv, pv_list); 2681 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2682 if (pte == NULL || !pte_test(pte, PTE_V)) 2683 panic("pmap_remove_write: page on pv_list has no pte"); 2684 2685 va = pv->pv_va; 2686 pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE, 2687 VM_PROT_READ | VM_PROT_EXECUTE); 2688 } 2689 vm_page_flag_clear(m, PG_WRITEABLE); 2690 vm_page_unlock_queues(); 2691} 2692 2693/* 2694 * pmap_ts_referenced: 2695 * 2696 * Return the count of reference bits for a page, clearing all of them. 2697 */ 2698int 2699pmap_ts_referenced(vm_page_t m) 2700{ 2701 2702 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2703 ("pmap_ts_referenced: page %p is not managed", m)); 2704 if (m->md.pv_flags & PV_TABLE_REF) { 2705 vm_page_lock_queues(); 2706 m->md.pv_flags &= ~PV_TABLE_REF; 2707 vm_page_unlock_queues(); 2708 return (1); 2709 } 2710 return (0); 2711} 2712 2713/* 2714 * pmap_is_modified: 2715 * 2716 * Return whether or not the specified physical page was modified 2717 * in any physical maps. 2718 */ 2719boolean_t 2720pmap_is_modified(vm_page_t m) 2721{ 2722 boolean_t rv; 2723 2724 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2725 ("pmap_is_modified: page %p is not managed", m)); 2726 2727 /* 2728 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be 2729 * concurrently set while the object is locked. Thus, if PG_WRITEABLE 2730 * is clear, no PTEs can have PTE_D set. 2731 */ 2732 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2733 if ((m->oflags & VPO_BUSY) == 0 && 2734 (m->flags & PG_WRITEABLE) == 0) 2735 return (FALSE); 2736 vm_page_lock_queues(); 2737 if (m->md.pv_flags & PV_TABLE_MOD) 2738 rv = TRUE; 2739 else 2740 rv = pmap_testbit(m, PTE_D); 2741 vm_page_unlock_queues(); 2742 return (rv); 2743} 2744 2745/* N/C */ 2746 2747/* 2748 * pmap_is_prefaultable: 2749 * 2750 * Return whether or not the specified virtual address is eligible 2751 * for prefault. 2752 */ 2753boolean_t 2754pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2755{ 2756 pd_entry_t *pde; 2757 pt_entry_t *pte; 2758 boolean_t rv; 2759 2760 rv = FALSE; 2761 PMAP_LOCK(pmap); 2762 pde = pmap_pde(pmap, addr); 2763 if (pde != NULL && *pde != 0) { 2764 pte = pmap_pde_to_pte(pde, addr); 2765 rv = (*pte == 0); 2766 } 2767 PMAP_UNLOCK(pmap); 2768 return (rv); 2769} 2770 2771/* 2772 * Clear the modify bits on the specified physical page. 2773 */ 2774void 2775pmap_clear_modify(vm_page_t m) 2776{ 2777 2778 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2779 ("pmap_clear_modify: page %p is not managed", m)); 2780 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2781 KASSERT((m->oflags & VPO_BUSY) == 0, 2782 ("pmap_clear_modify: page %p is busy", m)); 2783 2784 /* 2785 * If the page is not PG_WRITEABLE, then no PTEs can have PTE_D set. 2786 * If the object containing the page is locked and the page is not 2787 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. 2788 */ 2789 if ((m->flags & PG_WRITEABLE) == 0) 2790 return; 2791 vm_page_lock_queues(); 2792 if (m->md.pv_flags & PV_TABLE_MOD) { 2793 pmap_changebit(m, PTE_D, FALSE); 2794 m->md.pv_flags &= ~PV_TABLE_MOD; 2795 } 2796 vm_page_unlock_queues(); 2797} 2798
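/*
 * Illustrative sketch (hypothetical helper): the page's
 * reference/modified state as this pmap tracks it, combining the
 * software PV_TABLE_* flags with the per-PTE PTE_D bits scanned by
 * pmap_testbit() above.
 */
#if 0
static void
example_ref_mod_state(vm_page_t m)
{
	boolean_t referenced, modified;

	/* The caller must hold the page queues lock for pmap_testbit(). */
	referenced = (m->md.pv_flags & PV_TABLE_REF) != 0;
	modified = (m->md.pv_flags & PV_TABLE_MOD) != 0 ||
	    pmap_testbit(m, PTE_D);
	printf("page %p: referenced %d modified %d\n", m, referenced,
	    modified);
}
#endif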
2799/* 2800 * pmap_is_referenced: 2801 * 2802 * Return whether or not the specified physical page was referenced 2803 * in any physical maps. 2804 */ 2805boolean_t 2806pmap_is_referenced(vm_page_t m) 2807{ 2808 2809 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2810 ("pmap_is_referenced: page %p is not managed", m)); 2811 return ((m->md.pv_flags & PV_TABLE_REF) != 0); 2812} 2813 2814/* 2815 * pmap_clear_reference: 2816 * 2817 * Clear the reference bit on the specified physical page. 2818 */ 2819void 2820pmap_clear_reference(vm_page_t m) 2821{ 2822 2823 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2824 ("pmap_clear_reference: page %p is not managed", m)); 2825 vm_page_lock_queues(); 2826 if (m->md.pv_flags & PV_TABLE_REF) { 2827 m->md.pv_flags &= ~PV_TABLE_REF; 2828 } 2829 vm_page_unlock_queues(); 2830} 2831 2832/* 2833 * Miscellaneous support routines follow 2834 */ 2835 2836/* 2837 * Map a set of physical memory pages into the kernel virtual 2838 * address space. Return a pointer to where it is mapped. This 2839 * routine is intended to be used for mapping device memory, 2840 * NOT real memory. 2841 * 2842 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit. 2850 */ 2851void * 2852pmap_mapdev(vm_offset_t pa, vm_size_t size) 2853{ 2854 vm_offset_t va, tmpva, offset; 2855 2856 /* 2857 * KSEG1 maps only the first 512MB of the physical address space. 2858 * For pa > 0x20000000 we must make a proper mapping using pmap_kenter_attr(). 2859 */ 2860 if (MIPS_DIRECT_MAPPABLE(pa + size - 1)) 2861 return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa)); 2862 else { 2863 offset = pa & PAGE_MASK; 2864 size = roundup(size + offset, PAGE_SIZE); 2865 2866 va = kmem_alloc_nofault(kernel_map, size); 2867 if (!va) 2868 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 2869 pa = trunc_page(pa); 2870 for (tmpva = va; size > 0;) { 2871 pmap_kenter_attr(tmpva, pa, PTE_C_UNCACHED); 2872 size -= PAGE_SIZE; 2873 tmpva += PAGE_SIZE; 2874 pa += PAGE_SIZE; 2875 } 2876 } 2877 2878 return ((void *)(va + offset)); 2879} 2880 2881void 2882pmap_unmapdev(vm_offset_t va, vm_size_t size) 2883{ 2884#ifndef __mips_n64 2885 vm_offset_t base, offset, tmpva; 2886 2887 /* If the address is within KSEG1 then there is nothing to do */ 2888 if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END) 2889 return; 2890 2891 base = trunc_page(va); 2892 offset = va & PAGE_MASK; 2893 size = roundup(size + offset, PAGE_SIZE); 2894 for (tmpva = base; tmpva < base + size; tmpva += PAGE_SIZE) 2895 pmap_kremove(tmpva); 2896 kmem_free(kernel_map, base, size); 2897#endif 2898} 2899 2900/* 2901 * perform the pmap work for mincore 2902 */ 2903int 2904pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 2905{ 2906 pt_entry_t *ptep, pte; 2907 vm_offset_t pa; 2908 vm_page_t m; 2909 int val; 2910 boolean_t managed; 2911 2912 PMAP_LOCK(pmap); 2913retry: 2914 ptep = pmap_pte(pmap, addr); 2915 pte = (ptep != NULL) ? *ptep : 0; 2916 if (!pte_test(&pte, PTE_V)) { 2917 val = 0; 2918 goto out; 2919 } 2920 val = MINCORE_INCORE; 2921 if (pte_test(&pte, PTE_D)) 2922 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 2923 pa = TLBLO_PTE_TO_PA(pte); 2924 managed = page_is_managed(pa); 2925 if (managed) { 2926 /* 2927 * This may falsely report the given address as 2928 * MINCORE_REFERENCED.
Unfortunately, due to the lack of 2929 * per-PTE reference information, it is impossible to 2930 * determine if the address is MINCORE_REFERENCED. 2931 */ 2932 m = PHYS_TO_VM_PAGE(pa); 2933 if ((m->flags & PG_REFERENCED) != 0) 2934 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 2935 } 2936 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 2937 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { 2938 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ 2939 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 2940 goto retry; 2941 } else 2942out: 2943 PA_UNLOCK_COND(*locked_pa); 2944 PMAP_UNLOCK(pmap); 2945 return (val); 2946} 2947 2948void 2949pmap_activate(struct thread *td) 2950{ 2951 pmap_t pmap, oldpmap; 2952 struct proc *p = td->td_proc; 2953 2954 critical_enter(); 2955 2956 pmap = vmspace_pmap(p->p_vmspace); 2957 oldpmap = PCPU_GET(curpmap); 2958 2959 if (oldpmap) 2960 atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask)); 2961 atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask)); 2962 pmap_asid_alloc(pmap); 2963 if (td == curthread) { 2964 PCPU_SET(segbase, pmap->pm_segtab); 2965 mips_wr_entryhi(pmap->pm_asid[PCPU_GET(cpuid)].asid); 2966 } 2967 2968 PCPU_SET(curpmap, pmap); 2969 critical_exit(); 2970} 2971 2972void 2973pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 2974{ 2975} 2976 2977/* 2978 * Increase the starting virtual address of the given mapping if a 2979 * different alignment might result in more superpage mappings. 2980 */ 2981void 2982pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 2983 vm_offset_t *addr, vm_size_t size) 2984{ 2985 vm_offset_t superpage_offset; 2986 2987 if (size < NBSEG) 2988 return; 2989 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 2990 offset += ptoa(object->pg_color); 2991 superpage_offset = offset & SEGMASK; 2992 if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG || 2993 (*addr & SEGMASK) == superpage_offset) 2994 return; 2995 if ((*addr & SEGMASK) < superpage_offset) 2996 *addr = (*addr & ~SEGMASK) + superpage_offset; 2997 else 2998 *addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset; 2999} 3000 3001/* 3002 * Increase the starting virtual address of the given mapping so 3003 * that it is aligned to not be the second page in a TLB entry. 3004 * This routine assumes that the length is appropriately-sized so 3005 * that the allocation does not share a TLB entry at all if required. 
 */ 3007void 3008pmap_align_tlb(vm_offset_t *addr) 3009{ 3010 if ((*addr & PAGE_SIZE) == 0) 3011 return; 3012 *addr += PAGE_SIZE; 3013 return; 3014} 3015 3016#ifdef DDB 3017DB_SHOW_COMMAND(ptable, ddb_pid_dump) 3018{ 3019 pmap_t pmap; 3020 struct thread *td = NULL; 3021 struct proc *p; 3022 int i, j, k; 3023 vm_paddr_t pa; 3024 vm_offset_t va; 3025 3026 if (have_addr) { 3027 td = db_lookup_thread(addr, TRUE); 3028 if (td == NULL) { 3029 db_printf("Invalid pid or tid\n"); 3030 return; 3031 } 3032 p = td->td_proc; 3033 if (p->p_vmspace == NULL) { 3034 db_printf("No vmspace for process\n"); 3035 return; 3036 } 3037 pmap = vmspace_pmap(p->p_vmspace); 3038 } else 3039 pmap = kernel_pmap; 3040 3041 db_printf("pmap:%p segtab:%p asid:%x generation:%x\n", 3042 pmap, pmap->pm_segtab, pmap->pm_asid[0].asid, 3043 pmap->pm_asid[0].gen); 3044 for (i = 0; i < NPDEPG; i++) { 3045 pd_entry_t *pdpe; 3046 pt_entry_t *pde; 3047 pt_entry_t pte; 3048 3049 pdpe = (pd_entry_t *)pmap->pm_segtab[i]; 3050 if (pdpe == NULL) 3051 continue; 3052 db_printf("[%4d] %p\n", i, pdpe); 3053#ifdef __mips_n64 3054 for (j = 0; j < NPDEPG; j++) { 3055 pde = (pt_entry_t *)pdpe[j]; 3056 if (pde == NULL) 3057 continue; 3058 db_printf("\t[%4d] %p\n", j, pde); 3059#else 3060 { 3061 j = 0; 3062 pde = (pt_entry_t *)pdpe; 3063#endif 3064 for (k = 0; k < NPTEPG; k++) { 3065 pte = pde[k]; 3066 if (pte == 0 || !pte_test(&pte, PTE_V)) 3067 continue; 3068 pa = TLBLO_PTE_TO_PA(pte); 3069 va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT); 3070 db_printf("\t\t[%04d] va: %p pte: %8x pa:%lx\n", 3071 k, (void *)va, pte, (u_long)pa); 3072 } 3073 } 3074 } 3075} 3076#endif 3077 3078#if defined(DEBUG) 3079 3080static void pads(pmap_t pm); 3081void pmap_pvdump(vm_offset_t pa); 3082 3083/* print address space of pmap */ 3084static void 3085pads(pmap_t pm) 3086{ 3087 unsigned va, i, j; 3088 pt_entry_t *ptep; 3089 3090 if (pm == kernel_pmap) 3091 return; 3092 for (i = 0; i < NPTEPG; i++) 3093 if (pm->pm_segtab[i]) 3094 for (j = 0; j < NPTEPG; j++) { 3095 va = (i << SEGSHIFT) + (j << PAGE_SHIFT); 3096 if (pm == kernel_pmap && va < KERNBASE) 3097 continue; 3098 if (pm != kernel_pmap && 3099 va >= VM_MAXUSER_ADDRESS) 3100 continue; 3101 ptep = pmap_pte(pm, va); 3102 if (pmap_pte_v(ptep)) 3103 printf("%x:%x ", va, *(int *)ptep); 3104 } 3105 3106} 3107 3108void 3109pmap_pvdump(vm_offset_t pa) 3110{ 3111 register pv_entry_t pv; 3112 vm_page_t m; 3113 3114 printf("pa %x", pa); 3115 m = PHYS_TO_VM_PAGE(pa); 3116 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3117 pv = TAILQ_NEXT(pv, pv_list)) { 3118 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3119 pads(pv->pv_pmap); 3120 } 3121 printf(" "); 3122} 3123 3124/* N/C */ 3125#endif 3126 3127
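/*
 * Illustrative sketch (hypothetical helper, not called anywhere): the
 * ASID validity test that pmap_asid_alloc() below applies; an ASID is
 * stale once the per-CPU generation has been bumped by a TLB flush.
 */
#if 0
static int
example_asid_is_current(pmap_t pmap)
{
	return (pmap->pm_asid[PCPU_GET(cpuid)].asid != PMAP_ASID_RESERVED &&
	    pmap->pm_asid[PCPU_GET(cpuid)].gen == PCPU_GET(asid_generation));
}
#endif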
3128/* 3129 * Allocate a TLB address space tag (called an ASID or TLBPID) for the pmap. 3130 * It takes almost as much or more time to search the TLB for a 3131 * specific ASID and flush those entries as it does to flush the entire TLB. 3132 * Therefore, when we allocate a new ASID, we just take the next number. When 3133 * we run out of numbers, we flush the TLB, increment the generation count 3134 * and start over. ASID zero is reserved for kernel use. 3135 */ 3136static void 3137pmap_asid_alloc(pmap_t pmap) 3138{ 3139 3140 if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED || 3141 pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) { 3142 3143 if (PCPU_GET(next_asid) == pmap_max_asid) { 3144 tlb_invalidate_all_user(NULL); 3145 PCPU_SET(asid_generation, 3146 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK); 3147 if (PCPU_GET(asid_generation) == 0) { 3148 PCPU_SET(asid_generation, 1); 3149 } 3150 PCPU_SET(next_asid, 1); /* 0 means invalid */ 3151 } 3152 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid); 3153 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation); 3154 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1); 3155 } 3156} 3157 3158int 3159page_is_managed(vm_offset_t pa) 3160{ 3161 vm_offset_t pgnum = mips_btop(pa); 3162 3163 if (pgnum >= first_page) { 3164 vm_page_t m; 3165 3166 m = PHYS_TO_VM_PAGE(pa); 3167 if (m == NULL) 3168 return (0); 3169 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) 3170 return (1); 3171 } 3172 return (0); 3173} 3174 3175static int 3176init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot) 3177{ 3178 int rw; 3179 3180 if (!(prot & VM_PROT_WRITE)) 3181 rw = PTE_V | PTE_RO | PTE_C_CACHE; 3182 else if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 3183 if ((m->md.pv_flags & PV_TABLE_MOD) != 0) 3184 rw = PTE_V | PTE_D | PTE_C_CACHE; 3185 else 3186 rw = PTE_V | PTE_C_CACHE; 3187 vm_page_flag_set(m, PG_WRITEABLE); 3188 } else 3189 /* Needn't emulate a modified bit for unmanaged pages. */ 3190 rw = PTE_V | PTE_D | PTE_C_CACHE; 3191 return (rw); 3192} 3193 3194/* 3195 * pmap_emulate_modified: perform dirty-bit emulation. 3196 * 3197 * On SMP, update just the local TLB; other CPUs will update their 3198 * TLBs from the PTE lazily, if they get the exception. 3199 * Returns 0 in case of success, 1 if the page is read only and we 3200 * need to fault. 3201 */ 3202int 3203pmap_emulate_modified(pmap_t pmap, vm_offset_t va) 3204{ 3205 vm_page_t m; 3206 pt_entry_t *pte; 3207 vm_offset_t pa; 3208 3209 PMAP_LOCK(pmap); 3210 pte = pmap_pte(pmap, va); 3211 if (pte == NULL) 3212 panic("pmap_emulate_modified: can't find PTE"); 3213#ifdef SMP 3214 /* It is possible that some other CPU changed m-bit */ 3215 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) { 3216 pmap_update_page_local(pmap, va, *pte); 3217 PMAP_UNLOCK(pmap); 3218 return (0); 3219 } 3220#else 3221 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) 3222 panic("pmap_emulate_modified: invalid pte"); 3223#endif 3224 if (pte_test(pte, PTE_RO)) { 3225 /* Write to a genuinely read-only page. */ 3226 PMAP_UNLOCK(pmap); 3227 return (1); 3228 } 3229 pte_set(pte, PTE_D); 3230 pmap_update_page_local(pmap, va, *pte); 3231 pa = TLBLO_PTE_TO_PA(*pte); 3232 if (!page_is_managed(pa)) 3233 panic("pmap_emulate_modified: unmanaged page"); 3234 m = PHYS_TO_VM_PAGE(pa); 3235 m->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD); 3236 PMAP_UNLOCK(pmap); 3237 return (0); 3238} 3239
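/*
 * Illustrative sketch (hypothetical, for exposition only): the shape of
 * the TLB-modified exception path that ends in pmap_emulate_modified()
 * above.  The real dispatch lives in the MIPS trap code;
 * example_tlb_mod() and deliver_protection_fault() are invented names.
 */
#if 0
static void
example_tlb_mod(pmap_t pmap, vm_offset_t va)
{
	if (pmap_emulate_modified(pmap, va) != 0) {
		/*
		 * The page really is read-only: hand the access to the
		 * VM fault path instead of retrying the instruction.
		 */
		deliver_protection_fault(va);
	}
}
#endif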
3240/* 3241 * Routine: pmap_kextract 3242 * Function: 3243 * Extract the physical page address associated with the 3244 * given kernel virtual address. 3245 */ 3246 /* PMAP_INLINE */ vm_offset_t 3247pmap_kextract(vm_offset_t va) 3248{ 3249 int mapped; 3250 3251 /* 3252 * First, the direct-mapped regions. 3253 */ 3254#if defined(__mips_n64) 3255 if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END) 3256 return (MIPS_XKPHYS_TO_PHYS(va)); 3257#endif 3258 if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END) 3259 return (MIPS_KSEG0_TO_PHYS(va)); 3260 3261 if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END) 3262 return (MIPS_KSEG1_TO_PHYS(va)); 3263 3264 /* 3265 * User virtual addresses. 3266 */ 3267 if (va < VM_MAXUSER_ADDRESS) { 3268 pt_entry_t *ptep; 3269 3270 if (curproc && curproc->p_vmspace) { 3271 ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va); 3272 if (ptep) { 3273 return (TLBLO_PTE_TO_PA(*ptep) | 3274 (va & PAGE_MASK)); 3275 } 3276 return (0); 3277 } 3278 } 3279 3280 /* 3281 * Should be kernel virtual here, otherwise fail 3282 */ 3283 mapped = (va >= MIPS_KSEG2_START && va < MIPS_KSEG2_END); 3284#if defined(__mips_n64) 3285 mapped = mapped || (va >= MIPS_XKSEG_START && va < MIPS_XKSEG_END); 3286#endif 3287 /* 3288 * Kernel virtual. 3289 */ 3290 3291 if (mapped) { 3292 pt_entry_t *ptep; 3293 3294 /* Is the kernel pmap initialized? */ 3295 if (kernel_pmap->pm_active) { 3296 /* It's inside the virtual address range */ 3297 ptep = pmap_pte(kernel_pmap, va); 3298 if (ptep) { 3299 return (TLBLO_PTE_TO_PA(*ptep) | 3300 (va & PAGE_MASK)); 3301 } 3302 } 3303 return (0); 3304 } 3305 3306 panic("%s for unknown address space %p.", __func__, (void *)va); 3307} 3308 3309 3310void 3311pmap_flush_pvcache(vm_page_t m) 3312{ 3313 pv_entry_t pv; 3314 3315 if (m != NULL) { 3316 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3317 pv = TAILQ_NEXT(pv, pv_list)) { 3318 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); 3319 } 3320 } 3321} 3322
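/*
 * Illustrative sketch (hypothetical helper): pmap_kextract() accepts
 * any kernel-translatable address class handled above, so finding the
 * physical address of an ordinary kernel buffer reduces to one call.
 */
#if 0
static vm_paddr_t
example_kva_to_pa(void *buf)
{
	return ((vm_paddr_t)pmap_kextract((vm_offset_t)buf));
}
#endif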