pmap.c revision 192628
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
 */

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
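 *
 * On MIPS this map is implemented as a two-level structure: a per-pmap
 * segment table (pm_segtab) of pd_entry_t entries, each of which points
 * to a page of pt_entry_t entries covering one segment (1 << SEGSHIFT
 * bytes) of virtual address space.  A lookup therefore proceeds roughly
 * as in pmap_segmap() and pmap_pte() below:
 *
 *	pde = pmap->pm_segtab[va >> SEGSHIFT];
 *	pte = (pt_entry_t *)pde + vad_to_pte_offset(va);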
66 */ 67 68#include <sys/cdefs.h> 69__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 192628 2009-05-23 07:58:56Z alc $"); 70 71#include "opt_ddb.h" 72#include "opt_msgbuf.h" 73#include <sys/param.h> 74#include <sys/systm.h> 75#include <sys/proc.h> 76#include <sys/msgbuf.h> 77#include <sys/vmmeter.h> 78#include <sys/mman.h> 79 80#include <vm/vm.h> 81#include <vm/vm_param.h> 82#include <sys/lock.h> 83#include <sys/mutex.h> 84#include <vm/vm_kern.h> 85#include <vm/vm_page.h> 86#include <vm/vm_map.h> 87#include <vm/vm_object.h> 88#include <vm/vm_extern.h> 89#include <vm/vm_pageout.h> 90#include <vm/vm_pager.h> 91#include <vm/uma.h> 92#include <sys/pcpu.h> 93#include <sys/sched.h> 94#ifdef SMP 95#include <sys/smp.h> 96#endif 97 98#include <machine/cache.h> 99#include <machine/pltfm.h> 100#include <machine/md_var.h> 101 102#if defined(DIAGNOSTIC) 103#define PMAP_DIAGNOSTIC 104#endif 105 106#undef PMAP_DEBUG 107 108#ifndef PMAP_SHPGPERPROC 109#define PMAP_SHPGPERPROC 200 110#endif 111 112#if !defined(PMAP_DIAGNOSTIC) 113#define PMAP_INLINE __inline 114#else 115#define PMAP_INLINE 116#endif 117 118/* 119 * Get PDEs and PTEs for user/kernel address space 120 */ 121#define pmap_pde(m, v) (&((m)->pm_segtab[(vm_offset_t)(v) >> SEGSHIFT])) 122#define segtab_pde(m, v) (m[(vm_offset_t)(v) >> SEGSHIFT]) 123 124#define pmap_pte_w(pte) ((*(int *)pte & PTE_W) != 0) 125#define pmap_pde_v(pte) ((*(int *)pte) != 0) 126#define pmap_pte_m(pte) ((*(int *)pte & PTE_M) != 0) 127#define pmap_pte_v(pte) ((*(int *)pte & PTE_V) != 0) 128 129#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PTE_W):(*(int *)pte &= ~PTE_W)) 130#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 131 132#define MIPS_SEGSIZE (1L << SEGSHIFT) 133#define mips_segtrunc(va) ((va) & ~(MIPS_SEGSIZE-1)) 134#define pmap_TLB_invalidate_all() MIPS_TBIAP() 135#define pmap_va_asid(pmap, va) ((va) | ((pmap)->pm_asid[PCPU_GET(cpuid)].asid << VMTLB_PID_SHIFT)) 136#define is_kernel_pmap(x) ((x) == kernel_pmap) 137 138struct pmap kernel_pmap_store; 139pd_entry_t *kernel_segmap; 140 141vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 142vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 143 144static int nkpt; 145unsigned pmap_max_asid; /* max ASID supported by the system */ 146 147 148#define PMAP_ASID_RESERVED 0 149 150 151vm_offset_t kernel_vm_end; 152 153static void pmap_asid_alloc(pmap_t pmap); 154 155/* 156 * Data for the pv entry allocation mechanism 157 */ 158static uma_zone_t pvzone; 159static struct vm_object pvzone_obj; 160static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 161 162struct fpage fpages_shared[FPAGES_SHARED]; 163 164struct sysmaps sysmaps_pcpu[MAXCPU]; 165 166static PMAP_INLINE void free_pv_entry(pv_entry_t pv); 167static pv_entry_t get_pv_entry(pmap_t locked_pmap); 168static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); 169 170static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 171 vm_page_t m, vm_prot_t prot, vm_page_t mpte); 172static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va); 173static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); 174static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); 175static boolean_t pmap_testbit(vm_page_t m, int bit); 176static void 177pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, 178 vm_page_t m, boolean_t wired); 179static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, 
180 vm_offset_t va, vm_page_t m); 181 182static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); 183 184static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); 185static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); 186static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot); 187static void pmap_TLB_invalidate_kernel(vm_offset_t); 188static void pmap_TLB_update_kernel(vm_offset_t, pt_entry_t); 189static void pmap_init_fpage(void); 190 191#ifdef SMP 192static void pmap_invalidate_page_action(void *arg); 193static void pmap_invalidate_all_action(void *arg); 194static void pmap_update_page_action(void *arg); 195 196#endif 197 198struct local_sysmaps { 199 struct mtx lock; 200 pt_entry_t CMAP1; 201 pt_entry_t CMAP2; 202 caddr_t CADDR1; 203 caddr_t CADDR2; 204 uint16_t valid1, valid2; 205}; 206 207/* This structure is for large memory 208 * above 512Meg. We can't (in 32 bit mode) 209 * just use the direct mapped MIPS_CACHED_TO_PHYS() 210 * macros since we can't see the memory and must 211 * map it in when we need to access it. In 64 212 * bit mode this goes away. 213 */ 214static struct local_sysmaps sysmap_lmem[MAXCPU]; 215caddr_t virtual_sys_start = (caddr_t)0; 216 217pd_entry_t 218pmap_segmap(pmap_t pmap, vm_offset_t va) 219{ 220 if (pmap->pm_segtab) 221 return (pmap->pm_segtab[((vm_offset_t)(va) >> SEGSHIFT)]); 222 else 223 return ((pd_entry_t)0); 224} 225 226/* 227 * Routine: pmap_pte 228 * Function: 229 * Extract the page table entry associated 230 * with the given map/virtual_address pair. 231 */ 232pt_entry_t * 233pmap_pte(pmap_t pmap, vm_offset_t va) 234{ 235 pt_entry_t *pdeaddr; 236 237 if (pmap) { 238 pdeaddr = (pt_entry_t *)pmap_segmap(pmap, va); 239 if (pdeaddr) { 240 return pdeaddr + vad_to_pte_offset(va); 241 } 242 } 243 return ((pt_entry_t *)0); 244} 245 246 247vm_offset_t 248pmap_steal_memory(vm_size_t size) 249{ 250 vm_size_t bank_size; 251 vm_offset_t pa, va; 252 253 size = round_page(size); 254 255 bank_size = phys_avail[1] - phys_avail[0]; 256 while (size > bank_size) { 257 int i; 258 259 for (i = 0; phys_avail[i + 2]; i += 2) { 260 phys_avail[i] = phys_avail[i + 2]; 261 phys_avail[i + 1] = phys_avail[i + 3]; 262 } 263 phys_avail[i] = 0; 264 phys_avail[i + 1] = 0; 265 if (!phys_avail[0]) 266 panic("pmap_steal_memory: out of memory"); 267 bank_size = phys_avail[1] - phys_avail[0]; 268 } 269 270 pa = phys_avail[0]; 271 phys_avail[0] += size; 272 if (pa >= MIPS_KSEG0_LARGEST_PHYS) { 273 panic("Out of memory below 512Meg?"); 274 } 275 va = MIPS_PHYS_TO_CACHED(pa); 276 bzero((caddr_t)va, size); 277 return va; 278} 279 280/* 281 * Bootstrap the system enough to run with virtual memory. This 282 * assumes that the phys_avail array has been initialized. 283 */ 284void 285pmap_bootstrap(void) 286{ 287 pt_entry_t *pgtab; 288 pt_entry_t *pte; 289 int i, j; 290 int memory_larger_than_512meg = 0; 291 292 /* Sort. 
*/ 293again: 294 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 295 if (phys_avail[i + 1] >= MIPS_KSEG0_LARGEST_PHYS) { 296 memory_larger_than_512meg++; 297 } 298 if (i < 2) 299 continue; 300 if (phys_avail[i - 2] > phys_avail[i]) { 301 vm_paddr_t ptemp[2]; 302 303 304 ptemp[0] = phys_avail[i + 0]; 305 ptemp[1] = phys_avail[i + 1]; 306 307 phys_avail[i + 0] = phys_avail[i - 2]; 308 phys_avail[i + 1] = phys_avail[i - 1]; 309 310 phys_avail[i - 2] = ptemp[0]; 311 phys_avail[i - 1] = ptemp[1]; 312 goto again; 313 } 314 } 315 316 if (bootverbose) { 317 printf("Physical memory chunk(s):\n"); 318 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 319 vm_paddr_t size; 320 321 size = phys_avail[i + 1] - phys_avail[i]; 322 printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n", 323 (uintmax_t) phys_avail[i], 324 (uintmax_t) phys_avail[i + 1] - 1, 325 (uintmax_t) size, (uintmax_t) size / PAGE_SIZE); 326 } 327 } 328 /* 329 * Steal the message buffer from the beginning of memory. 330 */ 331 msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE); 332 msgbufinit(msgbufp, MSGBUF_SIZE); 333 334 /* 335 * Steal thread0 kstack. 336 */ 337 kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT); 338 339 340 virtual_avail = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET; 341 virtual_end = VM_MAX_KERNEL_ADDRESS; 342 343 /* 344 * Steal some virtual space that will not be in kernel_segmap. This 345 * va memory space will be used to map in kernel pages that are 346 * outside the 512Meg region. Note that we only do this steal when 347 * we do have memory in this region, that way for systems with 348 * smaller memory we don't "steal" any va ranges :-) 349 */ 350 if (memory_larger_than_512meg) { 351 for (i = 0; i < MAXCPU; i++) { 352 sysmap_lmem[i].CMAP1 = PTE_G; 353 sysmap_lmem[i].CMAP2 = PTE_G; 354 sysmap_lmem[i].CADDR1 = (caddr_t)virtual_avail; 355 virtual_avail += PAGE_SIZE; 356 sysmap_lmem[i].CADDR2 = (caddr_t)virtual_avail; 357 virtual_avail += PAGE_SIZE; 358 sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0; 359 PMAP_LGMEM_LOCK_INIT(&sysmap_lmem[i]); 360 } 361 } 362 virtual_sys_start = (caddr_t)virtual_avail; 363 /* 364 * Allocate segment table for the kernel 365 */ 366 kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE); 367 368 /* 369 * Allocate second level page tables for the kernel 370 */ 371 nkpt = NKPT; 372 if (memory_larger_than_512meg) { 373 /* 374 * If we have a large memory system we CANNOT afford to hit 375 * pmap_growkernel() and allocate memory. Since we MAY end 376 * up with a page that is NOT mappable. For that reason we 377 * up front grab more. Normall NKPT is 120 (YMMV see pmap.h) 378 * this gives us 480meg of kernel virtual addresses at the 379 * cost of 120 pages (each page gets us 4 Meg). Since the 380 * kernel starts at virtual_avail, we can use this to 381 * calculate how many entris are left from there to the end 382 * of the segmap, we want to allocate all of it, which would 383 * be somewhere above 0xC0000000 - 0xFFFFFFFF which results 384 * in about 256 entries or so instead of the 120. 385 */ 386 nkpt = (PAGE_SIZE / sizeof(pd_entry_t)) - (virtual_avail >> SEGSHIFT); 387 } 388 pgtab = (pt_entry_t *)pmap_steal_memory(PAGE_SIZE * nkpt); 389 390 /* 391 * The R[4-7]?00 stores only one copy of the Global bit in the 392 * translation lookaside buffer for each 2 page entry. Thus invalid 393 * entrys must have the Global bit set so when Entry LO and Entry HI 394 * G bits are anded together they will produce a global bit to store 395 * in the tlb. 
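 * That is why the loop below fills every kernel page table page with
 * PTE_G entries instead of simply leaving the invalid entries zero.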
396 */ 397 for (i = 0, pte = pgtab; i < (nkpt * NPTEPG); i++, pte++) 398 *pte = PTE_G; 399 400 printf("Va=0x%x Ve=%x\n", virtual_avail, virtual_end); 401 /* 402 * The segment table contains the KVA of the pages in the second 403 * level page table. 404 */ 405 printf("init kernel_segmap va >> = %d nkpt:%d\n", 406 (virtual_avail >> SEGSHIFT), 407 nkpt); 408 for (i = 0, j = (virtual_avail >> SEGSHIFT); i < nkpt; i++, j++) 409 kernel_segmap[j] = (pd_entry_t)(pgtab + (i * NPTEPG)); 410 411 for (i = 0; phys_avail[i + 2]; i += 2) 412 continue; 413 printf("avail_start:0x%x avail_end:0x%x\n", 414 phys_avail[0], phys_avail[i + 1]); 415 416 /* 417 * The kernel's pmap is statically allocated so we don't have to use 418 * pmap_create, which is unlikely to work correctly at this part of 419 * the boot sequence (XXX and which no longer exists). 420 */ 421 PMAP_LOCK_INIT(kernel_pmap); 422 kernel_pmap->pm_segtab = kernel_segmap; 423 kernel_pmap->pm_active = ~0; 424 TAILQ_INIT(&kernel_pmap->pm_pvlist); 425 kernel_pmap->pm_asid[PCPU_GET(cpuid)].asid = PMAP_ASID_RESERVED; 426 kernel_pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 427 pmap_max_asid = VMNUM_PIDS; 428 MachSetPID(0); 429} 430 431/* 432 * Initialize a vm_page's machine-dependent fields. 433 */ 434void 435pmap_page_init(vm_page_t m) 436{ 437 438 TAILQ_INIT(&m->md.pv_list); 439 m->md.pv_list_count = 0; 440 m->md.pv_flags = 0; 441} 442 443/* 444 * Initialize the pmap module. 445 * Called by vm_init, to initialize any structures that the pmap 446 * system needs to map virtual memory. 447 * pmap_init has been enhanced to support in a fairly consistant 448 * way, discontiguous physical memory. 449 */ 450void 451pmap_init(void) 452{ 453 454 if (need_wired_tlb_page_pool) 455 pmap_init_fpage(); 456 /* 457 * Initialize the address space (zone) for the pv entries. Set a 458 * high water mark so that the system can recover from excessive 459 * numbers of pv entries. 460 */ 461 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 462 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 463 pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count; 464 pv_entry_high_water = 9 * (pv_entry_max / 10); 465 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); 466} 467 468/*************************************************** 469 * Low level helper routines..... 470 ***************************************************/ 471 472#if defined(PMAP_DIAGNOSTIC) 473 474/* 475 * This code checks for non-writeable/modified pages. 476 * This should be an invalid condition. 
477 */ 478static int 479pmap_nw_modified(pt_entry_t pte) 480{ 481 if ((pte & (PTE_M | PTE_RO)) == (PTE_M | PTE_RO)) 482 return (1); 483 else 484 return (0); 485} 486 487#endif 488 489static void 490pmap_invalidate_all(pmap_t pmap) 491{ 492#ifdef SMP 493 smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *)pmap); 494} 495 496static void 497pmap_invalidate_all_action(void *arg) 498{ 499 pmap_t pmap = (pmap_t)arg; 500 501#endif 502 503 if (pmap->pm_active & PCPU_GET(cpumask)) { 504 pmap_TLB_invalidate_all(); 505 } else 506 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 507} 508 509struct pmap_invalidate_page_arg { 510 pmap_t pmap; 511 vm_offset_t va; 512}; 513 514static __inline void 515pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 516{ 517#ifdef SMP 518 struct pmap_invalidate_page_arg arg; 519 520 arg.pmap = pmap; 521 arg.va = va; 522 523 smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *)&arg); 524} 525 526static void 527pmap_invalidate_page_action(void *arg) 528{ 529 pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap; 530 vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va; 531 532#endif 533 534 if (is_kernel_pmap(pmap)) { 535 pmap_TLB_invalidate_kernel(va); 536 return; 537 } 538 if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) 539 return; 540 else if (!(pmap->pm_active & PCPU_GET(cpumask))) { 541 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 542 return; 543 } 544 va = pmap_va_asid(pmap, (va & ~PGOFSET)); 545 mips_TBIS(va); 546} 547 548static void 549pmap_TLB_invalidate_kernel(vm_offset_t va) 550{ 551 u_int32_t pid; 552 553 MachTLBGetPID(pid); 554 va = va | (pid << VMTLB_PID_SHIFT); 555 mips_TBIS(va); 556} 557 558struct pmap_update_page_arg { 559 pmap_t pmap; 560 vm_offset_t va; 561 pt_entry_t pte; 562}; 563 564void 565pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) 566{ 567#ifdef SMP 568 struct pmap_update_page_arg arg; 569 570 arg.pmap = pmap; 571 arg.va = va; 572 arg.pte = pte; 573 574 smp_rendezvous(0, pmap_update_page_action, 0, (void *)&arg); 575} 576 577static void 578pmap_update_page_action(void *arg) 579{ 580 pmap_t pmap = ((struct pmap_update_page_arg *)arg)->pmap; 581 vm_offset_t va = ((struct pmap_update_page_arg *)arg)->va; 582 pt_entry_t pte = ((struct pmap_update_page_arg *)arg)->pte; 583 584#endif 585 if (is_kernel_pmap(pmap)) { 586 pmap_TLB_update_kernel(va, pte); 587 return; 588 } 589 if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) 590 return; 591 else if (!(pmap->pm_active & PCPU_GET(cpumask))) { 592 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 593 return; 594 } 595 va = pmap_va_asid(pmap, va); 596 MachTLBUpdate(va, pte); 597} 598 599static void 600pmap_TLB_update_kernel(vm_offset_t va, pt_entry_t pte) 601{ 602 u_int32_t pid; 603 604 MachTLBGetPID(pid); 605 va = va | (pid << VMTLB_PID_SHIFT); 606 607 MachTLBUpdate(va, pte); 608} 609 610/* 611 * Routine: pmap_extract 612 * Function: 613 * Extract the physical page address associated 614 * with the given map/virtual_address pair. 615 */ 616vm_paddr_t 617pmap_extract(pmap_t pmap, vm_offset_t va) 618{ 619 pt_entry_t *pte; 620 vm_offset_t retval = 0; 621 622 PMAP_LOCK(pmap); 623 pte = pmap_pte(pmap, va); 624 if (pte) { 625 retval = mips_tlbpfn_to_paddr(*pte) | (va & PAGE_MASK); 626 } 627 PMAP_UNLOCK(pmap); 628 return retval; 629} 630 631/* 632 * Routine: pmap_extract_and_hold 633 * Function: 634 * Atomically extract and hold the physical page 635 * with the given pmap and virtual address pair 636 * if that mapping permits the given protection. 
637 */ 638vm_page_t 639pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 640{ 641 pt_entry_t pte; 642 vm_page_t m; 643 644 m = NULL; 645 vm_page_lock_queues(); 646 PMAP_LOCK(pmap); 647 648 pte = *pmap_pte(pmap, va); 649 if (pte != 0 && pmap_pte_v(&pte) && 650 ((pte & PTE_RW) || (prot & VM_PROT_WRITE) == 0)) { 651 m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pte)); 652 vm_page_hold(m); 653 } 654 vm_page_unlock_queues(); 655 PMAP_UNLOCK(pmap); 656 return (m); 657} 658 659/*************************************************** 660 * Low level mapping routines..... 661 ***************************************************/ 662 663/* 664 * add a wired page to the kva 665 */ 666 /* PMAP_INLINE */ void 667pmap_kenter(vm_offset_t va, vm_paddr_t pa) 668{ 669 register pt_entry_t *pte; 670 pt_entry_t npte, opte; 671 672#ifdef PMAP_DEBUG 673 printf("pmap_kenter: va: 0x%08x -> pa: 0x%08x\n", va, pa); 674#endif 675 npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W; 676 677 if (is_cacheable_mem(pa)) 678 npte |= PTE_CACHE; 679 else 680 npte |= PTE_UNCACHED; 681 682 pte = pmap_pte(kernel_pmap, va); 683 opte = *pte; 684 *pte = npte; 685 686 pmap_update_page(kernel_pmap, va, npte); 687} 688 689/* 690 * remove a page from the kernel pagetables 691 */ 692 /* PMAP_INLINE */ void 693pmap_kremove(vm_offset_t va) 694{ 695 register pt_entry_t *pte; 696 697 pte = pmap_pte(kernel_pmap, va); 698 *pte = PTE_G; 699 pmap_invalidate_page(kernel_pmap, va); 700} 701 702/* 703 * Used to map a range of physical addresses into kernel 704 * virtual address space. 705 * 706 * The value passed in '*virt' is a suggested virtual address for 707 * the mapping. Architectures which can support a direct-mapped 708 * physical to virtual region can return the appropriate address 709 * within that region, leaving '*virt' unchanged. Other 710 * architectures should map the pages starting at '*virt' and 711 * update '*virt' with the first usable address after the mapped 712 * region. 713 */ 714vm_offset_t 715pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) 716{ 717 vm_offset_t va, sva; 718 719 va = sva = *virt; 720 while (start < end) { 721 pmap_kenter(va, start); 722 va += PAGE_SIZE; 723 start += PAGE_SIZE; 724 } 725 *virt = va; 726 return (sva); 727} 728 729/* 730 * Add a list of wired pages to the kva 731 * this routine is only used for temporary 732 * kernel mappings that do not need to have 733 * page modification or references recorded. 734 * Note that old mappings are simply written 735 * over. The page *must* be wired. 736 */ 737void 738pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 739{ 740 int i; 741 742 for (i = 0; i < count; i++) { 743 pmap_kenter(va, VM_PAGE_TO_PHYS(m[i])); 744 va += PAGE_SIZE; 745 } 746} 747 748/* 749 * this routine jerks page mappings from the 750 * kernel -- it is meant only for temporary mappings. 751 */ 752void 753pmap_qremove(vm_offset_t va, int count) 754{ 755 while (count-- > 0) { 756 pmap_kremove(va); 757 va += PAGE_SIZE; 758 } 759} 760 761/*************************************************** 762 * Page table page management routines..... 763 ***************************************************/ 764 765/* 766 * floating pages (FPAGES) management routines 767 * 768 * FPAGES are the reserved virtual memory areas which can be 769 * mapped to any physical memory. This gets used typically 770 * in the following functions: 771 * 772 * pmap_zero_page 773 * pmap_copy_page 774 */ 775 776/* 777 * Create the floating pages, aka FPAGES! 
778 */ 779static void 780pmap_init_fpage() 781{ 782 vm_offset_t kva; 783 int i, j; 784 struct sysmaps *sysmaps; 785 786 /* 787 * We allocate a total of (FPAGES*MAXCPU + FPAGES_SHARED + 1) pages 788 * at first. FPAGES & FPAGES_SHARED should be EVEN Then we'll adjust 789 * 'kva' to be even-page aligned so that the fpage area can be wired 790 * in the TLB with a single TLB entry. 791 */ 792 kva = kmem_alloc_nofault(kernel_map, 793 (FPAGES * MAXCPU + 1 + FPAGES_SHARED) * PAGE_SIZE); 794 if ((void *)kva == NULL) 795 panic("pmap_init_fpage: fpage allocation failed"); 796 797 /* 798 * Make up start at an even page number so we can wire down the 799 * fpage area in the tlb with a single tlb entry. 800 */ 801 if ((((vm_offset_t)kva) >> PGSHIFT) & 1) { 802 /* 803 * 'kva' is not even-page aligned. Adjust it and free the 804 * first page which is unused. 805 */ 806 kmem_free(kernel_map, (vm_offset_t)kva, NBPG); 807 kva = ((vm_offset_t)kva) + NBPG; 808 } else { 809 /* 810 * 'kva' is even page aligned. We don't need the last page, 811 * free it. 812 */ 813 kmem_free(kernel_map, ((vm_offset_t)kva) + FSPACE, NBPG); 814 } 815 816 for (i = 0; i < MAXCPU; i++) { 817 sysmaps = &sysmaps_pcpu[i]; 818 mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); 819 820 /* Assign FPAGES pages to the CPU */ 821 for (j = 0; j < FPAGES; j++) 822 sysmaps->fp[j].kva = kva + (j) * PAGE_SIZE; 823 kva = ((vm_offset_t)kva) + (FPAGES * PAGE_SIZE); 824 } 825 826 /* 827 * An additional 2 pages are needed, one for pmap_zero_page_idle() 828 * and one for coredump. These pages are shared by all cpu's 829 */ 830 fpages_shared[PMAP_FPAGE3].kva = kva; 831 fpages_shared[PMAP_FPAGE_KENTER_TEMP].kva = kva + PAGE_SIZE; 832} 833 834/* 835 * Map the page to the fpage virtual address as specified thru' fpage id 836 */ 837vm_offset_t 838pmap_map_fpage(vm_paddr_t pa, struct fpage *fp, boolean_t check_unmaped) 839{ 840 vm_offset_t kva; 841 register pt_entry_t *pte; 842 pt_entry_t npte; 843 844 KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 845 /* 846 * Check if the fpage is free 847 */ 848 if (fp->state) { 849 if (check_unmaped == TRUE) 850 pmap_unmap_fpage(pa, fp); 851 else 852 panic("pmap_map_fpage: fpage is busy"); 853 } 854 fp->state = TRUE; 855 kva = fp->kva; 856 857 npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 858 pte = pmap_pte(kernel_pmap, kva); 859 *pte = npte; 860 861 pmap_TLB_update_kernel(kva, npte); 862 863 return (kva); 864} 865 866/* 867 * Unmap the page from the fpage virtual address as specified thru' fpage id 868 */ 869void 870pmap_unmap_fpage(vm_paddr_t pa, struct fpage *fp) 871{ 872 vm_offset_t kva; 873 register pt_entry_t *pte; 874 875 KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 876 /* 877 * Check if the fpage is busy 878 */ 879 if (!(fp->state)) { 880 panic("pmap_unmap_fpage: fpage is free"); 881 } 882 kva = fp->kva; 883 884 pte = pmap_pte(kernel_pmap, kva); 885 *pte = PTE_G; 886 pmap_TLB_invalidate_kernel(kva); 887 888 fp->state = FALSE; 889 890 /* 891 * Should there be any flush operation at the end? 892 */ 893} 894 895/* Revision 1.507 896 * 897 * Simplify the reference counting of page table pages. Specifically, use 898 * the page table page's wired count rather than its hold count to contain 899 * the reference count. 900 */ 901 902/* 903 * This routine unholds page table pages, and if the hold count 904 * drops to zero, then it decrements the wire count. 
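 * In this pmap the page table page's wire count, rather than its hold
 * count, serves as the reference count (see the revision 1.507 note
 * above); pmap_unwire_pte_hold() drops that count and calls here to
 * unmap and free the page once it reaches zero.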
905 */ 906static int 907_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 908{ 909 910 /* 911 * unmap the page table page 912 */ 913 pmap->pm_segtab[m->pindex] = 0; 914 --pmap->pm_stats.resident_count; 915 916 if (pmap->pm_ptphint == m) 917 pmap->pm_ptphint = NULL; 918 919 /* 920 * If the page is finally unwired, simply free it. 921 */ 922 vm_page_free_zero(m); 923 atomic_subtract_int(&cnt.v_wire_count, 1); 924 return (1); 925} 926 927static PMAP_INLINE int 928pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 929{ 930 --m->wire_count; 931 if (m->wire_count == 0) 932 return (_pmap_unwire_pte_hold(pmap, m)); 933 else 934 return (0); 935} 936 937/* 938 * After removing a page table entry, this routine is used to 939 * conditionally free the page, and manage the hold/wire counts. 940 */ 941static int 942pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 943{ 944 unsigned ptepindex; 945 pd_entry_t pteva; 946 947 if (va >= VM_MAXUSER_ADDRESS) 948 return (0); 949 950 if (mpte == NULL) { 951 ptepindex = (va >> SEGSHIFT); 952 if (pmap->pm_ptphint && 953 (pmap->pm_ptphint->pindex == ptepindex)) { 954 mpte = pmap->pm_ptphint; 955 } else { 956 pteva = *pmap_pde(pmap, va); 957 mpte = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva)); 958 pmap->pm_ptphint = mpte; 959 } 960 } 961 return pmap_unwire_pte_hold(pmap, mpte); 962} 963 964void 965pmap_pinit0(pmap_t pmap) 966{ 967 int i; 968 969 PMAP_LOCK_INIT(pmap); 970 pmap->pm_segtab = kernel_segmap; 971 pmap->pm_active = 0; 972 pmap->pm_ptphint = NULL; 973 for (i = 0; i < MAXCPU; i++) { 974 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 975 pmap->pm_asid[i].gen = 0; 976 } 977 PCPU_SET(curpmap, pmap); 978 TAILQ_INIT(&pmap->pm_pvlist); 979 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 980} 981 982/* 983 * Initialize a preallocated and zeroed pmap structure, 984 * such as one in a vmspace structure. 985 */ 986int 987pmap_pinit(pmap_t pmap) 988{ 989 vm_page_t ptdpg; 990 int i; 991 int req; 992 993 PMAP_LOCK_INIT(pmap); 994 995 req = VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED | 996 VM_ALLOC_ZERO; 997 998#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 999 if (need_wired_tlb_page_pool) 1000 req |= VM_ALLOC_WIRED_TLB_PG_POOL; 1001#endif 1002 /* 1003 * allocate the page directory page 1004 */ 1005 while ((ptdpg = vm_page_alloc(NULL, NUSERPGTBLS, req)) == NULL) 1006 VM_WAIT; 1007 1008 ptdpg->valid = VM_PAGE_BITS_ALL; 1009 1010 pmap->pm_segtab = (pd_entry_t *) 1011 MIPS_PHYS_TO_CACHED(VM_PAGE_TO_PHYS(ptdpg)); 1012 if ((ptdpg->flags & PG_ZERO) == 0) 1013 bzero(pmap->pm_segtab, PAGE_SIZE); 1014 1015 pmap->pm_active = 0; 1016 pmap->pm_ptphint = NULL; 1017 for (i = 0; i < MAXCPU; i++) { 1018 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1019 pmap->pm_asid[i].gen = 0; 1020 } 1021 TAILQ_INIT(&pmap->pm_pvlist); 1022 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1023 1024 return (1); 1025} 1026 1027/* 1028 * this routine is called if the page table page is not 1029 * mapped correctly. 
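 * A fresh page table page is allocated, zeroed if necessary, and entered
 * into pm_segtab; NULL is returned when vm_page_alloc() fails, in which
 * case an M_WAITOK caller (see pmap_allocpte()) simply retries.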
1030 */ 1031static vm_page_t 1032_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) 1033{ 1034 vm_offset_t pteva, ptepa; 1035 vm_page_t m; 1036 int req; 1037 1038 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1039 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1040 ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1041 1042 req = VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ; 1043#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 1044 if (need_wired_tlb_page_pool) 1045 req |= VM_ALLOC_WIRED_TLB_PG_POOL; 1046#endif 1047 /* 1048 * Find or fabricate a new pagetable page 1049 */ 1050 if ((m = vm_page_alloc(NULL, ptepindex, req)) == NULL) { 1051 if (flags & M_WAITOK) { 1052 PMAP_UNLOCK(pmap); 1053 vm_page_unlock_queues(); 1054 VM_WAIT; 1055 vm_page_lock_queues(); 1056 PMAP_LOCK(pmap); 1057 } 1058 /* 1059 * Indicate the need to retry. While waiting, the page 1060 * table page may have been allocated. 1061 */ 1062 return (NULL); 1063 } 1064 if ((m->flags & PG_ZERO) == 0) 1065 pmap_zero_page(m); 1066 1067 KASSERT(m->queue == PQ_NONE, 1068 ("_pmap_allocpte: %p->queue != PQ_NONE", m)); 1069 1070 /* 1071 * Map the pagetable page into the process address space, if it 1072 * isn't already there. 1073 */ 1074 1075 pmap->pm_stats.resident_count++; 1076 1077 ptepa = VM_PAGE_TO_PHYS(m); 1078 pteva = MIPS_PHYS_TO_CACHED(ptepa); 1079 pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva; 1080 1081 /* 1082 * Set the page table hint 1083 */ 1084 pmap->pm_ptphint = m; 1085 1086 /* 1087 * Kernel page tables are allocated in pmap_bootstrap() or 1088 * pmap_growkernel(). 1089 */ 1090 if (is_kernel_pmap(pmap)) 1091 panic("_pmap_allocpte() called for kernel pmap\n"); 1092 1093 m->valid = VM_PAGE_BITS_ALL; 1094 vm_page_flag_clear(m, PG_ZERO); 1095 1096 return (m); 1097} 1098 1099static vm_page_t 1100pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1101{ 1102 unsigned ptepindex; 1103 vm_offset_t pteva; 1104 vm_page_t m; 1105 1106 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1107 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1108 ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1109 1110 /* 1111 * Calculate pagetable page index 1112 */ 1113 ptepindex = va >> SEGSHIFT; 1114retry: 1115 /* 1116 * Get the page directory entry 1117 */ 1118 pteva = (vm_offset_t)pmap->pm_segtab[ptepindex]; 1119 1120 /* 1121 * If the page table page is mapped, we just increment the hold 1122 * count, and activate it. 1123 */ 1124 if (pteva) { 1125 /* 1126 * In order to get the page table page, try the hint first. 1127 */ 1128 if (pmap->pm_ptphint && 1129 (pmap->pm_ptphint->pindex == ptepindex)) { 1130 m = pmap->pm_ptphint; 1131 } else { 1132 m = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva)); 1133 pmap->pm_ptphint = m; 1134 } 1135 m->wire_count++; 1136 } else { 1137 /* 1138 * Here if the pte page isn't mapped, or if it has been 1139 * deallocated. 1140 */ 1141 m = _pmap_allocpte(pmap, ptepindex, flags); 1142 if (m == NULL && (flags & M_WAITOK)) 1143 goto retry; 1144 } 1145 return m; 1146} 1147 1148 1149/*************************************************** 1150* Pmap allocation/deallocation routines. 1151 ***************************************************/ 1152/* 1153 * Revision 1.397 1154 * - Merged pmap_release and pmap_release_free_page. When pmap_release is 1155 * called only the page directory page(s) can be left in the pmap pte 1156 * object, since all page table pages will have been freed by 1157 * pmap_remove_pages and pmap_remove. 
In addition, there can only be one 1158 * reference to the pmap and the page directory is wired, so the page(s) 1159 * can never be busy. So all there is to do is clear the magic mappings 1160 * from the page directory and free the page(s). 1161 */ 1162 1163 1164/* 1165 * Release any resources held by the given physical map. 1166 * Called when a pmap initialized by pmap_pinit is being released. 1167 * Should only be called if the map contains no valid mappings. 1168 */ 1169void 1170pmap_release(pmap_t pmap) 1171{ 1172 vm_page_t ptdpg; 1173 1174 KASSERT(pmap->pm_stats.resident_count == 0, 1175 ("pmap_release: pmap resident count %ld != 0", 1176 pmap->pm_stats.resident_count)); 1177 1178 ptdpg = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pmap->pm_segtab)); 1179 ptdpg->wire_count--; 1180 atomic_subtract_int(&cnt.v_wire_count, 1); 1181 vm_page_free_zero(ptdpg); 1182} 1183 1184/* 1185 * grow the number of kernel page table entries, if needed 1186 */ 1187void 1188pmap_growkernel(vm_offset_t addr) 1189{ 1190 vm_offset_t ptppaddr; 1191 vm_page_t nkpg; 1192 pt_entry_t *pte; 1193 int i, req; 1194 1195 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1196 if (kernel_vm_end == 0) { 1197 kernel_vm_end = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET; 1198 nkpt = 0; 1199 while (segtab_pde(kernel_segmap, kernel_vm_end)) { 1200 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1201 ~(PAGE_SIZE * NPTEPG - 1); 1202 nkpt++; 1203 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1204 kernel_vm_end = kernel_map->max_offset; 1205 break; 1206 } 1207 } 1208 } 1209 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1210 if (addr - 1 >= kernel_map->max_offset) 1211 addr = kernel_map->max_offset; 1212 while (kernel_vm_end < addr) { 1213 if (segtab_pde(kernel_segmap, kernel_vm_end)) { 1214 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1215 ~(PAGE_SIZE * NPTEPG - 1); 1216 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1217 kernel_vm_end = kernel_map->max_offset; 1218 break; 1219 } 1220 continue; 1221 } 1222 /* 1223 * This index is bogus, but out of the way 1224 */ 1225 req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; 1226#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 1227 if (need_wired_tlb_page_pool) 1228 req |= VM_ALLOC_WIRED_TLB_PG_POOL; 1229#endif 1230 nkpg = vm_page_alloc(NULL, nkpt, req); 1231 if (!nkpg) 1232 panic("pmap_growkernel: no memory to grow kernel"); 1233 1234 nkpt++; 1235 1236 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1237 if (ptppaddr >= MIPS_KSEG0_LARGEST_PHYS) { 1238 /* 1239 * We need to do something here, but I am not sure 1240 * what. We can access anything in the 0 - 512Meg 1241 * region, but if we get a page to go in the kernel 1242 * segmap that is outside of of that we really need 1243 * to have another mapping beyond the temporary ones 1244 * I have. Not sure how to do this yet. FIXME FIXME. 1245 */ 1246 panic("Gak, can't handle a k-page table outside of lower 512Meg"); 1247 } 1248 pte = (pt_entry_t *)MIPS_PHYS_TO_CACHED(ptppaddr); 1249 segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte; 1250 1251 /* 1252 * The R[4-7]?00 stores only one copy of the Global bit in 1253 * the translation lookaside buffer for each 2 page entry. 1254 * Thus invalid entrys must have the Global bit set so when 1255 * Entry LO and Entry HI G bits are anded together they will 1256 * produce a global bit to store in the tlb. 
1257 */ 1258 for (i = 0; i < NPTEPG; i++, pte++) 1259 *pte = PTE_G; 1260 1261 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1262 ~(PAGE_SIZE * NPTEPG - 1); 1263 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1264 kernel_vm_end = kernel_map->max_offset; 1265 break; 1266 } 1267 } 1268} 1269 1270/*************************************************** 1271* page management routines. 1272 ***************************************************/ 1273 1274/* 1275 * free the pv_entry back to the free list 1276 */ 1277static PMAP_INLINE void 1278free_pv_entry(pv_entry_t pv) 1279{ 1280 1281 pv_entry_count--; 1282 uma_zfree(pvzone, pv); 1283} 1284 1285/* 1286 * get a new pv_entry, allocating a block from the system 1287 * when needed. 1288 * the memory allocation is performed bypassing the malloc code 1289 * because of the possibility of allocations at interrupt time. 1290 */ 1291static pv_entry_t 1292get_pv_entry(pmap_t locked_pmap) 1293{ 1294 static const struct timeval printinterval = { 60, 0 }; 1295 static struct timeval lastprint; 1296 struct vpgqueues *vpq; 1297 pt_entry_t *pte, oldpte; 1298 pmap_t pmap; 1299 pv_entry_t allocated_pv, next_pv, pv; 1300 vm_offset_t va; 1301 vm_page_t m; 1302 1303 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 1304 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1305 allocated_pv = uma_zalloc(pvzone, M_NOWAIT); 1306 if (allocated_pv != NULL) { 1307 pv_entry_count++; 1308 if (pv_entry_count > pv_entry_high_water) 1309 pagedaemon_wakeup(); 1310 else 1311 return (allocated_pv); 1312 } 1313 /* 1314 * Reclaim pv entries: At first, destroy mappings to inactive 1315 * pages. After that, if a pv entry is still needed, destroy 1316 * mappings to active pages. 1317 */ 1318 if (ratecheck(&lastprint, &printinterval)) 1319 printf("Approaching the limit on PV entries, " 1320 "increase the vm.pmap.shpgperproc tunable.\n"); 1321 vpq = &vm_page_queues[PQ_INACTIVE]; 1322retry: 1323 TAILQ_FOREACH(m, &vpq->pl, pageq) { 1324 if (m->hold_count || m->busy) 1325 continue; 1326 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { 1327 va = pv->pv_va; 1328 pmap = pv->pv_pmap; 1329 /* Avoid deadlock and lock recursion. 
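			 * Pmap locks are taken in ascending address order:
			 * a pmap that sorts after locked_pmap may be locked
			 * unconditionally, any other pmap is only trylocked
			 * and skipped on failure, and locked_pmap itself is
			 * already held.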
*/ 1330 if (pmap > locked_pmap) 1331 PMAP_LOCK(pmap); 1332 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) 1333 continue; 1334 pmap->pm_stats.resident_count--; 1335 pte = pmap_pte(pmap, va); 1336 KASSERT(pte != NULL, ("pte")); 1337 oldpte = loadandclear((u_int *)pte); 1338 if (is_kernel_pmap(pmap)) 1339 *pte = PTE_G; 1340 KASSERT((oldpte & PTE_W) == 0, 1341 ("wired pte for unwired page")); 1342 if (m->md.pv_flags & PV_TABLE_REF) 1343 vm_page_flag_set(m, PG_REFERENCED); 1344 if (oldpte & PTE_M) 1345 vm_page_dirty(m); 1346 pmap_invalidate_page(pmap, va); 1347 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1348 m->md.pv_list_count--; 1349 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1350 if (TAILQ_EMPTY(&m->md.pv_list)) { 1351 vm_page_flag_clear(m, PG_WRITEABLE); 1352 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); 1353 } 1354 pmap_unuse_pt(pmap, va, pv->pv_ptem); 1355 if (pmap != locked_pmap) 1356 PMAP_UNLOCK(pmap); 1357 if (allocated_pv == NULL) 1358 allocated_pv = pv; 1359 else 1360 free_pv_entry(pv); 1361 } 1362 } 1363 if (allocated_pv == NULL) { 1364 if (vpq == &vm_page_queues[PQ_INACTIVE]) { 1365 vpq = &vm_page_queues[PQ_ACTIVE]; 1366 goto retry; 1367 } 1368 panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable"); 1369 } 1370 return (allocated_pv); 1371} 1372 1373/* 1374 * Revision 1.370 1375 * 1376 * Move pmap_collect() out of the machine-dependent code, rename it 1377 * to reflect its new location, and add page queue and flag locking. 1378 * 1379 * Notes: (1) alpha, i386, and ia64 had identical implementations 1380 * of pmap_collect() in terms of machine-independent interfaces; 1381 * (2) sparc64 doesn't require it; (3) powerpc had it as a TODO. 1382 * 1383 * MIPS implementation was identical to alpha [Junos 8.2] 1384 */ 1385 1386/* 1387 * If it is the first entry on the list, it is actually 1388 * in the header and we must copy the following entry up 1389 * to the header. Otherwise we must search the list for 1390 * the entry. In either case we free the now unused entry. 1391 */ 1392 1393static void 1394pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) 1395{ 1396 pv_entry_t pv; 1397 1398 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1399 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1400 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1401 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1402 if (pmap == pv->pv_pmap && va == pv->pv_va) 1403 break; 1404 } 1405 } else { 1406 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1407 if (va == pv->pv_va) 1408 break; 1409 } 1410 } 1411 1412 KASSERT(pv != NULL, ("pmap_remove_entry: pv not found")); 1413 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1414 m->md.pv_list_count--; 1415 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 1416 vm_page_flag_clear(m, PG_WRITEABLE); 1417 1418 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1419 free_pv_entry(pv); 1420} 1421 1422/* 1423 * Create a pv entry for page at pa for 1424 * (pmap, va). 1425 */ 1426static void 1427pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m, 1428 boolean_t wired) 1429{ 1430 pv_entry_t pv; 1431 1432 pv = get_pv_entry(pmap); 1433 pv->pv_va = va; 1434 pv->pv_pmap = pmap; 1435 pv->pv_ptem = mpte; 1436 pv->pv_wired = wired; 1437 1438 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1439 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1440 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1441 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1442 m->md.pv_list_count++; 1443} 1444 1445/* 1446 * Conditionally create a pv entry. 
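 * Unlike pmap_insert_entry(), this never sleeps and never reclaims pv
 * entries from other mappings: it simply fails once the pv entry
 * high-water mark has been reached or the M_NOWAIT allocation fails.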
1447 */ 1448static boolean_t 1449pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va, 1450 vm_page_t m) 1451{ 1452 pv_entry_t pv; 1453 1454 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1455 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1456 if (pv_entry_count < pv_entry_high_water && 1457 (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) { 1458 pv_entry_count++; 1459 pv->pv_va = va; 1460 pv->pv_pmap = pmap; 1461 pv->pv_ptem = mpte; 1462 pv->pv_wired = FALSE; 1463 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1464 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1465 m->md.pv_list_count++; 1466 return (TRUE); 1467 } else 1468 return (FALSE); 1469} 1470 1471/* 1472 * pmap_remove_pte: do the things to unmap a page in a process 1473 */ 1474static int 1475pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va) 1476{ 1477 pt_entry_t oldpte; 1478 vm_page_t m; 1479 vm_offset_t pa; 1480 1481 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1482 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1483 1484 oldpte = loadandclear((u_int *)ptq); 1485 if (is_kernel_pmap(pmap)) 1486 *ptq = PTE_G; 1487 1488 if (oldpte & PTE_W) 1489 pmap->pm_stats.wired_count -= 1; 1490 1491 pmap->pm_stats.resident_count -= 1; 1492 pa = mips_tlbpfn_to_paddr(oldpte); 1493 1494 if (page_is_managed(pa)) { 1495 m = PHYS_TO_VM_PAGE(pa); 1496 if (oldpte & PTE_M) { 1497#if defined(PMAP_DIAGNOSTIC) 1498 if (pmap_nw_modified(oldpte)) { 1499 printf( 1500 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", 1501 va, oldpte); 1502 } 1503#endif 1504 vm_page_dirty(m); 1505 } 1506 if (m->md.pv_flags & PV_TABLE_REF) 1507 vm_page_flag_set(m, PG_REFERENCED); 1508 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); 1509 1510 pmap_remove_entry(pmap, m, va); 1511 } 1512 return pmap_unuse_pt(pmap, va, NULL); 1513} 1514 1515/* 1516 * Remove a single page from a process address space 1517 */ 1518static void 1519pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1520{ 1521 register pt_entry_t *ptq; 1522 1523 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1524 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1525 ptq = pmap_pte(pmap, va); 1526 1527 /* 1528 * if there is no pte for this address, just skip it!!! 1529 */ 1530 if (!ptq || !pmap_pte_v(ptq)) { 1531 return; 1532 } 1533 /* 1534 * get a local va for mappings for this pmap. 1535 */ 1536 (void)pmap_remove_pte(pmap, ptq, va); 1537 pmap_invalidate_page(pmap, va); 1538 1539 return; 1540} 1541 1542/* 1543 * Remove the given range of addresses from the specified map. 1544 * 1545 * It is assumed that the start and end are properly 1546 * rounded to the page size. 1547 */ 1548void 1549pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 1550{ 1551 vm_offset_t va, nva; 1552 1553 if (pmap == NULL) 1554 return; 1555 1556 if (pmap->pm_stats.resident_count == 0) 1557 return; 1558 1559 vm_page_lock_queues(); 1560 PMAP_LOCK(pmap); 1561 1562 /* 1563 * special handling of removing one page. a very common operation 1564 * and easy to short circuit some code. 1565 */ 1566 if ((sva + PAGE_SIZE) == eva) { 1567 pmap_remove_page(pmap, sva); 1568 goto out; 1569 } 1570 for (va = sva; va < eva; va = nva) { 1571 if (!*pmap_pde(pmap, va)) { 1572 nva = mips_segtrunc(va + MIPS_SEGSIZE); 1573 continue; 1574 } 1575 pmap_remove_page(pmap, va); 1576 nva = va + PAGE_SIZE; 1577 } 1578 1579out: 1580 vm_page_unlock_queues(); 1581 PMAP_UNLOCK(pmap); 1582} 1583 1584/* 1585 * Routine: pmap_remove_all 1586 * Function: 1587 * Removes this physical page from 1588 * all physical maps in which it resides. 
1589 * Reflects back modify bits to the pager. 1590 * 1591 * Notes: 1592 * Original versions of this routine were very 1593 * inefficient because they iteratively called 1594 * pmap_remove (slow...) 1595 */ 1596 1597void 1598pmap_remove_all(vm_page_t m) 1599{ 1600 register pv_entry_t pv; 1601 register pt_entry_t *pte, tpte; 1602 1603 KASSERT((m->flags & PG_FICTITIOUS) == 0, 1604 ("pmap_remove_all: page %p is fictitious", m)); 1605 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1606 1607 if (m->md.pv_flags & PV_TABLE_REF) 1608 vm_page_flag_set(m, PG_REFERENCED); 1609 1610 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1611 PMAP_LOCK(pv->pv_pmap); 1612 pv->pv_pmap->pm_stats.resident_count--; 1613 1614 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 1615 1616 tpte = loadandclear((u_int *)pte); 1617 if (is_kernel_pmap(pv->pv_pmap)) 1618 *pte = PTE_G; 1619 1620 if (tpte & PTE_W) 1621 pv->pv_pmap->pm_stats.wired_count--; 1622 1623 /* 1624 * Update the vm_page_t clean and reference bits. 1625 */ 1626 if (tpte & PTE_M) { 1627#if defined(PMAP_DIAGNOSTIC) 1628 if (pmap_nw_modified(tpte)) { 1629 printf( 1630 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", 1631 pv->pv_va, tpte); 1632 } 1633#endif 1634 vm_page_dirty(m); 1635 } 1636 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 1637 1638 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 1639 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1640 m->md.pv_list_count--; 1641 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 1642 PMAP_UNLOCK(pv->pv_pmap); 1643 free_pv_entry(pv); 1644 } 1645 1646 vm_page_flag_clear(m, PG_WRITEABLE); 1647 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); 1648} 1649 1650/* 1651 * Set the physical protection on the 1652 * specified range of this map as requested. 1653 */ 1654void 1655pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1656{ 1657 pt_entry_t *pte; 1658 1659 if (pmap == NULL) 1660 return; 1661 1662 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1663 pmap_remove(pmap, sva, eva); 1664 return; 1665 } 1666 if (prot & VM_PROT_WRITE) 1667 return; 1668 1669 vm_page_lock_queues(); 1670 PMAP_LOCK(pmap); 1671 while (sva < eva) { 1672 pt_entry_t pbits, obits; 1673 vm_page_t m; 1674 vm_offset_t pa; 1675 1676 /* 1677 * If segment table entry is empty, skip this segment. 1678 */ 1679 if (!*pmap_pde(pmap, sva)) { 1680 sva = mips_segtrunc(sva + MIPS_SEGSIZE); 1681 continue; 1682 } 1683 /* 1684 * If pte is invalid, skip this page 1685 */ 1686 pte = pmap_pte(pmap, sva); 1687 if (!pmap_pte_v(pte)) { 1688 sva += PAGE_SIZE; 1689 continue; 1690 } 1691retry: 1692 obits = pbits = *pte; 1693 pa = mips_tlbpfn_to_paddr(pbits); 1694 1695 if (page_is_managed(pa)) { 1696 m = PHYS_TO_VM_PAGE(pa); 1697 if (m->md.pv_flags & PV_TABLE_REF) { 1698 vm_page_flag_set(m, PG_REFERENCED); 1699 m->md.pv_flags &= ~PV_TABLE_REF; 1700 } 1701 if (pbits & PTE_M) { 1702 vm_page_dirty(m); 1703 m->md.pv_flags &= ~PV_TABLE_MOD; 1704 } 1705 } 1706 pbits = (pbits & ~PTE_M) | PTE_RO; 1707 1708 if (pbits != *pte) { 1709 if (!atomic_cmpset_int((u_int *)pte, obits, pbits)) 1710 goto retry; 1711 pmap_update_page(pmap, sva, pbits); 1712 } 1713 sva += PAGE_SIZE; 1714 } 1715 vm_page_unlock_queues(); 1716 PMAP_UNLOCK(pmap); 1717} 1718 1719/* 1720 * Insert the given physical page (p) at 1721 * the specified virtual address (v) in the 1722 * target physical map with the protection requested. 1723 * 1724 * If specified, the page will be wired down, meaning 1725 * that the related pte can not be reclaimed. 
1726 * 1727 * NB: This is the only routine which MAY NOT lazy-evaluate 1728 * or lose information. That is, this routine must actually 1729 * insert this page into the given map NOW. 1730 */ 1731void 1732pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t fault_type, vm_page_t m, vm_prot_t prot, 1733 boolean_t wired) 1734{ 1735 vm_offset_t pa, opa; 1736 register pt_entry_t *pte; 1737 pt_entry_t origpte, newpte; 1738 vm_page_t mpte, om; 1739 int rw = 0; 1740 1741 if (pmap == NULL) 1742 return; 1743 1744 va &= ~PAGE_MASK; 1745#ifdef PMAP_DIAGNOSTIC 1746 if (va > VM_MAX_KERNEL_ADDRESS) 1747 panic("pmap_enter: toobig"); 1748#endif 1749 1750 mpte = NULL; 1751 1752 vm_page_lock_queues(); 1753 PMAP_LOCK(pmap); 1754 1755 /* 1756 * In the case that a page table page is not resident, we are 1757 * creating it here. 1758 */ 1759 if (va < VM_MAXUSER_ADDRESS) { 1760 mpte = pmap_allocpte(pmap, va, M_WAITOK); 1761 } 1762 pte = pmap_pte(pmap, va); 1763 1764 /* 1765 * Page Directory table entry not valid, we need a new PT page 1766 */ 1767 if (pte == NULL) { 1768 panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n", 1769 (void *)pmap->pm_segtab, va); 1770 } 1771 pa = VM_PAGE_TO_PHYS(m); 1772 om = NULL; 1773 origpte = *pte; 1774 opa = mips_tlbpfn_to_paddr(origpte); 1775 1776 /* 1777 * Mapping has not changed, must be protection or wiring change. 1778 */ 1779 if ((origpte & PTE_V) && (opa == pa)) { 1780 /* 1781 * Wiring change, just update stats. We don't worry about 1782 * wiring PT pages as they remain resident as long as there 1783 * are valid mappings in them. Hence, if a user page is 1784 * wired, the PT page will be also. 1785 */ 1786 if (wired && ((origpte & PTE_W) == 0)) 1787 pmap->pm_stats.wired_count++; 1788 else if (!wired && (origpte & PTE_W)) 1789 pmap->pm_stats.wired_count--; 1790 1791#if defined(PMAP_DIAGNOSTIC) 1792 if (pmap_nw_modified(origpte)) { 1793 printf( 1794 "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", 1795 va, origpte); 1796 } 1797#endif 1798 1799 /* 1800 * Remove extra pte reference 1801 */ 1802 if (mpte) 1803 mpte->wire_count--; 1804 1805 /* 1806 * We might be turning off write access to the page, so we 1807 * go ahead and sense modify status. 1808 */ 1809 if (page_is_managed(opa)) { 1810 om = m; 1811 } 1812 goto validate; 1813 } 1814 /* 1815 * Mapping has changed, invalidate old range and fall through to 1816 * handle validating new mapping. 1817 */ 1818 if (opa) { 1819 if (origpte & PTE_W) 1820 pmap->pm_stats.wired_count--; 1821 1822 if (page_is_managed(opa)) { 1823 om = PHYS_TO_VM_PAGE(opa); 1824 pmap_remove_entry(pmap, om, va); 1825 } 1826 if (mpte != NULL) { 1827 mpte->wire_count--; 1828 KASSERT(mpte->wire_count > 0, 1829 ("pmap_enter: missing reference to page table page," 1830 " va: 0x%x", va)); 1831 } 1832 } else 1833 pmap->pm_stats.resident_count++; 1834 1835 /* 1836 * Enter on the PV list if part of our managed memory. Note that we 1837 * raise IPL while manipulating pv_table since pmap_enter can be 1838 * called at interrupt time. 
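 * (The reference to raising the IPL is historical; in this kernel the
 * page queues mutex and the pmap lock, both held here, provide that
 * serialization.)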
1839 */ 1840 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 1841 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 1842 ("pmap_enter: managed mapping within the clean submap")); 1843 pmap_insert_entry(pmap, va, mpte, m, wired); 1844 } 1845 /* 1846 * Increment counters 1847 */ 1848 if (wired) 1849 pmap->pm_stats.wired_count++; 1850 1851validate: 1852 if ((access & VM_PROT_WRITE) != 0) 1853 m->md.pv_flags |= PV_TABLE_MOD | PV_TABLE_REF; 1854 rw = init_pte_prot(va, m, prot); 1855 1856#ifdef PMAP_DEBUG 1857 printf("pmap_enter: va: 0x%08x -> pa: 0x%08x\n", va, pa); 1858#endif 1859 /* 1860 * Now validate mapping with desired protection/wiring. 1861 */ 1862 newpte = mips_paddr_to_tlbpfn(pa) | rw | PTE_V; 1863 1864 if (is_cacheable_mem(pa)) 1865 newpte |= PTE_CACHE; 1866 else 1867 newpte |= PTE_UNCACHED; 1868 1869 if (wired) 1870 newpte |= PTE_W; 1871 1872 if (is_kernel_pmap(pmap)) { 1873 newpte |= PTE_G; 1874 } 1875 1876 /* 1877 * if the mapping or permission bits are different, we need to 1878 * update the pte. 1879 */ 1880 if (origpte != newpte) { 1881 if (origpte & PTE_V) { 1882 *pte = newpte; 1883 if (page_is_managed(opa) && (opa != pa)) { 1884 if (om->md.pv_flags & PV_TABLE_REF) 1885 vm_page_flag_set(om, PG_REFERENCED); 1886 om->md.pv_flags &= 1887 ~(PV_TABLE_REF | PV_TABLE_MOD); 1888 } 1889 if (origpte & PTE_M) { 1890 KASSERT((origpte & PTE_RW), 1891 ("pmap_enter: modified page not writable:" 1892 " va: 0x%x, pte: 0x%lx", va, origpte)); 1893 if (page_is_managed(opa)) 1894 vm_page_dirty(om); 1895 } 1896 } else { 1897 *pte = newpte; 1898 } 1899 } 1900 pmap_update_page(pmap, va, newpte); 1901 1902 /* 1903 * Sync I & D caches for executable pages. Do this only if the the 1904 * target pmap belongs to the current process. Otherwise, an 1905 * unresolvable TLB miss may occur. 1906 */ 1907 if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) && 1908 (prot & VM_PROT_EXECUTE)) { 1909 mips_icache_sync_range(va, NBPG); 1910 mips_dcache_wbinv_range(va, NBPG); 1911 } 1912 vm_page_unlock_queues(); 1913 PMAP_UNLOCK(pmap); 1914} 1915 1916/* 1917 * this code makes some *MAJOR* assumptions: 1918 * 1. Current pmap & pmap exists. 1919 * 2. Not wired. 1920 * 3. Read access. 1921 * 4. No page table pages. 1922 * but is *MUCH* faster than pmap_enter... 1923 */ 1924 1925void 1926pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 1927{ 1928 1929 PMAP_LOCK(pmap); 1930 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); 1931 PMAP_UNLOCK(pmap); 1932} 1933 1934static vm_page_t 1935pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 1936 vm_prot_t prot, vm_page_t mpte) 1937{ 1938 pt_entry_t *pte; 1939 vm_offset_t pa; 1940 1941 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 1942 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 1943 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 1944 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1945 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1946 1947 /* 1948 * In the case that a page table page is not resident, we are 1949 * creating it here. 
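 * Unlike pmap_enter(), the page table page is allocated with M_NOWAIT
 * below, so this function returns NULL rather than sleeping when no
 * page is available.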
1950 */ 1951 if (va < VM_MAXUSER_ADDRESS) { 1952 unsigned ptepindex; 1953 vm_offset_t pteva; 1954 1955 /* 1956 * Calculate pagetable page index 1957 */ 1958 ptepindex = va >> SEGSHIFT; 1959 if (mpte && (mpte->pindex == ptepindex)) { 1960 mpte->wire_count++; 1961 } else { 1962 /* 1963 * Get the page directory entry 1964 */ 1965 pteva = (vm_offset_t)pmap->pm_segtab[ptepindex]; 1966 1967 /* 1968 * If the page table page is mapped, we just 1969 * increment the hold count, and activate it. 1970 */ 1971 if (pteva) { 1972 if (pmap->pm_ptphint && 1973 (pmap->pm_ptphint->pindex == ptepindex)) { 1974 mpte = pmap->pm_ptphint; 1975 } else { 1976 mpte = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva)); 1977 pmap->pm_ptphint = mpte; 1978 } 1979 mpte->wire_count++; 1980 } else { 1981 mpte = _pmap_allocpte(pmap, ptepindex, 1982 M_NOWAIT); 1983 if (mpte == NULL) 1984 return (mpte); 1985 } 1986 } 1987 } else { 1988 mpte = NULL; 1989 } 1990 1991 pte = pmap_pte(pmap, va); 1992 if (pmap_pte_v(pte)) { 1993 if (mpte != NULL) { 1994 mpte->wire_count--; 1995 mpte = NULL; 1996 } 1997 return (mpte); 1998 } 1999 2000 /* 2001 * Enter on the PV list if part of our managed memory. 2002 */ 2003 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && 2004 !pmap_try_insert_pv_entry(pmap, mpte, va, m)) { 2005 if (mpte != NULL) { 2006 pmap_unwire_pte_hold(pmap, mpte); 2007 mpte = NULL; 2008 } 2009 return (mpte); 2010 } 2011 2012 /* 2013 * Increment counters 2014 */ 2015 pmap->pm_stats.resident_count++; 2016 2017 pa = VM_PAGE_TO_PHYS(m); 2018 2019 /* 2020 * Now validate mapping with RO protection 2021 */ 2022 *pte = mips_paddr_to_tlbpfn(pa) | PTE_V; 2023 2024 if (is_cacheable_mem(pa)) 2025 *pte |= PTE_CACHE; 2026 else 2027 *pte |= PTE_UNCACHED; 2028 2029 if (is_kernel_pmap(pmap)) 2030 *pte |= PTE_G; 2031 else { 2032 *pte |= PTE_RO; 2033 /* 2034 * Sync I & D caches. Do this only if the the target pmap 2035 * belongs to the current process. Otherwise, an 2036 * unresolvable TLB miss may occur. */ 2037 if (pmap == &curproc->p_vmspace->vm_pmap) { 2038 va &= ~PAGE_MASK; 2039 mips_icache_sync_range(va, NBPG); 2040 mips_dcache_wbinv_range(va, NBPG); 2041 } 2042 } 2043 return (mpte); 2044} 2045 2046/* 2047 * Make a temporary mapping for a physical address. This is only intended 2048 * to be used for panic dumps. 2049 */ 2050void * 2051pmap_kenter_temporary(vm_paddr_t pa, int i) 2052{ 2053 vm_offset_t va; 2054 2055 if (i != 0) 2056 printf("%s: ERROR!!! 
More than one page of virtual address mapping not supported\n", 2057 __func__); 2058 2059#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 2060 if (need_wired_tlb_page_pool) { 2061 va = pmap_map_fpage(pa, &fpages_shared[PMAP_FPAGE_KENTER_TEMP], 2062 TRUE); 2063 } else 2064#endif 2065 if (pa < MIPS_KSEG0_LARGEST_PHYS) { 2066 va = MIPS_PHYS_TO_CACHED(pa); 2067 } else { 2068 int cpu; 2069 struct local_sysmaps *sysm; 2070 2071 cpu = PCPU_GET(cpuid); 2072 sysm = &sysmap_lmem[cpu]; 2073 /* Since this is for the debugger, no locks or any other fun */ 2074 sysm->CMAP1 = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2075 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); 2076 sysm->valid1 = 1; 2077 va = (vm_offset_t)sysm->CADDR1; 2078 } 2079 return ((void *)va); 2080} 2081 2082void 2083pmap_kenter_temporary_free(vm_paddr_t pa) 2084{ 2085 int cpu; 2086 struct local_sysmaps *sysm; 2087 2088 if (pa < MIPS_KSEG0_LARGEST_PHYS) { 2089 /* nothing to do for this case */ 2090 return; 2091 } 2092 cpu = PCPU_GET(cpuid); 2093 sysm = &sysmap_lmem[cpu]; 2094 if (sysm->valid1) { 2095 pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); 2096 sysm->CMAP1 = 0; 2097 sysm->valid1 = 0; 2098 } 2099} 2100 2101/* 2102 * Moved the code to Machine Independent 2103 * vm_map_pmap_enter() 2104 */ 2105 2106/* 2107 * Maps a sequence of resident pages belonging to the same object. 2108 * The sequence begins with the given page m_start. This page is 2109 * mapped at the given virtual address start. Each subsequent page is 2110 * mapped at a virtual address that is offset from start by the same 2111 * amount as the page is offset from m_start within the object. The 2112 * last page in the sequence is the page with the largest offset from 2113 * m_start that can be mapped at a virtual address less than the given 2114 * virtual address end. Not every virtual page between start and end 2115 * is mapped; only those for which a resident page exists with the 2116 * corresponding offset from m_start are mapped. 2117 */ 2118void 2119pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2120 vm_page_t m_start, vm_prot_t prot) 2121{ 2122 vm_page_t m, mpte; 2123 vm_pindex_t diff, psize; 2124 2125 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 2126 psize = atop(end - start); 2127 mpte = NULL; 2128 m = m_start; 2129 PMAP_LOCK(pmap); 2130 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2131 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m, 2132 prot, mpte); 2133 m = TAILQ_NEXT(m, listq); 2134 } 2135 PMAP_UNLOCK(pmap); 2136} 2137 2138/* 2139 * pmap_object_init_pt preloads the ptes for a given object 2140 * into the specified pmap. This eliminates the blast of soft 2141 * faults on process startup and immediately after an mmap. 2142 */ 2143void 2144pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2145 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 2146{ 2147 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2148 KASSERT(object->type == OBJT_DEVICE, 2149 ("pmap_object_init_pt: non-device object")); 2150} 2151 2152/* 2153 * Routine: pmap_change_wiring 2154 * Function: Change the wiring attribute for a map/virtual-address 2155 * pair. 2156 * In/out conditions: 2157 * The mapping must already exist in the pmap. 
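 * Note that wiring is tracked purely in software here: PTE_W is not a
 * bit the MIPS TLB interprets, so changing the wired status of an
 * existing mapping only toggles PTE_W and adjusts pm_stats.wired_count,
 * with no TLB invalidation required.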
2158 */ 2159void 2160pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 2161{ 2162 register pt_entry_t *pte; 2163 2164 if (pmap == NULL) 2165 return; 2166 2167 PMAP_LOCK(pmap); 2168 pte = pmap_pte(pmap, va); 2169 2170 if (wired && !pmap_pte_w(pte)) 2171 pmap->pm_stats.wired_count++; 2172 else if (!wired && pmap_pte_w(pte)) 2173 pmap->pm_stats.wired_count--; 2174 2175 /* 2176 * Wiring is not a hardware characteristic so there is no need to 2177 * invalidate TLB. 2178 */ 2179 pmap_pte_set_w(pte, wired); 2180 PMAP_UNLOCK(pmap); 2181} 2182 2183/* 2184 * Copy the range specified by src_addr/len 2185 * from the source map to the range dst_addr/len 2186 * in the destination map. 2187 * 2188 * This routine is only advisory and need not do anything. 2189 */ 2190 2191void 2192pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 2193 vm_size_t len, vm_offset_t src_addr) 2194{ 2195} 2196 2197/* 2198 * pmap_zero_page zeros the specified hardware page by mapping 2199 * the page into KVM and using bzero to clear its contents. 2200 */ 2201void 2202pmap_zero_page(vm_page_t m) 2203{ 2204 vm_offset_t va; 2205 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2206 2207#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 2208 if (need_wired_tlb_page_pool) { 2209 struct fpage *fp1; 2210 struct sysmaps *sysmaps; 2211 2212 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 2213 mtx_lock(&sysmaps->lock); 2214 sched_pin(); 2215 2216 fp1 = &sysmaps->fp[PMAP_FPAGE1]; 2217 va = pmap_map_fpage(phys, fp1, FALSE); 2218 bzero((caddr_t)va, PAGE_SIZE); 2219 pmap_unmap_fpage(phys, fp1); 2220 sched_unpin(); 2221 mtx_unlock(&sysmaps->lock); 2222 /* 2223 * Should you do cache flush? 2224 */ 2225 } else 2226#endif 2227 if (phys < MIPS_KSEG0_LARGEST_PHYS) { 2228 2229 va = MIPS_PHYS_TO_UNCACHED(phys); 2230 2231 bzero((caddr_t)va, PAGE_SIZE); 2232 mips_dcache_wbinv_range(va, PAGE_SIZE); 2233 } else { 2234 int cpu; 2235 struct local_sysmaps *sysm; 2236 2237 cpu = PCPU_GET(cpuid); 2238 sysm = &sysmap_lmem[cpu]; 2239 PMAP_LGMEM_LOCK(sysm); 2240 sched_pin(); 2241 sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2242 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); 2243 sysm->valid1 = 1; 2244 bzero(sysm->CADDR1, PAGE_SIZE); 2245 pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); 2246 sysm->CMAP1 = 0; 2247 sysm->valid1 = 0; 2248 sched_unpin(); 2249 PMAP_LGMEM_UNLOCK(sysm); 2250 } 2251 2252} 2253 2254/* 2255 * pmap_zero_page_area zeros the specified hardware page by mapping 2256 * the page into KVM and using bzero to clear its contents. 2257 * 2258 * off and size may not cover an area beyond a single hardware page. 
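 * In other words, the caller must guarantee that off + size <= PAGE_SIZE;
 * for example, zeroing 1024 bytes at offset 512 of a 4K page is fine, but
 * a request that would spill into the following page is not.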
2259 */ 2260void 2261pmap_zero_page_area(vm_page_t m, int off, int size) 2262{ 2263 vm_offset_t va; 2264 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2265 2266#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 2267 if (need_wired_tlb_page_pool) { 2268 struct fpage *fp1; 2269 struct sysmaps *sysmaps; 2270 2271 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 2272 mtx_lock(&sysmaps->lock); 2273 sched_pin(); 2274 2275 fp1 = &sysmaps->fp[PMAP_FPAGE1]; 2276 va = pmap_map_fpage(phys, fp1, FALSE); 2277 bzero((caddr_t)va + off, size); 2278 pmap_unmap_fpage(phys, fp1); 2279 2280 sched_unpin(); 2281 mtx_unlock(&sysmaps->lock); 2282 } else 2283#endif 2284 if (phys < MIPS_KSEG0_LARGEST_PHYS) { 2285 va = MIPS_PHYS_TO_UNCACHED(phys); 2286 bzero((char *)(caddr_t)va + off, size); 2287 mips_dcache_wbinv_range(va + off, size); 2288 } else { 2289 int cpu; 2290 struct local_sysmaps *sysm; 2291 2292 cpu = PCPU_GET(cpuid); 2293 sysm = &sysmap_lmem[cpu]; 2294 PMAP_LGMEM_LOCK(sysm); 2295 sched_pin(); 2296 sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2297 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); 2298 sysm->valid1 = 1; 2299 bzero((char *)sysm->CADDR1 + off, size); 2300 pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); 2301 sysm->CMAP1 = 0; 2302 sysm->valid1 = 0; 2303 sched_unpin(); 2304 PMAP_LGMEM_UNLOCK(sysm); 2305 } 2306} 2307 2308void 2309pmap_zero_page_idle(vm_page_t m) 2310{ 2311 vm_offset_t va; 2312 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2313 2314#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 2315 if (need_wired_tlb_page_pool) { 2316 sched_pin(); 2317 va = pmap_map_fpage(phys, &fpages_shared[PMAP_FPAGE3], FALSE); 2318 bzero((caddr_t)va, PAGE_SIZE); 2319 pmap_unmap_fpage(phys, &fpages_shared[PMAP_FPAGE3]); 2320 sched_unpin(); 2321 } else 2322#endif 2323 if (phys < MIPS_KSEG0_LARGEST_PHYS) { 2324 va = MIPS_PHYS_TO_UNCACHED(phys); 2325 bzero((caddr_t)va, PAGE_SIZE); 2326 mips_dcache_wbinv_range(va, PAGE_SIZE); 2327 } else { 2328 int cpu; 2329 struct local_sysmaps *sysm; 2330 2331 cpu = PCPU_GET(cpuid); 2332 sysm = &sysmap_lmem[cpu]; 2333 PMAP_LGMEM_LOCK(sysm); 2334 sched_pin(); 2335 sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2336 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); 2337 sysm->valid1 = 1; 2338 bzero(sysm->CADDR1, PAGE_SIZE); 2339 pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); 2340 sysm->CMAP1 = 0; 2341 sysm->valid1 = 0; 2342 sched_unpin(); 2343 PMAP_LGMEM_UNLOCK(sysm); 2344 } 2345 2346} 2347 2348/* 2349 * pmap_copy_page copies the specified (machine independent) 2350 * page by mapping the page into virtual memory and using 2351 * bcopy to copy the page, one machine dependent page at a 2352 * time. 
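 * In the common (non-fpage-pool) case, three situations are handled
 * below: if both pages lie within the directly addressable KSEG0 region
 * they are copied through their KSEG0 addresses; if only one does, the
 * other side is run through a per-CPU temporary kernel mapping
 * (CMAP1/CMAP2); and if neither does, both sides get temporary mappings.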
2353 */ 2354void 2355pmap_copy_page(vm_page_t src, vm_page_t dst) 2356{ 2357 vm_offset_t va_src, va_dst; 2358 vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src); 2359 vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst); 2360 2361 2362#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 2363 if (need_wired_tlb_page_pool) { 2364 struct fpage *fp1, *fp2; 2365 struct sysmaps *sysmaps; 2366 2367 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 2368 mtx_lock(&sysmaps->lock); 2369 sched_pin(); 2370 2371 fp1 = &sysmaps->fp[PMAP_FPAGE1]; 2372 fp2 = &sysmaps->fp[PMAP_FPAGE2]; 2373 2374 va_src = pmap_map_fpage(phy_src, fp1, FALSE); 2375 va_dst = pmap_map_fpage(phy_dst, fp2, FALSE); 2376 2377 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); 2378 2379 pmap_unmap_fpage(phy_src, fp1); 2380 pmap_unmap_fpage(phy_dst, fp2); 2381 sched_unpin(); 2382 mtx_unlock(&sysmaps->lock); 2383 2384 /* 2385 * Should you flush the cache? 2386 */ 2387 } else 2388#endif 2389 { 2390 if ((phy_src < MIPS_KSEG0_LARGEST_PHYS) && (phy_dst < MIPS_KSEG0_LARGEST_PHYS)) { 2391 /* easy case, all can be accessed via KSEG0 */ 2392 va_src = MIPS_PHYS_TO_CACHED(phy_src); 2393 va_dst = MIPS_PHYS_TO_CACHED(phy_dst); 2394 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); 2395 } else { 2396 int cpu; 2397 struct local_sysmaps *sysm; 2398 2399 cpu = PCPU_GET(cpuid); 2400 sysm = &sysmap_lmem[cpu]; 2401 PMAP_LGMEM_LOCK(sysm); 2402 sched_pin(); 2403 if (phy_src < MIPS_KSEG0_LARGEST_PHYS) { 2404 /* one side needs mapping - dest */ 2405 va_src = MIPS_PHYS_TO_CACHED(phy_src); 2406 sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2407 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2); 2408 sysm->valid2 = 2; 2409 va_dst = (vm_offset_t)sysm->CADDR2; 2410 } else if (phy_dst < MIPS_KSEG0_LARGEST_PHYS) { 2411 /* one side needs mapping - src */ 2412 va_dst = MIPS_PHYS_TO_CACHED(phy_dst); 2413 sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2414 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); 2415 va_src = (vm_offset_t)sysm->CADDR1; 2416 sysm->valid1 = 1; 2417 } else { 2418 /* all need mapping */ 2419 sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2420 sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2421 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); 2422 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2); 2423 sysm->valid1 = sysm->valid2 = 1; 2424 va_src = (vm_offset_t)sysm->CADDR1; 2425 va_dst = (vm_offset_t)sysm->CADDR2; 2426 } 2427 bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE); 2428 if (sysm->valid1) { 2429 pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); 2430 sysm->CMAP1 = 0; 2431 sysm->valid1 = 0; 2432 } 2433 if (sysm->valid2) { 2434 pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR2); 2435 sysm->CMAP2 = 0; 2436 sysm->valid2 = 0; 2437 } 2438 sched_unpin(); 2439 PMAP_LGMEM_UNLOCK(sysm); 2440 } 2441 } 2442} 2443 2444/* 2445 * Returns true if the pmap's pv is one of the first 2446 * 16 pvs linked to from this page. This count may 2447 * be changed upwards or downwards in the future; it 2448 * is only necessary that true be returned for a small 2449 * subset of pmaps for proper page aging. 
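 * Bounding the walk keeps this check cheap: even for a page shared by
 * hundreds of address spaces, at most 16 pv entries are examined under
 * the page queues lock before giving up and returning FALSE.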
2450 */ 2451boolean_t 2452pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2453{ 2454 pv_entry_t pv; 2455 int loops = 0; 2456 2457 if (m->flags & PG_FICTITIOUS) 2458 return FALSE; 2459 2460 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2461 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2462 if (pv->pv_pmap == pmap) { 2463 return TRUE; 2464 } 2465 loops++; 2466 if (loops >= 16) 2467 break; 2468 } 2469 return (FALSE); 2470} 2471 2472/* 2473 * Remove all pages from specified address space 2474 * this aids process exit speeds. Also, this code 2475 * is special cased for current process only, but 2476 * can have the more generic (and slightly slower) 2477 * mode enabled. This is much faster than pmap_remove 2478 * in the case of running down an entire address space. 2479 */ 2480void 2481pmap_remove_pages(pmap_t pmap) 2482{ 2483 pt_entry_t *pte, tpte; 2484 pv_entry_t pv, npv; 2485 vm_page_t m; 2486 2487 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 2488 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2489 return; 2490 } 2491 vm_page_lock_queues(); 2492 PMAP_LOCK(pmap); 2493 sched_pin(); 2494 //XXX need to be TAILQ_FOREACH_SAFE ? 2495 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); 2496 pv; 2497 pv = npv) { 2498 2499 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2500 if (!pmap_pte_v(pte)) 2501 panic("pmap_remove_pages: page on pm_pvlist has no pte\n"); 2502 tpte = *pte; 2503 2504/* 2505 * We cannot remove wired pages from a process' mapping at this time 2506 */ 2507 if (tpte & PTE_W) { 2508 npv = TAILQ_NEXT(pv, pv_plist); 2509 continue; 2510 } 2511 *pte = is_kernel_pmap(pmap) ? PTE_G : 0; 2512 2513 m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(tpte)); 2514 2515 KASSERT(m < &vm_page_array[vm_page_array_size], 2516 ("pmap_remove_pages: bad tpte %lx", tpte)); 2517 2518 pv->pv_pmap->pm_stats.resident_count--; 2519 2520 /* 2521 * Update the vm_page_t clean and reference bits. 2522 */ 2523 if (tpte & PTE_M) { 2524 vm_page_dirty(m); 2525 } 2526 npv = TAILQ_NEXT(pv, pv_plist); 2527 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2528 2529 m->md.pv_list_count--; 2530 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2531 if (TAILQ_FIRST(&m->md.pv_list) == NULL) { 2532 vm_page_flag_clear(m, PG_WRITEABLE); 2533 } 2534 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2535 free_pv_entry(pv); 2536 } 2537 sched_unpin(); 2538 pmap_invalidate_all(pmap); 2539 PMAP_UNLOCK(pmap); 2540 vm_page_unlock_queues(); 2541} 2542 2543/* 2544 * pmap_testbit tests bits in pte's 2545 * note that the testbit/changebit routines are inline, 2546 * and a lot of things compile-time evaluate. 
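 * A typical use is pmap_testbit(m, PTE_M) to ask whether any mapping of
 * the page carries the modified bit; the scan below stops at the first
 * mapping on which the requested bit is found set.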
2547 */ 2548static boolean_t 2549pmap_testbit(vm_page_t m, int bit) 2550{ 2551 pv_entry_t pv; 2552 pt_entry_t *pte; 2553 boolean_t rv = FALSE; 2554 2555 if (m->flags & PG_FICTITIOUS) 2556 return rv; 2557 2558 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 2559 return rv; 2560 2561 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2562 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2563#if defined(PMAP_DIAGNOSTIC) 2564 if (!pv->pv_pmap) { 2565 printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); 2566 continue; 2567 } 2568#endif 2569 PMAP_LOCK(pv->pv_pmap); 2570 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2571 rv = (*pte & bit) != 0; 2572 PMAP_UNLOCK(pv->pv_pmap); 2573 if (rv) 2574 break; 2575 } 2576 return (rv); 2577} 2578 2579/* 2580 * this routine is used to modify bits in ptes 2581 */ 2582static __inline void 2583pmap_changebit(vm_page_t m, int bit, boolean_t setem) 2584{ 2585 register pv_entry_t pv; 2586 register pt_entry_t *pte; 2587 2588 if (m->flags & PG_FICTITIOUS) 2589 return; 2590 2591 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2592 /* 2593 * Loop over all current mappings setting/clearing as appropriate. If 2594 * setting RO do we need to clear the VAC? 2595 */ 2596 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2597#if defined(PMAP_DIAGNOSTIC) 2598 if (!pv->pv_pmap) { 2599 printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); 2600 continue; 2601 } 2602#endif 2603 2604 PMAP_LOCK(pv->pv_pmap); 2605 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2606 2607 if (setem) { 2608 *(int *)pte |= bit; 2609 pmap_update_page(pv->pv_pmap, pv->pv_va, *pte); 2610 } else { 2611 vm_offset_t pbits = *(vm_offset_t *)pte; 2612 2613 if (pbits & bit) { 2614 if (bit == PTE_RW) { 2615 if (pbits & PTE_M) { 2616 vm_page_dirty(m); 2617 } 2618 *(int *)pte = (pbits & ~(PTE_M | PTE_RW)) | 2619 PTE_RO; 2620 } else { 2621 *(int *)pte = pbits & ~bit; 2622 } 2623 pmap_update_page(pv->pv_pmap, pv->pv_va, *pte); 2624 } 2625 } 2626 PMAP_UNLOCK(pv->pv_pmap); 2627 } 2628 if (!setem && bit == PTE_RW) 2629 vm_page_flag_clear(m, PG_WRITEABLE); 2630} 2631 2632/* 2633 * pmap_page_wired_mappings: 2634 * 2635 * Return the number of managed mappings to the given physical page 2636 * that are wired. 2637 */ 2638int 2639pmap_page_wired_mappings(vm_page_t m) 2640{ 2641 pv_entry_t pv; 2642 int count; 2643 2644 count = 0; 2645 if ((m->flags & PG_FICTITIOUS) != 0) 2646 return (count); 2647 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2648 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) 2649 if (pv->pv_wired) 2650 count++; 2651 return (count); 2652} 2653 2654/* 2655 * Clear the write and modified bits in each of the given page's mappings. 2656 */ 2657void 2658pmap_remove_write(vm_page_t m) 2659{ 2660 pv_entry_t pv, npv; 2661 vm_offset_t va; 2662 pt_entry_t *pte; 2663 2664 if ((m->flags & PG_WRITEABLE) == 0) 2665 return; 2666 2667 /* 2668 * Loop over all current mappings setting/clearing as appropriate. 2669 */ 2670 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) { 2671 npv = TAILQ_NEXT(pv, pv_list); 2672 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2673 2674 if ((pte == NULL) || !mips_pg_v(*pte)) 2675 panic("pmap_remove_write: page on pv_list has no pte"); 2676 2677 va = pv->pv_va; 2678 pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE, 2679 VM_PROT_READ | VM_PROT_EXECUTE); 2680 } 2681 vm_page_flag_clear(m, PG_WRITEABLE); 2682} 2683 2684/* 2685 * pmap_ts_referenced: 2686 * 2687 * Return the count of reference bits for a page, clearing all of them. 
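 * In this pmap the reference state is kept as a single software flag
 * (PV_TABLE_REF) on the page rather than per mapping, so the value
 * returned below is only ever 0 or 1.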
2688 */ 2689int 2690pmap_ts_referenced(vm_page_t m) 2691{ 2692 if (m->flags & PG_FICTITIOUS) 2693 return (0); 2694 2695 if (m->md.pv_flags & PV_TABLE_REF) { 2696 m->md.pv_flags &= ~PV_TABLE_REF; 2697 return 1; 2698 } 2699 return 0; 2700} 2701 2702/* 2703 * pmap_is_modified: 2704 * 2705 * Return whether or not the specified physical page was modified 2706 * in any physical maps. 2707 */ 2708boolean_t 2709pmap_is_modified(vm_page_t m) 2710{ 2711 if (m->flags & PG_FICTITIOUS) 2712 return FALSE; 2713 2714 if (m->md.pv_flags & PV_TABLE_MOD) 2715 return TRUE; 2716 else 2717 return pmap_testbit(m, PTE_M); 2718} 2719 2720/* N/C */ 2721 2722/* 2723 * pmap_is_prefaultable: 2724 * 2725 * Return whether or not the specified virtual address is eligible 2726 * for prefault. 2727 */ 2728boolean_t 2729pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2730{ 2731 pt_entry_t *pte; 2732 boolean_t rv; 2733 2734 rv = FALSE; 2735 PMAP_LOCK(pmap); 2736 if (*pmap_pde(pmap, addr)) { 2737 pte = pmap_pte(pmap, addr); 2738 rv = (*pte == 0); 2739 } 2740 PMAP_UNLOCK(pmap); 2741 return (rv); 2742} 2743 2744/* 2745 * Clear the modify bits on the specified physical page. 2746 */ 2747void 2748pmap_clear_modify(vm_page_t m) 2749{ 2750 if (m->flags & PG_FICTITIOUS) 2751 return; 2752 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2753 if (m->md.pv_flags & PV_TABLE_MOD) { 2754 pmap_changebit(m, PTE_M, FALSE); 2755 m->md.pv_flags &= ~PV_TABLE_MOD; 2756 } 2757} 2758 2759/* 2760 * pmap_clear_reference: 2761 * 2762 * Clear the reference bit on the specified physical page. 2763 */ 2764void 2765pmap_clear_reference(vm_page_t m) 2766{ 2767 if (m->flags & PG_FICTITIOUS) 2768 return; 2769 2770 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2771 if (m->md.pv_flags & PV_TABLE_REF) { 2772 m->md.pv_flags &= ~PV_TABLE_REF; 2773 } 2774} 2775 2776/* 2777 * Miscellaneous support routines follow 2778 */ 2779 2780/* 2781 * Map a set of physical memory pages into the kernel virtual 2782 * address space. Return a pointer to where it is mapped. This 2783 * routine is intended to be used for mapping device memory, 2784 * NOT real memory. 2785 */ 2793void * 2794pmap_mapdev(vm_offset_t pa, vm_size_t size) 2795{ 2796 vm_offset_t va, tmpva, offset; 2797 2798 /* 2799 * KSEG1 maps only first 512M of phys address space. For 2800 * pa > 0x20000000 we should make proper mapping using pmap_kenter. 2801 */ 2802 if (pa + size < MIPS_KSEG0_LARGEST_PHYS) 2803 return (void *)MIPS_PHYS_TO_KSEG1(pa); 2804 else { 2805 offset = pa & PAGE_MASK; 2806 size = roundup(size, PAGE_SIZE); 2807 2808 va = kmem_alloc_nofault(kernel_map, size); 2809 if (!va) 2810 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 2811 for (tmpva = va; size > 0;) { 2812 pmap_kenter(tmpva, pa); 2813 size -= PAGE_SIZE; 2814 tmpva += PAGE_SIZE; 2815 pa += PAGE_SIZE; 2816 } 2817 } 2818 2819 return ((void *)(va + offset)); 2820} 2821 2822void 2823pmap_unmapdev(vm_offset_t va, vm_size_t size) 2824{ 2825} 2826 2827/* 2828 * perform the pmap work for mincore 2829 */ 2830int 2831pmap_mincore(pmap_t pmap, vm_offset_t addr) 2832{ 2833 2834 pt_entry_t *ptep, pte; 2835 vm_page_t m; 2836 int val = 0; 2837 2838 PMAP_LOCK(pmap); 2839 ptep = pmap_pte(pmap, addr); 2840 pte = (ptep != NULL) ? 
*ptep : 0; 2841 PMAP_UNLOCK(pmap); 2842 2843 if (mips_pg_v(pte)) { 2844 vm_offset_t pa; 2845 2846 val = MINCORE_INCORE; 2847 pa = mips_tlbpfn_to_paddr(pte); 2848 if (!page_is_managed(pa)) 2849 return val; 2850 2851 m = PHYS_TO_VM_PAGE(pa); 2852 2853 /* 2854 * Modified by us 2855 */ 2856 if (pte & PTE_M) 2857 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 2858 /* 2859 * Modified by someone 2860 */ 2861 else { 2862 vm_page_lock_queues(); 2863 if (m->dirty || pmap_is_modified(m)) 2864 val |= MINCORE_MODIFIED_OTHER; 2865 vm_page_unlock_queues(); 2866 } 2867 /* 2868 * Referenced by us or someone 2869 */ 2870 vm_page_lock_queues(); 2871 if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { 2872 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 2873 vm_page_flag_set(m, PG_REFERENCED); 2874 } 2875 vm_page_unlock_queues(); 2876 } 2877 return val; 2878} 2879 2880void 2881pmap_activate(struct thread *td) 2882{ 2883 pmap_t pmap, oldpmap; 2884 struct proc *p = td->td_proc; 2885 2886 critical_enter(); 2887 2888 pmap = vmspace_pmap(p->p_vmspace); 2889 oldpmap = PCPU_GET(curpmap); 2890 2891 if (oldpmap) 2892 atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask)); 2893 atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask)); 2894 pmap_asid_alloc(pmap); 2895 if (td == curthread) { 2896 PCPU_SET(segbase, pmap->pm_segtab); 2897 MachSetPID(pmap->pm_asid[PCPU_GET(cpuid)].asid); 2898 } 2899 PCPU_SET(curpmap, pmap); 2900 critical_exit(); 2901} 2902 2903/* 2904 * Increase the starting virtual address of the given mapping if a 2905 * different alignment might result in more superpage mappings. 2906 */ 2907void 2908pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 2909 vm_offset_t *addr, vm_size_t size) 2910{ 2911 vm_offset_t superpage_offset; 2912 2913 if (size < NBSEG) 2914 return; 2915 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 2916 offset += ptoa(object->pg_color); 2917 superpage_offset = offset & SEGOFSET; 2918 if (size - ((NBSEG - superpage_offset) & SEGOFSET) < NBSEG || 2919 (*addr & SEGOFSET) == superpage_offset) 2920 return; 2921 if ((*addr & SEGOFSET) < superpage_offset) 2922 *addr = (*addr & ~SEGOFSET) + superpage_offset; 2923 else 2924 *addr = ((*addr + SEGOFSET) & ~SEGOFSET) + superpage_offset; 2925} 2926 2927int pmap_pid_dump(int pid); 2928 2929int 2930pmap_pid_dump(int pid) 2931{ 2932 pmap_t pmap; 2933 struct proc *p; 2934 int npte = 0; 2935 int index; 2936 2937 sx_slock(&allproc_lock); 2938 LIST_FOREACH(p, &allproc, p_list) { 2939 if (p->p_pid != pid) 2940 continue; 2941 2942 if (p->p_vmspace) { 2943 int i, j; 2944 2945 printf("vmspace is %p\n", 2946 p->p_vmspace); 2947 index = 0; 2948 pmap = vmspace_pmap(p->p_vmspace); 2949 printf("pmap asid:%x generation:%x\n", 2950 pmap->pm_asid[0].asid, 2951 pmap->pm_asid[0].gen); 2952 for (i = 0; i < NUSERPGTBLS; i++) { 2953 pd_entry_t *pde; 2954 pt_entry_t *pte; 2955 unsigned base = i << SEGSHIFT; 2956 2957 pde = &pmap->pm_segtab[i]; 2958 if (pde && pmap_pde_v(pde)) { 2959 for (j = 0; j < 1024; j++) { 2960 unsigned va = base + 2961 (j << PAGE_SHIFT); 2962 2963 pte = pmap_pte(pmap, va); 2964 if (pte && pmap_pte_v(pte)) { 2965 vm_offset_t pa; 2966 vm_page_t m; 2967 2968 pa = mips_tlbpfn_to_paddr(*pte); 2969 m = PHYS_TO_VM_PAGE(pa); 2970 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 2971 va, pa, 2972 m->hold_count, 2973 m->wire_count, 2974 m->flags); 2975 npte++; 2976 index++; 2977 if (index >= 2) { 2978 index = 0; 2979 printf("\n"); 2980 } else { 2981 printf(" "); 2982 } 2983 } 2984 } 2985 } 2986 } 2987 } else { 2988 
printf("Process pid:%d has no vm_space\n", pid); 2989 } 2990 break; 2991 } 2992 sx_sunlock(&allproc_lock); 2993 return npte; 2994} 2995 2996 2997#if defined(DEBUG) 2998 2999static void pads(pmap_t pm); 3000void pmap_pvdump(vm_offset_t pa); 3001 3002/* print address space of pmap*/ 3003static void 3004pads(pmap_t pm) 3005{ 3006 unsigned va, i, j; 3007 pt_entry_t *ptep; 3008 3009 if (pm == kernel_pmap) 3010 return; 3011 for (i = 0; i < NPTEPG; i++) 3012 if (pm->pm_segtab[i]) 3013 for (j = 0; j < NPTEPG; j++) { 3014 va = (i << SEGSHIFT) + (j << PAGE_SHIFT); 3015 if (pm == kernel_pmap && va < KERNBASE) 3016 continue; 3017 if (pm != kernel_pmap && 3018 va >= VM_MAXUSER_ADDRESS) 3019 continue; 3020 ptep = pmap_pte(pm, va); 3021 if (pmap_pte_v(ptep)) 3022 printf("%x:%x ", va, *(int *)ptep); 3023 } 3024 3025} 3026 3027void 3028pmap_pvdump(vm_offset_t pa) 3029{ 3030 register pv_entry_t pv; 3031 vm_page_t m; 3032 3033 printf("pa %x", pa); 3034 m = PHYS_TO_VM_PAGE(pa); 3035 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3036 pv = TAILQ_NEXT(pv, pv_list)) { 3037 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3038 pads(pv->pv_pmap); 3039 } 3040 printf(" "); 3041} 3042 3043/* N/C */ 3044#endif 3045 3046 3047/* 3048 * Allocate TLB address space tag (called ASID or TLBPID) and return it. 3049 * It takes almost as much or more time to search the TLB for a 3050 * specific ASID and flush those entries as it does to flush the entire TLB. 3051 * Therefore, when we allocate a new ASID, we just take the next number. When 3052 * we run out of numbers, we flush the TLB, increment the generation count 3053 * and start over. ASID zero is reserved for kernel use. 3054 */ 3055static void 3056pmap_asid_alloc(pmap) 3057 pmap_t pmap; 3058{ 3059 if (pmap->pm_asid[PCPU_GET(cpuid)].asid != PMAP_ASID_RESERVED && 3060 pmap->pm_asid[PCPU_GET(cpuid)].gen == PCPU_GET(asid_generation)); 3061 else { 3062 if (PCPU_GET(next_asid) == pmap_max_asid) { 3063 MIPS_TBIAP(); 3064 PCPU_SET(asid_generation, 3065 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK); 3066 if (PCPU_GET(asid_generation) == 0) { 3067 PCPU_SET(asid_generation, 1); 3068 } 3069 PCPU_SET(next_asid, 1); /* 0 means invalid */ 3070 } 3071 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid); 3072 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation); 3073 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1); 3074 } 3075 3076#ifdef DEBUG 3077 if (pmapdebug & (PDB_FOLLOW | PDB_TLBPID)) { 3078 if (curproc) 3079 printf("pmap_asid_alloc: curproc %d '%s' ", 3080 curproc->p_pid, curproc->p_comm); 3081 else 3082 printf("pmap_asid_alloc: curproc <none> "); 3083 printf("segtab %p asid %d\n", pmap->pm_segtab, 3084 pmap->pm_asid[PCPU_GET(cpuid)].asid); 3085 } 3086#endif 3087} 3088 3089int 3090page_is_managed(vm_offset_t pa) 3091{ 3092 vm_offset_t pgnum = mips_btop(pa); 3093 3094 if (pgnum >= first_page && (pgnum < (first_page + vm_page_array_size))) { 3095 vm_page_t m; 3096 3097 m = PHYS_TO_VM_PAGE(pa); 3098 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) 3099 return 1; 3100 } 3101 return 0; 3102} 3103 3104static int 3105init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot) 3106{ 3107 int rw = 0; 3108 3109 if (!(prot & VM_PROT_WRITE)) 3110 rw = PTE_ROPAGE; 3111 else { 3112 if (va >= VM_MIN_KERNEL_ADDRESS) { 3113 /* 3114 * Don't bother to trap on kernel writes, just 3115 * record page as dirty. 
3116 */ 3117 rw = PTE_RWPAGE; 3118 vm_page_dirty(m); 3119 } else if ((m->md.pv_flags & PV_TABLE_MOD) || m->dirty) 3120 rw = PTE_RWPAGE; 3121 else 3122 rw = PTE_CWPAGE; 3123 } 3124 return rw; 3125} 3126 3127/* 3128 * pmap_page_is_free: 3129 * 3130 * Called when a page is freed to allow pmap to clean up 3131 * any extra state associated with the page. In this case 3132 * clear modified/referenced bits. 3133 */ 3134void 3135pmap_page_is_free(vm_page_t m) 3136{ 3137 3138 m->md.pv_flags = 0; 3139} 3140 3141/* 3142 * pmap_set_modified: 3143 * 3144 * Sets the page modified and reference bits for the specified page. 3145 */ 3146void 3147pmap_set_modified(vm_offset_t pa) 3148{ 3149 3150 PHYS_TO_VM_PAGE(pa)->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD); 3151} 3152 3153#include <machine/db_machdep.h> 3154 3155/* 3156 * Dump the translation buffer (TLB) in readable form. 3157 */ 3158 3159void 3160db_dump_tlb(int first, int last) 3161{ 3162 struct tlb tlb; 3163 int tlbno; 3164 3165 tlbno = first; 3166 3167 while (tlbno <= last) { 3168 MachTLBRead(tlbno, &tlb); 3169 if (tlb.tlb_lo0 & PTE_V || tlb.tlb_lo1 & PTE_V) { 3170 printf("TLB %2d vad 0x%08x ", tlbno, (tlb.tlb_hi & 0xffffff00)); 3171 } else { 3172 printf("TLB*%2d vad 0x%08x ", tlbno, (tlb.tlb_hi & 0xffffff00)); 3173 } 3174 printf("0=0x%08x ", pfn_to_vad(tlb.tlb_lo0)); 3175 printf("%c", tlb.tlb_lo0 & PTE_M ? 'M' : ' '); 3176 printf("%c", tlb.tlb_lo0 & PTE_G ? 'G' : ' '); 3177 printf(" atr %x ", (tlb.tlb_lo0 >> 3) & 7); 3178 printf("1=0x%08x ", pfn_to_vad(tlb.tlb_lo1)); 3179 printf("%c", tlb.tlb_lo1 & PTE_M ? 'M' : ' '); 3180 printf("%c", tlb.tlb_lo1 & PTE_G ? 'G' : ' '); 3181 printf(" atr %x ", (tlb.tlb_lo1 >> 3) & 7); 3182 printf(" sz=%x pid=%x\n", tlb.tlb_mask, 3183 (tlb.tlb_hi & 0x000000ff) 3184 ); 3185 tlbno++; 3186 } 3187} 3188 3189#ifdef DDB 3190#include <sys/kernel.h> 3191#include <ddb/ddb.h> 3192 3193DB_SHOW_COMMAND(tlb, ddb_dump_tlb) 3194{ 3195 db_dump_tlb(0, num_tlbentries - 1); 3196} 3197 3198#endif 3199 3200/* 3201 * Routine: pmap_kextract 3202 * Function: 3203 * Extract the physical page address associated 3204 * virtual address. 3205 */ 3206 /* PMAP_INLINE */ vm_offset_t 3207pmap_kextract(vm_offset_t va) 3208{ 3209 vm_offset_t pa = 0; 3210 3211 if (va < MIPS_CACHED_MEMORY_ADDR) { 3212 /* user virtual address */ 3213 pt_entry_t *ptep; 3214 3215 if (curproc && curproc->p_vmspace) { 3216 ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va); 3217 if (ptep) 3218 pa = mips_tlbpfn_to_paddr(*ptep) | 3219 (va & PAGE_MASK); 3220 } 3221 } else if (va >= MIPS_CACHED_MEMORY_ADDR && 3222 va < MIPS_UNCACHED_MEMORY_ADDR) 3223 pa = MIPS_CACHED_TO_PHYS(va); 3224 else if (va >= MIPS_UNCACHED_MEMORY_ADDR && 3225 va < MIPS_KSEG2_START) 3226 pa = MIPS_UNCACHED_TO_PHYS(va); 3227#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 3228 else if (need_wired_tlb_page_pool && ((va >= VM_MIN_KERNEL_ADDRESS) && 3229 (va < (VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET)))) 3230 pa = MIPS_CACHED_TO_PHYS(va); 3231#endif 3232 else if (va >= MIPS_KSEG2_START && va < VM_MAX_KERNEL_ADDRESS) { 3233 pt_entry_t *ptep; 3234 3235 /* Is the kernel pmap initialized? 
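 * If it is not, there is nothing to consult and pa is simply left at
 * zero; once it is, KSEG2 addresses below virtual_sys_start fall in
 * the special per-CPU mapping area and are resolved by scanning
 * sysmap_lmem rather than the kernel page tables.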
*/ 3236 if (kernel_pmap->pm_active) { 3237 if (va >= (vm_offset_t)virtual_sys_start) { 3238 /* It's inside the virtual address range */ 3239 ptep = pmap_pte(kernel_pmap, va); 3240 if (ptep) 3241 pa = mips_tlbpfn_to_paddr(*ptep) | 3242 (va & PAGE_MASK); 3243 } else { 3244 int i; 3245 3246 /* 3247 * It's inside the special mapping area. This 3248 * should not normally happen, but if it does 3249 * we want it to work correctly. Note that if 3250 * it does happen, we assume the caller holds 3251 * the lock; FIXME, this still needs to be 3252 * verified - RRS. 3253 */ 3254 for (i = 0; i < MAXCPU; i++) { 3255 if ((sysmap_lmem[i].valid1) && ((vm_offset_t)sysmap_lmem[i].CADDR1 == va)) { 3256 pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP1); 3257 break; 3258 } 3259 if ((sysmap_lmem[i].valid2) && ((vm_offset_t)sysmap_lmem[i].CADDR2 == va)) { 3260 pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP2); 3261 break; 3262 } 3263 } 3264 } 3265 } 3266 } 3267 return pa; 3268} 3269
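/*
 * A worked example of the unmapped-segment arithmetic used above: on the
 * 32-bit MIPS memory map, KSEG0 starts at 0x80000000 (cached) and KSEG1 at
 * 0xa0000000 (uncached), and both windows map the first 512MB of physical
 * memory one-to-one.  Roughly:
 *
 *	pa = MIPS_CACHED_TO_PHYS(0x80123456);	yields 0x00123456
 *	va = MIPS_PHYS_TO_UNCACHED(pa);		yields 0xa0123456
 *
 * so the same physical page is reachable cached via KSEG0 and uncached via
 * KSEG1 without any page table or TLB involvement.
 */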