pmap.c revision 195840
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
 */

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 195840 2009-07-24 13:50:29Z jhb $");

#include "opt_ddb.h"
#include "opt_msgbuf.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <sys/pcpu.h>
#include <sys/sched.h>
#ifdef SMP
#include <sys/smp.h>
#endif

#include <machine/cache.h>
#include <machine/pltfm.h>
#include <machine/md_var.h>

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#undef PMAP_DEBUG

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define pmap_pde(m, v)		(&((m)->pm_segtab[(vm_offset_t)(v) >> SEGSHIFT]))
#define segtab_pde(m, v)	(m[(vm_offset_t)(v) >> SEGSHIFT])

#define pmap_pte_w(pte)		((*(int *)pte & PTE_W) != 0)
#define pmap_pde_v(pte)		((*(int *)pte) != 0)
#define pmap_pte_m(pte)		((*(int *)pte & PTE_M) != 0)
#define pmap_pte_v(pte)		((*(int *)pte & PTE_V) != 0)

#define pmap_pte_set_w(pte, v)	((v)?(*(int *)pte |= PTE_W):(*(int *)pte &= ~PTE_W))
#define pmap_pte_set_prot(pte, v)	((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))

#define MIPS_SEGSIZE		(1L << SEGSHIFT)
#define mips_segtrunc(va)	((va) & ~(MIPS_SEGSIZE-1))
#define pmap_TLB_invalidate_all() MIPS_TBIAP()
#define pmap_va_asid(pmap, va)	((va) | ((pmap)->pm_asid[PCPU_GET(cpuid)].asid << VMTLB_PID_SHIFT))
#define is_kernel_pmap(x)	((x) == kernel_pmap)

struct pmap kernel_pmap_store;
pd_entry_t *kernel_segmap;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

static int nkpt;
unsigned pmap_max_asid;		/* max ASID supported by the system */

#define PMAP_ASID_RESERVED	0

vm_offset_t kernel_vm_end;

static void pmap_asid_alloc(pmap_t pmap);

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;

struct fpage fpages_shared[FPAGES_SHARED];

struct sysmaps sysmaps_pcpu[MAXCPU];

static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);

static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
static boolean_t pmap_testbit(vm_page_t m, int bit);
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte,
    vm_page_t m, boolean_t wired);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
    vm_offset_t va, vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);

static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
static void pmap_TLB_invalidate_kernel(vm_offset_t);
static void pmap_TLB_update_kernel(vm_offset_t, pt_entry_t);
static void pmap_init_fpage(void);

#ifdef SMP
static void pmap_invalidate_page_action(void *arg);
static void pmap_invalidate_all_action(void *arg);
static void pmap_update_page_action(void *arg);
#endif

struct local_sysmaps {
        struct mtx lock;
        pt_entry_t CMAP1;
        pt_entry_t CMAP2;
        caddr_t CADDR1;
        caddr_t CADDR2;
        uint16_t valid1, valid2;
};

/* This structure is for large memory
 * above 512Meg. We can't (in 32 bit mode)
 * just use the direct mapped MIPS_CACHED_TO_PHYS()
 * macros since we can't see the memory and must
 * map it in when we need to access it. In 64
 * bit mode this goes away.
 */
static struct local_sysmaps sysmap_lmem[MAXCPU];
caddr_t virtual_sys_start = (caddr_t)0;

pd_entry_t
pmap_segmap(pmap_t pmap, vm_offset_t va)
{
        if (pmap->pm_segtab)
                return (pmap->pm_segtab[((vm_offset_t)(va) >> SEGSHIFT)]);
        else
                return ((pd_entry_t)0);
}

/*
 *	Routine:	pmap_pte
 *	Function:
 *		Extract the page table entry associated
 *		with the given map/virtual_address pair.
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
        pt_entry_t *pdeaddr;

        if (pmap) {
                pdeaddr = (pt_entry_t *)pmap_segmap(pmap, va);
                if (pdeaddr) {
                        return pdeaddr + vad_to_pte_offset(va);
                }
        }
        return ((pt_entry_t *)0);
}

vm_offset_t
pmap_steal_memory(vm_size_t size)
{
        vm_size_t bank_size;
        vm_offset_t pa, va;

        size = round_page(size);

        bank_size = phys_avail[1] - phys_avail[0];
        while (size > bank_size) {
                int i;

                for (i = 0; phys_avail[i + 2]; i += 2) {
                        phys_avail[i] = phys_avail[i + 2];
                        phys_avail[i + 1] = phys_avail[i + 3];
                }
                phys_avail[i] = 0;
                phys_avail[i + 1] = 0;
                if (!phys_avail[0])
                        panic("pmap_steal_memory: out of memory");
                bank_size = phys_avail[1] - phys_avail[0];
        }

        pa = phys_avail[0];
        phys_avail[0] += size;
        if (pa >= MIPS_KSEG0_LARGEST_PHYS) {
                panic("Out of memory below 512Meg?");
        }
        va = MIPS_PHYS_TO_CACHED(pa);
        bzero((caddr_t)va, size);
        return va;
}

/*
 * Bootstrap the system enough to run with virtual memory.  This
 * assumes that the phys_avail array has been initialized.
 */
void
pmap_bootstrap(void)
{
        pt_entry_t *pgtab;
        pt_entry_t *pte;
        int i, j;
        int memory_larger_than_512meg = 0;

        /* Sort. */
again:
        for (i = 0; phys_avail[i + 1] != 0; i += 2) {
                if (phys_avail[i + 1] >= MIPS_KSEG0_LARGEST_PHYS) {
                        memory_larger_than_512meg++;
                }
                if (i < 2)
                        continue;
                if (phys_avail[i - 2] > phys_avail[i]) {
                        vm_paddr_t ptemp[2];

                        ptemp[0] = phys_avail[i + 0];
                        ptemp[1] = phys_avail[i + 1];

                        phys_avail[i + 0] = phys_avail[i - 2];
                        phys_avail[i + 1] = phys_avail[i - 1];

                        phys_avail[i - 2] = ptemp[0];
                        phys_avail[i - 1] = ptemp[1];
                        goto again;
                }
        }

        if (bootverbose) {
                printf("Physical memory chunk(s):\n");
                for (i = 0; phys_avail[i + 1] != 0; i += 2) {
                        vm_paddr_t size;

                        size = phys_avail[i + 1] - phys_avail[i];
                        printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
                            (uintmax_t) phys_avail[i],
                            (uintmax_t) phys_avail[i + 1] - 1,
                            (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
                }
        }
        /*
         * Steal the message buffer from the beginning of memory.
         */
        msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE);
        msgbufinit(msgbufp, MSGBUF_SIZE);

        /* Steal memory for the dynamic per-cpu area. */
        dpcpu_init((void *)pmap_steal_memory(DPCPU_SIZE), 0);

        /*
         * Steal thread0 kstack.
         */
        kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);

        virtual_avail = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET;
        virtual_end = VM_MAX_KERNEL_ADDRESS;

        /*
         * Steal some virtual space that will not be in kernel_segmap. This
         * va memory space will be used to map in kernel pages that are
         * outside the 512Meg region. Note that we only do this steal when
         * we do have memory in this region, that way for systems with
         * smaller memory we don't "steal" any va ranges :-)
         */
        if (memory_larger_than_512meg) {
                for (i = 0; i < MAXCPU; i++) {
                        sysmap_lmem[i].CMAP1 = PTE_G;
                        sysmap_lmem[i].CMAP2 = PTE_G;
                        sysmap_lmem[i].CADDR1 = (caddr_t)virtual_avail;
                        virtual_avail += PAGE_SIZE;
                        sysmap_lmem[i].CADDR2 = (caddr_t)virtual_avail;
                        virtual_avail += PAGE_SIZE;
                        sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
                        PMAP_LGMEM_LOCK_INIT(&sysmap_lmem[i]);
                }
        }
        virtual_sys_start = (caddr_t)virtual_avail;
        /*
         * Allocate segment table for the kernel
         */
        kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);

        /*
         * Allocate second level page tables for the kernel
         */
        nkpt = NKPT;
        if (memory_larger_than_512meg) {
                /*
                 * If we have a large memory system we CANNOT afford to hit
                 * pmap_growkernel() and allocate memory, since we MAY end
                 * up with a page that is NOT mappable. For that reason we
                 * grab more up front. Normally NKPT is 120 (YMMV, see pmap.h);
                 * this gives us 480meg of kernel virtual addresses at the
                 * cost of 120 pages (each page gets us 4 Meg). Since the
                 * kernel starts at virtual_avail, we can use this to
                 * calculate how many entries are left from there to the end
                 * of the segmap; we want to allocate all of it, which would
                 * be somewhere above 0xC0000000 - 0xFFFFFFFF and results
                 * in about 256 entries or so instead of the 120.
                 */
                nkpt = (PAGE_SIZE / sizeof(pd_entry_t)) - (virtual_avail >> SEGSHIFT);
        }
        pgtab = (pt_entry_t *)pmap_steal_memory(PAGE_SIZE * nkpt);

        /*
         * The R[4-7]?00 stores only one copy of the Global bit in the
         * translation lookaside buffer for each 2 page entry.  Thus invalid
         * entries must have the Global bit set so that when the Entry LO and
         * Entry HI G bits are ANDed together they will produce a global bit
         * to store in the tlb.
         */
        for (i = 0, pte = pgtab; i < (nkpt * NPTEPG); i++, pte++)
                *pte = PTE_G;

        printf("Va=0x%x Ve=0x%x\n", virtual_avail, virtual_end);
        /*
         * The segment table contains the KVA of the pages in the second
         * level page table.
         */
        printf("init kernel_segmap va >> = %d nkpt:%d\n",
            (virtual_avail >> SEGSHIFT),
            nkpt);
        for (i = 0, j = (virtual_avail >> SEGSHIFT); i < nkpt; i++, j++)
                kernel_segmap[j] = (pd_entry_t)(pgtab + (i * NPTEPG));

        for (i = 0; phys_avail[i + 2]; i += 2)
                continue;
        printf("avail_start:0x%x avail_end:0x%x\n",
            phys_avail[0], phys_avail[i + 1]);

        /*
         * The kernel's pmap is statically allocated so we don't have to use
         * pmap_create, which is unlikely to work correctly at this part of
         * the boot sequence (XXX and which no longer exists).
         */
        PMAP_LOCK_INIT(kernel_pmap);
        kernel_pmap->pm_segtab = kernel_segmap;
        kernel_pmap->pm_active = ~0;
        TAILQ_INIT(&kernel_pmap->pm_pvlist);
        kernel_pmap->pm_asid[PCPU_GET(cpuid)].asid = PMAP_ASID_RESERVED;
        kernel_pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
        pmap_max_asid = VMNUM_PIDS;
        MachSetPID(0);
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

        TAILQ_INIT(&m->md.pv_list);
        m->md.pv_list_count = 0;
        m->md.pv_flags = 0;
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 *	pmap_init has been enhanced to support, in a fairly consistent
 *	way, discontiguous physical memory.
 */
void
pmap_init(void)
{

        if (need_wired_tlb_page_pool)
                pmap_init_fpage();
        /*
         * Initialize the address space (zone) for the pv entries.  Set a
         * high water mark so that the system can recover from excessive
         * numbers of pv entries.
         */
        pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
            NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
        pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
        pv_entry_high_water = 9 * (pv_entry_max / 10);
        uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
}

/***************************************************
 * Low level helper routines.....
 ***************************************************/

#if defined(PMAP_DIAGNOSTIC)

/*
 * This code checks for non-writeable/modified pages.
 * This should be an invalid condition.
480 */ 481static int 482pmap_nw_modified(pt_entry_t pte) 483{ 484 if ((pte & (PTE_M | PTE_RO)) == (PTE_M | PTE_RO)) 485 return (1); 486 else 487 return (0); 488} 489 490#endif 491 492static void 493pmap_invalidate_all(pmap_t pmap) 494{ 495#ifdef SMP 496 smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *)pmap); 497} 498 499static void 500pmap_invalidate_all_action(void *arg) 501{ 502 pmap_t pmap = (pmap_t)arg; 503 504#endif 505 506 if (pmap->pm_active & PCPU_GET(cpumask)) { 507 pmap_TLB_invalidate_all(); 508 } else 509 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 510} 511 512struct pmap_invalidate_page_arg { 513 pmap_t pmap; 514 vm_offset_t va; 515}; 516 517static __inline void 518pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 519{ 520#ifdef SMP 521 struct pmap_invalidate_page_arg arg; 522 523 arg.pmap = pmap; 524 arg.va = va; 525 526 smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *)&arg); 527} 528 529static void 530pmap_invalidate_page_action(void *arg) 531{ 532 pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap; 533 vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va; 534 535#endif 536 537 if (is_kernel_pmap(pmap)) { 538 pmap_TLB_invalidate_kernel(va); 539 return; 540 } 541 if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) 542 return; 543 else if (!(pmap->pm_active & PCPU_GET(cpumask))) { 544 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 545 return; 546 } 547 va = pmap_va_asid(pmap, (va & ~PGOFSET)); 548 mips_TBIS(va); 549} 550 551static void 552pmap_TLB_invalidate_kernel(vm_offset_t va) 553{ 554 u_int32_t pid; 555 556 MachTLBGetPID(pid); 557 va = va | (pid << VMTLB_PID_SHIFT); 558 mips_TBIS(va); 559} 560 561struct pmap_update_page_arg { 562 pmap_t pmap; 563 vm_offset_t va; 564 pt_entry_t pte; 565}; 566 567void 568pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) 569{ 570#ifdef SMP 571 struct pmap_update_page_arg arg; 572 573 arg.pmap = pmap; 574 arg.va = va; 575 arg.pte = pte; 576 577 smp_rendezvous(0, pmap_update_page_action, 0, (void *)&arg); 578} 579 580static void 581pmap_update_page_action(void *arg) 582{ 583 pmap_t pmap = ((struct pmap_update_page_arg *)arg)->pmap; 584 vm_offset_t va = ((struct pmap_update_page_arg *)arg)->va; 585 pt_entry_t pte = ((struct pmap_update_page_arg *)arg)->pte; 586 587#endif 588 if (is_kernel_pmap(pmap)) { 589 pmap_TLB_update_kernel(va, pte); 590 return; 591 } 592 if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) 593 return; 594 else if (!(pmap->pm_active & PCPU_GET(cpumask))) { 595 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; 596 return; 597 } 598 va = pmap_va_asid(pmap, va); 599 MachTLBUpdate(va, pte); 600} 601 602static void 603pmap_TLB_update_kernel(vm_offset_t va, pt_entry_t pte) 604{ 605 u_int32_t pid; 606 607 MachTLBGetPID(pid); 608 va = va | (pid << VMTLB_PID_SHIFT); 609 610 MachTLBUpdate(va, pte); 611} 612 613/* 614 * Routine: pmap_extract 615 * Function: 616 * Extract the physical page address associated 617 * with the given map/virtual_address pair. 618 */ 619vm_paddr_t 620pmap_extract(pmap_t pmap, vm_offset_t va) 621{ 622 pt_entry_t *pte; 623 vm_offset_t retval = 0; 624 625 PMAP_LOCK(pmap); 626 pte = pmap_pte(pmap, va); 627 if (pte) { 628 retval = mips_tlbpfn_to_paddr(*pte) | (va & PAGE_MASK); 629 } 630 PMAP_UNLOCK(pmap); 631 return retval; 632} 633 634/* 635 * Routine: pmap_extract_and_hold 636 * Function: 637 * Atomically extract and hold the physical page 638 * with the given pmap and virtual address pair 639 * if that mapping permits the given protection. 
640 */ 641vm_page_t 642pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 643{ 644 pt_entry_t pte; 645 vm_page_t m; 646 647 m = NULL; 648 vm_page_lock_queues(); 649 PMAP_LOCK(pmap); 650 651 pte = *pmap_pte(pmap, va); 652 if (pte != 0 && pmap_pte_v(&pte) && 653 ((pte & PTE_RW) || (prot & VM_PROT_WRITE) == 0)) { 654 m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pte)); 655 vm_page_hold(m); 656 } 657 vm_page_unlock_queues(); 658 PMAP_UNLOCK(pmap); 659 return (m); 660} 661 662/*************************************************** 663 * Low level mapping routines..... 664 ***************************************************/ 665 666/* 667 * add a wired page to the kva 668 */ 669 /* PMAP_INLINE */ void 670pmap_kenter(vm_offset_t va, vm_paddr_t pa) 671{ 672 register pt_entry_t *pte; 673 pt_entry_t npte, opte; 674 675#ifdef PMAP_DEBUG 676 printf("pmap_kenter: va: 0x%08x -> pa: 0x%08x\n", va, pa); 677#endif 678 npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W; 679 680 if (is_cacheable_mem(pa)) 681 npte |= PTE_CACHE; 682 else 683 npte |= PTE_UNCACHED; 684 685 pte = pmap_pte(kernel_pmap, va); 686 opte = *pte; 687 *pte = npte; 688 689 pmap_update_page(kernel_pmap, va, npte); 690} 691 692/* 693 * remove a page from the kernel pagetables 694 */ 695 /* PMAP_INLINE */ void 696pmap_kremove(vm_offset_t va) 697{ 698 register pt_entry_t *pte; 699 700 pte = pmap_pte(kernel_pmap, va); 701 *pte = PTE_G; 702 pmap_invalidate_page(kernel_pmap, va); 703} 704 705/* 706 * Used to map a range of physical addresses into kernel 707 * virtual address space. 708 * 709 * The value passed in '*virt' is a suggested virtual address for 710 * the mapping. Architectures which can support a direct-mapped 711 * physical to virtual region can return the appropriate address 712 * within that region, leaving '*virt' unchanged. Other 713 * architectures should map the pages starting at '*virt' and 714 * update '*virt' with the first usable address after the mapped 715 * region. 716 */ 717vm_offset_t 718pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) 719{ 720 vm_offset_t va, sva; 721 722 va = sva = *virt; 723 while (start < end) { 724 pmap_kenter(va, start); 725 va += PAGE_SIZE; 726 start += PAGE_SIZE; 727 } 728 *virt = va; 729 return (sva); 730} 731 732/* 733 * Add a list of wired pages to the kva 734 * this routine is only used for temporary 735 * kernel mappings that do not need to have 736 * page modification or references recorded. 737 * Note that old mappings are simply written 738 * over. The page *must* be wired. 739 */ 740void 741pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 742{ 743 int i; 744 745 for (i = 0; i < count; i++) { 746 pmap_kenter(va, VM_PAGE_TO_PHYS(m[i])); 747 va += PAGE_SIZE; 748 } 749} 750 751/* 752 * this routine jerks page mappings from the 753 * kernel -- it is meant only for temporary mappings. 754 */ 755void 756pmap_qremove(vm_offset_t va, int count) 757{ 758 while (count-- > 0) { 759 pmap_kremove(va); 760 va += PAGE_SIZE; 761 } 762} 763 764/*************************************************** 765 * Page table page management routines..... 766 ***************************************************/ 767 768/* 769 * floating pages (FPAGES) management routines 770 * 771 * FPAGES are the reserved virtual memory areas which can be 772 * mapped to any physical memory. This gets used typically 773 * in the following functions: 774 * 775 * pmap_zero_page 776 * pmap_copy_page 777 */ 778 779/* 780 * Create the floating pages, aka FPAGES! 
781 */ 782static void 783pmap_init_fpage() 784{ 785 vm_offset_t kva; 786 int i, j; 787 struct sysmaps *sysmaps; 788 789 /* 790 * We allocate a total of (FPAGES*MAXCPU + FPAGES_SHARED + 1) pages 791 * at first. FPAGES & FPAGES_SHARED should be EVEN Then we'll adjust 792 * 'kva' to be even-page aligned so that the fpage area can be wired 793 * in the TLB with a single TLB entry. 794 */ 795 kva = kmem_alloc_nofault(kernel_map, 796 (FPAGES * MAXCPU + 1 + FPAGES_SHARED) * PAGE_SIZE); 797 if ((void *)kva == NULL) 798 panic("pmap_init_fpage: fpage allocation failed"); 799 800 /* 801 * Make up start at an even page number so we can wire down the 802 * fpage area in the tlb with a single tlb entry. 803 */ 804 if ((((vm_offset_t)kva) >> PGSHIFT) & 1) { 805 /* 806 * 'kva' is not even-page aligned. Adjust it and free the 807 * first page which is unused. 808 */ 809 kmem_free(kernel_map, (vm_offset_t)kva, NBPG); 810 kva = ((vm_offset_t)kva) + NBPG; 811 } else { 812 /* 813 * 'kva' is even page aligned. We don't need the last page, 814 * free it. 815 */ 816 kmem_free(kernel_map, ((vm_offset_t)kva) + FSPACE, NBPG); 817 } 818 819 for (i = 0; i < MAXCPU; i++) { 820 sysmaps = &sysmaps_pcpu[i]; 821 mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); 822 823 /* Assign FPAGES pages to the CPU */ 824 for (j = 0; j < FPAGES; j++) 825 sysmaps->fp[j].kva = kva + (j) * PAGE_SIZE; 826 kva = ((vm_offset_t)kva) + (FPAGES * PAGE_SIZE); 827 } 828 829 /* 830 * An additional 2 pages are needed, one for pmap_zero_page_idle() 831 * and one for coredump. These pages are shared by all cpu's 832 */ 833 fpages_shared[PMAP_FPAGE3].kva = kva; 834 fpages_shared[PMAP_FPAGE_KENTER_TEMP].kva = kva + PAGE_SIZE; 835} 836 837/* 838 * Map the page to the fpage virtual address as specified thru' fpage id 839 */ 840vm_offset_t 841pmap_map_fpage(vm_paddr_t pa, struct fpage *fp, boolean_t check_unmaped) 842{ 843 vm_offset_t kva; 844 register pt_entry_t *pte; 845 pt_entry_t npte; 846 847 KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 848 /* 849 * Check if the fpage is free 850 */ 851 if (fp->state) { 852 if (check_unmaped == TRUE) 853 pmap_unmap_fpage(pa, fp); 854 else 855 panic("pmap_map_fpage: fpage is busy"); 856 } 857 fp->state = TRUE; 858 kva = fp->kva; 859 860 npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 861 pte = pmap_pte(kernel_pmap, kva); 862 *pte = npte; 863 864 pmap_TLB_update_kernel(kva, npte); 865 866 return (kva); 867} 868 869/* 870 * Unmap the page from the fpage virtual address as specified thru' fpage id 871 */ 872void 873pmap_unmap_fpage(vm_paddr_t pa, struct fpage *fp) 874{ 875 vm_offset_t kva; 876 register pt_entry_t *pte; 877 878 KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 879 /* 880 * Check if the fpage is busy 881 */ 882 if (!(fp->state)) { 883 panic("pmap_unmap_fpage: fpage is free"); 884 } 885 kva = fp->kva; 886 887 pte = pmap_pte(kernel_pmap, kva); 888 *pte = PTE_G; 889 pmap_TLB_invalidate_kernel(kva); 890 891 fp->state = FALSE; 892 893 /* 894 * Should there be any flush operation at the end? 895 */ 896} 897 898/* Revision 1.507 899 * 900 * Simplify the reference counting of page table pages. Specifically, use 901 * the page table page's wired count rather than its hold count to contain 902 * the reference count. 903 */ 904 905/* 906 * This routine unholds page table pages, and if the hold count 907 * drops to zero, then it decrements the wire count. 
908 */ 909static int 910_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 911{ 912 913 /* 914 * unmap the page table page 915 */ 916 pmap->pm_segtab[m->pindex] = 0; 917 --pmap->pm_stats.resident_count; 918 919 if (pmap->pm_ptphint == m) 920 pmap->pm_ptphint = NULL; 921 922 /* 923 * If the page is finally unwired, simply free it. 924 */ 925 vm_page_free_zero(m); 926 atomic_subtract_int(&cnt.v_wire_count, 1); 927 return (1); 928} 929 930static PMAP_INLINE int 931pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) 932{ 933 --m->wire_count; 934 if (m->wire_count == 0) 935 return (_pmap_unwire_pte_hold(pmap, m)); 936 else 937 return (0); 938} 939 940/* 941 * After removing a page table entry, this routine is used to 942 * conditionally free the page, and manage the hold/wire counts. 943 */ 944static int 945pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 946{ 947 unsigned ptepindex; 948 pd_entry_t pteva; 949 950 if (va >= VM_MAXUSER_ADDRESS) 951 return (0); 952 953 if (mpte == NULL) { 954 ptepindex = (va >> SEGSHIFT); 955 if (pmap->pm_ptphint && 956 (pmap->pm_ptphint->pindex == ptepindex)) { 957 mpte = pmap->pm_ptphint; 958 } else { 959 pteva = *pmap_pde(pmap, va); 960 mpte = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva)); 961 pmap->pm_ptphint = mpte; 962 } 963 } 964 return pmap_unwire_pte_hold(pmap, mpte); 965} 966 967void 968pmap_pinit0(pmap_t pmap) 969{ 970 int i; 971 972 PMAP_LOCK_INIT(pmap); 973 pmap->pm_segtab = kernel_segmap; 974 pmap->pm_active = 0; 975 pmap->pm_ptphint = NULL; 976 for (i = 0; i < MAXCPU; i++) { 977 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 978 pmap->pm_asid[i].gen = 0; 979 } 980 PCPU_SET(curpmap, pmap); 981 TAILQ_INIT(&pmap->pm_pvlist); 982 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 983} 984 985/* 986 * Initialize a preallocated and zeroed pmap structure, 987 * such as one in a vmspace structure. 988 */ 989int 990pmap_pinit(pmap_t pmap) 991{ 992 vm_page_t ptdpg; 993 int i; 994 int req; 995 996 PMAP_LOCK_INIT(pmap); 997 998 req = VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED | 999 VM_ALLOC_ZERO; 1000 1001#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 1002 if (need_wired_tlb_page_pool) 1003 req |= VM_ALLOC_WIRED_TLB_PG_POOL; 1004#endif 1005 /* 1006 * allocate the page directory page 1007 */ 1008 while ((ptdpg = vm_page_alloc(NULL, NUSERPGTBLS, req)) == NULL) 1009 VM_WAIT; 1010 1011 ptdpg->valid = VM_PAGE_BITS_ALL; 1012 1013 pmap->pm_segtab = (pd_entry_t *) 1014 MIPS_PHYS_TO_CACHED(VM_PAGE_TO_PHYS(ptdpg)); 1015 if ((ptdpg->flags & PG_ZERO) == 0) 1016 bzero(pmap->pm_segtab, PAGE_SIZE); 1017 1018 pmap->pm_active = 0; 1019 pmap->pm_ptphint = NULL; 1020 for (i = 0; i < MAXCPU; i++) { 1021 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1022 pmap->pm_asid[i].gen = 0; 1023 } 1024 TAILQ_INIT(&pmap->pm_pvlist); 1025 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1026 1027 return (1); 1028} 1029 1030/* 1031 * this routine is called if the page table page is not 1032 * mapped correctly. 
1033 */ 1034static vm_page_t 1035_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) 1036{ 1037 vm_offset_t pteva, ptepa; 1038 vm_page_t m; 1039 int req; 1040 1041 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1042 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1043 ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1044 1045 req = VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ; 1046#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 1047 if (need_wired_tlb_page_pool) 1048 req |= VM_ALLOC_WIRED_TLB_PG_POOL; 1049#endif 1050 /* 1051 * Find or fabricate a new pagetable page 1052 */ 1053 if ((m = vm_page_alloc(NULL, ptepindex, req)) == NULL) { 1054 if (flags & M_WAITOK) { 1055 PMAP_UNLOCK(pmap); 1056 vm_page_unlock_queues(); 1057 VM_WAIT; 1058 vm_page_lock_queues(); 1059 PMAP_LOCK(pmap); 1060 } 1061 /* 1062 * Indicate the need to retry. While waiting, the page 1063 * table page may have been allocated. 1064 */ 1065 return (NULL); 1066 } 1067 if ((m->flags & PG_ZERO) == 0) 1068 pmap_zero_page(m); 1069 1070 KASSERT(m->queue == PQ_NONE, 1071 ("_pmap_allocpte: %p->queue != PQ_NONE", m)); 1072 1073 /* 1074 * Map the pagetable page into the process address space, if it 1075 * isn't already there. 1076 */ 1077 1078 pmap->pm_stats.resident_count++; 1079 1080 ptepa = VM_PAGE_TO_PHYS(m); 1081 pteva = MIPS_PHYS_TO_CACHED(ptepa); 1082 pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva; 1083 1084 /* 1085 * Set the page table hint 1086 */ 1087 pmap->pm_ptphint = m; 1088 1089 /* 1090 * Kernel page tables are allocated in pmap_bootstrap() or 1091 * pmap_growkernel(). 1092 */ 1093 if (is_kernel_pmap(pmap)) 1094 panic("_pmap_allocpte() called for kernel pmap\n"); 1095 1096 m->valid = VM_PAGE_BITS_ALL; 1097 vm_page_flag_clear(m, PG_ZERO); 1098 1099 return (m); 1100} 1101 1102static vm_page_t 1103pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1104{ 1105 unsigned ptepindex; 1106 vm_offset_t pteva; 1107 vm_page_t m; 1108 1109 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1110 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1111 ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1112 1113 /* 1114 * Calculate pagetable page index 1115 */ 1116 ptepindex = va >> SEGSHIFT; 1117retry: 1118 /* 1119 * Get the page directory entry 1120 */ 1121 pteva = (vm_offset_t)pmap->pm_segtab[ptepindex]; 1122 1123 /* 1124 * If the page table page is mapped, we just increment the hold 1125 * count, and activate it. 1126 */ 1127 if (pteva) { 1128 /* 1129 * In order to get the page table page, try the hint first. 1130 */ 1131 if (pmap->pm_ptphint && 1132 (pmap->pm_ptphint->pindex == ptepindex)) { 1133 m = pmap->pm_ptphint; 1134 } else { 1135 m = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva)); 1136 pmap->pm_ptphint = m; 1137 } 1138 m->wire_count++; 1139 } else { 1140 /* 1141 * Here if the pte page isn't mapped, or if it has been 1142 * deallocated. 1143 */ 1144 m = _pmap_allocpte(pmap, ptepindex, flags); 1145 if (m == NULL && (flags & M_WAITOK)) 1146 goto retry; 1147 } 1148 return m; 1149} 1150 1151 1152/*************************************************** 1153* Pmap allocation/deallocation routines. 1154 ***************************************************/ 1155/* 1156 * Revision 1.397 1157 * - Merged pmap_release and pmap_release_free_page. When pmap_release is 1158 * called only the page directory page(s) can be left in the pmap pte 1159 * object, since all page table pages will have been freed by 1160 * pmap_remove_pages and pmap_remove. 
In addition, there can only be one 1161 * reference to the pmap and the page directory is wired, so the page(s) 1162 * can never be busy. So all there is to do is clear the magic mappings 1163 * from the page directory and free the page(s). 1164 */ 1165 1166 1167/* 1168 * Release any resources held by the given physical map. 1169 * Called when a pmap initialized by pmap_pinit is being released. 1170 * Should only be called if the map contains no valid mappings. 1171 */ 1172void 1173pmap_release(pmap_t pmap) 1174{ 1175 vm_page_t ptdpg; 1176 1177 KASSERT(pmap->pm_stats.resident_count == 0, 1178 ("pmap_release: pmap resident count %ld != 0", 1179 pmap->pm_stats.resident_count)); 1180 1181 ptdpg = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pmap->pm_segtab)); 1182 ptdpg->wire_count--; 1183 atomic_subtract_int(&cnt.v_wire_count, 1); 1184 vm_page_free_zero(ptdpg); 1185} 1186 1187/* 1188 * grow the number of kernel page table entries, if needed 1189 */ 1190void 1191pmap_growkernel(vm_offset_t addr) 1192{ 1193 vm_offset_t ptppaddr; 1194 vm_page_t nkpg; 1195 pt_entry_t *pte; 1196 int i, req; 1197 1198 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1199 if (kernel_vm_end == 0) { 1200 kernel_vm_end = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET; 1201 nkpt = 0; 1202 while (segtab_pde(kernel_segmap, kernel_vm_end)) { 1203 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1204 ~(PAGE_SIZE * NPTEPG - 1); 1205 nkpt++; 1206 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1207 kernel_vm_end = kernel_map->max_offset; 1208 break; 1209 } 1210 } 1211 } 1212 addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1213 if (addr - 1 >= kernel_map->max_offset) 1214 addr = kernel_map->max_offset; 1215 while (kernel_vm_end < addr) { 1216 if (segtab_pde(kernel_segmap, kernel_vm_end)) { 1217 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1218 ~(PAGE_SIZE * NPTEPG - 1); 1219 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1220 kernel_vm_end = kernel_map->max_offset; 1221 break; 1222 } 1223 continue; 1224 } 1225 /* 1226 * This index is bogus, but out of the way 1227 */ 1228 req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; 1229#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 1230 if (need_wired_tlb_page_pool) 1231 req |= VM_ALLOC_WIRED_TLB_PG_POOL; 1232#endif 1233 nkpg = vm_page_alloc(NULL, nkpt, req); 1234 if (!nkpg) 1235 panic("pmap_growkernel: no memory to grow kernel"); 1236 1237 nkpt++; 1238 1239 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1240 if (ptppaddr >= MIPS_KSEG0_LARGEST_PHYS) { 1241 /* 1242 * We need to do something here, but I am not sure 1243 * what. We can access anything in the 0 - 512Meg 1244 * region, but if we get a page to go in the kernel 1245 * segmap that is outside of of that we really need 1246 * to have another mapping beyond the temporary ones 1247 * I have. Not sure how to do this yet. FIXME FIXME. 1248 */ 1249 panic("Gak, can't handle a k-page table outside of lower 512Meg"); 1250 } 1251 pte = (pt_entry_t *)MIPS_PHYS_TO_CACHED(ptppaddr); 1252 segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte; 1253 1254 /* 1255 * The R[4-7]?00 stores only one copy of the Global bit in 1256 * the translation lookaside buffer for each 2 page entry. 1257 * Thus invalid entrys must have the Global bit set so when 1258 * Entry LO and Entry HI G bits are anded together they will 1259 * produce a global bit to store in the tlb. 
1260 */ 1261 for (i = 0; i < NPTEPG; i++, pte++) 1262 *pte = PTE_G; 1263 1264 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1265 ~(PAGE_SIZE * NPTEPG - 1); 1266 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1267 kernel_vm_end = kernel_map->max_offset; 1268 break; 1269 } 1270 } 1271} 1272 1273/*************************************************** 1274* page management routines. 1275 ***************************************************/ 1276 1277/* 1278 * free the pv_entry back to the free list 1279 */ 1280static PMAP_INLINE void 1281free_pv_entry(pv_entry_t pv) 1282{ 1283 1284 pv_entry_count--; 1285 uma_zfree(pvzone, pv); 1286} 1287 1288/* 1289 * get a new pv_entry, allocating a block from the system 1290 * when needed. 1291 * the memory allocation is performed bypassing the malloc code 1292 * because of the possibility of allocations at interrupt time. 1293 */ 1294static pv_entry_t 1295get_pv_entry(pmap_t locked_pmap) 1296{ 1297 static const struct timeval printinterval = { 60, 0 }; 1298 static struct timeval lastprint; 1299 struct vpgqueues *vpq; 1300 pt_entry_t *pte, oldpte; 1301 pmap_t pmap; 1302 pv_entry_t allocated_pv, next_pv, pv; 1303 vm_offset_t va; 1304 vm_page_t m; 1305 1306 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 1307 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1308 allocated_pv = uma_zalloc(pvzone, M_NOWAIT); 1309 if (allocated_pv != NULL) { 1310 pv_entry_count++; 1311 if (pv_entry_count > pv_entry_high_water) 1312 pagedaemon_wakeup(); 1313 else 1314 return (allocated_pv); 1315 } 1316 /* 1317 * Reclaim pv entries: At first, destroy mappings to inactive 1318 * pages. After that, if a pv entry is still needed, destroy 1319 * mappings to active pages. 1320 */ 1321 if (ratecheck(&lastprint, &printinterval)) 1322 printf("Approaching the limit on PV entries, " 1323 "increase the vm.pmap.shpgperproc tunable.\n"); 1324 vpq = &vm_page_queues[PQ_INACTIVE]; 1325retry: 1326 TAILQ_FOREACH(m, &vpq->pl, pageq) { 1327 if (m->hold_count || m->busy) 1328 continue; 1329 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { 1330 va = pv->pv_va; 1331 pmap = pv->pv_pmap; 1332 /* Avoid deadlock and lock recursion. 
*/ 1333 if (pmap > locked_pmap) 1334 PMAP_LOCK(pmap); 1335 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) 1336 continue; 1337 pmap->pm_stats.resident_count--; 1338 pte = pmap_pte(pmap, va); 1339 KASSERT(pte != NULL, ("pte")); 1340 oldpte = loadandclear((u_int *)pte); 1341 if (is_kernel_pmap(pmap)) 1342 *pte = PTE_G; 1343 KASSERT((oldpte & PTE_W) == 0, 1344 ("wired pte for unwired page")); 1345 if (m->md.pv_flags & PV_TABLE_REF) 1346 vm_page_flag_set(m, PG_REFERENCED); 1347 if (oldpte & PTE_M) 1348 vm_page_dirty(m); 1349 pmap_invalidate_page(pmap, va); 1350 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1351 m->md.pv_list_count--; 1352 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1353 if (TAILQ_EMPTY(&m->md.pv_list)) { 1354 vm_page_flag_clear(m, PG_WRITEABLE); 1355 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); 1356 } 1357 pmap_unuse_pt(pmap, va, pv->pv_ptem); 1358 if (pmap != locked_pmap) 1359 PMAP_UNLOCK(pmap); 1360 if (allocated_pv == NULL) 1361 allocated_pv = pv; 1362 else 1363 free_pv_entry(pv); 1364 } 1365 } 1366 if (allocated_pv == NULL) { 1367 if (vpq == &vm_page_queues[PQ_INACTIVE]) { 1368 vpq = &vm_page_queues[PQ_ACTIVE]; 1369 goto retry; 1370 } 1371 panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable"); 1372 } 1373 return (allocated_pv); 1374} 1375 1376/* 1377 * Revision 1.370 1378 * 1379 * Move pmap_collect() out of the machine-dependent code, rename it 1380 * to reflect its new location, and add page queue and flag locking. 1381 * 1382 * Notes: (1) alpha, i386, and ia64 had identical implementations 1383 * of pmap_collect() in terms of machine-independent interfaces; 1384 * (2) sparc64 doesn't require it; (3) powerpc had it as a TODO. 1385 * 1386 * MIPS implementation was identical to alpha [Junos 8.2] 1387 */ 1388 1389/* 1390 * If it is the first entry on the list, it is actually 1391 * in the header and we must copy the following entry up 1392 * to the header. Otherwise we must search the list for 1393 * the entry. In either case we free the now unused entry. 1394 */ 1395 1396static void 1397pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) 1398{ 1399 pv_entry_t pv; 1400 1401 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1402 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1403 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1404 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1405 if (pmap == pv->pv_pmap && va == pv->pv_va) 1406 break; 1407 } 1408 } else { 1409 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1410 if (va == pv->pv_va) 1411 break; 1412 } 1413 } 1414 1415 KASSERT(pv != NULL, ("pmap_remove_entry: pv not found")); 1416 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1417 m->md.pv_list_count--; 1418 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 1419 vm_page_flag_clear(m, PG_WRITEABLE); 1420 1421 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1422 free_pv_entry(pv); 1423} 1424 1425/* 1426 * Create a pv entry for page at pa for 1427 * (pmap, va). 1428 */ 1429static void 1430pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m, 1431 boolean_t wired) 1432{ 1433 pv_entry_t pv; 1434 1435 pv = get_pv_entry(pmap); 1436 pv->pv_va = va; 1437 pv->pv_pmap = pmap; 1438 pv->pv_ptem = mpte; 1439 pv->pv_wired = wired; 1440 1441 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1442 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1443 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1444 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1445 m->md.pv_list_count++; 1446} 1447 1448/* 1449 * Conditionally create a pv entry. 
1450 */ 1451static boolean_t 1452pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va, 1453 vm_page_t m) 1454{ 1455 pv_entry_t pv; 1456 1457 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1458 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1459 if (pv_entry_count < pv_entry_high_water && 1460 (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) { 1461 pv_entry_count++; 1462 pv->pv_va = va; 1463 pv->pv_pmap = pmap; 1464 pv->pv_ptem = mpte; 1465 pv->pv_wired = FALSE; 1466 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1467 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1468 m->md.pv_list_count++; 1469 return (TRUE); 1470 } else 1471 return (FALSE); 1472} 1473 1474/* 1475 * pmap_remove_pte: do the things to unmap a page in a process 1476 */ 1477static int 1478pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va) 1479{ 1480 pt_entry_t oldpte; 1481 vm_page_t m; 1482 vm_offset_t pa; 1483 1484 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1485 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1486 1487 oldpte = loadandclear((u_int *)ptq); 1488 if (is_kernel_pmap(pmap)) 1489 *ptq = PTE_G; 1490 1491 if (oldpte & PTE_W) 1492 pmap->pm_stats.wired_count -= 1; 1493 1494 pmap->pm_stats.resident_count -= 1; 1495 pa = mips_tlbpfn_to_paddr(oldpte); 1496 1497 if (page_is_managed(pa)) { 1498 m = PHYS_TO_VM_PAGE(pa); 1499 if (oldpte & PTE_M) { 1500#if defined(PMAP_DIAGNOSTIC) 1501 if (pmap_nw_modified(oldpte)) { 1502 printf( 1503 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", 1504 va, oldpte); 1505 } 1506#endif 1507 vm_page_dirty(m); 1508 } 1509 if (m->md.pv_flags & PV_TABLE_REF) 1510 vm_page_flag_set(m, PG_REFERENCED); 1511 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); 1512 1513 pmap_remove_entry(pmap, m, va); 1514 } 1515 return pmap_unuse_pt(pmap, va, NULL); 1516} 1517 1518/* 1519 * Remove a single page from a process address space 1520 */ 1521static void 1522pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1523{ 1524 register pt_entry_t *ptq; 1525 1526 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1527 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1528 ptq = pmap_pte(pmap, va); 1529 1530 /* 1531 * if there is no pte for this address, just skip it!!! 1532 */ 1533 if (!ptq || !pmap_pte_v(ptq)) { 1534 return; 1535 } 1536 /* 1537 * get a local va for mappings for this pmap. 1538 */ 1539 (void)pmap_remove_pte(pmap, ptq, va); 1540 pmap_invalidate_page(pmap, va); 1541 1542 return; 1543} 1544 1545/* 1546 * Remove the given range of addresses from the specified map. 1547 * 1548 * It is assumed that the start and end are properly 1549 * rounded to the page size. 1550 */ 1551void 1552pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 1553{ 1554 vm_offset_t va, nva; 1555 1556 if (pmap == NULL) 1557 return; 1558 1559 if (pmap->pm_stats.resident_count == 0) 1560 return; 1561 1562 vm_page_lock_queues(); 1563 PMAP_LOCK(pmap); 1564 1565 /* 1566 * special handling of removing one page. a very common operation 1567 * and easy to short circuit some code. 1568 */ 1569 if ((sva + PAGE_SIZE) == eva) { 1570 pmap_remove_page(pmap, sva); 1571 goto out; 1572 } 1573 for (va = sva; va < eva; va = nva) { 1574 if (!*pmap_pde(pmap, va)) { 1575 nva = mips_segtrunc(va + MIPS_SEGSIZE); 1576 continue; 1577 } 1578 pmap_remove_page(pmap, va); 1579 nva = va + PAGE_SIZE; 1580 } 1581 1582out: 1583 vm_page_unlock_queues(); 1584 PMAP_UNLOCK(pmap); 1585} 1586 1587/* 1588 * Routine: pmap_remove_all 1589 * Function: 1590 * Removes this physical page from 1591 * all physical maps in which it resides. 
1592 * Reflects back modify bits to the pager. 1593 * 1594 * Notes: 1595 * Original versions of this routine were very 1596 * inefficient because they iteratively called 1597 * pmap_remove (slow...) 1598 */ 1599 1600void 1601pmap_remove_all(vm_page_t m) 1602{ 1603 register pv_entry_t pv; 1604 register pt_entry_t *pte, tpte; 1605 1606 KASSERT((m->flags & PG_FICTITIOUS) == 0, 1607 ("pmap_remove_all: page %p is fictitious", m)); 1608 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1609 1610 if (m->md.pv_flags & PV_TABLE_REF) 1611 vm_page_flag_set(m, PG_REFERENCED); 1612 1613 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1614 PMAP_LOCK(pv->pv_pmap); 1615 pv->pv_pmap->pm_stats.resident_count--; 1616 1617 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 1618 1619 tpte = loadandclear((u_int *)pte); 1620 if (is_kernel_pmap(pv->pv_pmap)) 1621 *pte = PTE_G; 1622 1623 if (tpte & PTE_W) 1624 pv->pv_pmap->pm_stats.wired_count--; 1625 1626 /* 1627 * Update the vm_page_t clean and reference bits. 1628 */ 1629 if (tpte & PTE_M) { 1630#if defined(PMAP_DIAGNOSTIC) 1631 if (pmap_nw_modified(tpte)) { 1632 printf( 1633 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", 1634 pv->pv_va, tpte); 1635 } 1636#endif 1637 vm_page_dirty(m); 1638 } 1639 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 1640 1641 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 1642 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1643 m->md.pv_list_count--; 1644 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 1645 PMAP_UNLOCK(pv->pv_pmap); 1646 free_pv_entry(pv); 1647 } 1648 1649 vm_page_flag_clear(m, PG_WRITEABLE); 1650 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD); 1651} 1652 1653/* 1654 * Set the physical protection on the 1655 * specified range of this map as requested. 1656 */ 1657void 1658pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1659{ 1660 pt_entry_t *pte; 1661 1662 if (pmap == NULL) 1663 return; 1664 1665 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1666 pmap_remove(pmap, sva, eva); 1667 return; 1668 } 1669 if (prot & VM_PROT_WRITE) 1670 return; 1671 1672 vm_page_lock_queues(); 1673 PMAP_LOCK(pmap); 1674 while (sva < eva) { 1675 pt_entry_t pbits, obits; 1676 vm_page_t m; 1677 vm_offset_t pa; 1678 1679 /* 1680 * If segment table entry is empty, skip this segment. 1681 */ 1682 if (!*pmap_pde(pmap, sva)) { 1683 sva = mips_segtrunc(sva + MIPS_SEGSIZE); 1684 continue; 1685 } 1686 /* 1687 * If pte is invalid, skip this page 1688 */ 1689 pte = pmap_pte(pmap, sva); 1690 if (!pmap_pte_v(pte)) { 1691 sva += PAGE_SIZE; 1692 continue; 1693 } 1694retry: 1695 obits = pbits = *pte; 1696 pa = mips_tlbpfn_to_paddr(pbits); 1697 1698 if (page_is_managed(pa)) { 1699 m = PHYS_TO_VM_PAGE(pa); 1700 if (m->md.pv_flags & PV_TABLE_REF) { 1701 vm_page_flag_set(m, PG_REFERENCED); 1702 m->md.pv_flags &= ~PV_TABLE_REF; 1703 } 1704 if (pbits & PTE_M) { 1705 vm_page_dirty(m); 1706 m->md.pv_flags &= ~PV_TABLE_MOD; 1707 } 1708 } 1709 pbits = (pbits & ~PTE_M) | PTE_RO; 1710 1711 if (pbits != *pte) { 1712 if (!atomic_cmpset_int((u_int *)pte, obits, pbits)) 1713 goto retry; 1714 pmap_update_page(pmap, sva, pbits); 1715 } 1716 sva += PAGE_SIZE; 1717 } 1718 vm_page_unlock_queues(); 1719 PMAP_UNLOCK(pmap); 1720} 1721 1722/* 1723 * Insert the given physical page (p) at 1724 * the specified virtual address (v) in the 1725 * target physical map with the protection requested. 1726 * 1727 * If specified, the page will be wired down, meaning 1728 * that the related pte can not be reclaimed. 
1729 * 1730 * NB: This is the only routine which MAY NOT lazy-evaluate 1731 * or lose information. That is, this routine must actually 1732 * insert this page into the given map NOW. 1733 */ 1734void 1735pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 1736 vm_prot_t prot, boolean_t wired) 1737{ 1738 vm_offset_t pa, opa; 1739 register pt_entry_t *pte; 1740 pt_entry_t origpte, newpte; 1741 vm_page_t mpte, om; 1742 int rw = 0; 1743 1744 if (pmap == NULL) 1745 return; 1746 1747 va &= ~PAGE_MASK; 1748#ifdef PMAP_DIAGNOSTIC 1749 if (va > VM_MAX_KERNEL_ADDRESS) 1750 panic("pmap_enter: toobig"); 1751#endif 1752 1753 mpte = NULL; 1754 1755 vm_page_lock_queues(); 1756 PMAP_LOCK(pmap); 1757 1758 /* 1759 * In the case that a page table page is not resident, we are 1760 * creating it here. 1761 */ 1762 if (va < VM_MAXUSER_ADDRESS) { 1763 mpte = pmap_allocpte(pmap, va, M_WAITOK); 1764 } 1765 pte = pmap_pte(pmap, va); 1766 1767 /* 1768 * Page Directory table entry not valid, we need a new PT page 1769 */ 1770 if (pte == NULL) { 1771 panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n", 1772 (void *)pmap->pm_segtab, va); 1773 } 1774 pa = VM_PAGE_TO_PHYS(m); 1775 om = NULL; 1776 origpte = *pte; 1777 opa = mips_tlbpfn_to_paddr(origpte); 1778 1779 /* 1780 * Mapping has not changed, must be protection or wiring change. 1781 */ 1782 if ((origpte & PTE_V) && (opa == pa)) { 1783 /* 1784 * Wiring change, just update stats. We don't worry about 1785 * wiring PT pages as they remain resident as long as there 1786 * are valid mappings in them. Hence, if a user page is 1787 * wired, the PT page will be also. 1788 */ 1789 if (wired && ((origpte & PTE_W) == 0)) 1790 pmap->pm_stats.wired_count++; 1791 else if (!wired && (origpte & PTE_W)) 1792 pmap->pm_stats.wired_count--; 1793 1794#if defined(PMAP_DIAGNOSTIC) 1795 if (pmap_nw_modified(origpte)) { 1796 printf( 1797 "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", 1798 va, origpte); 1799 } 1800#endif 1801 1802 /* 1803 * Remove extra pte reference 1804 */ 1805 if (mpte) 1806 mpte->wire_count--; 1807 1808 /* 1809 * We might be turning off write access to the page, so we 1810 * go ahead and sense modify status. 1811 */ 1812 if (page_is_managed(opa)) { 1813 om = m; 1814 } 1815 goto validate; 1816 } 1817 /* 1818 * Mapping has changed, invalidate old range and fall through to 1819 * handle validating new mapping. 1820 */ 1821 if (opa) { 1822 if (origpte & PTE_W) 1823 pmap->pm_stats.wired_count--; 1824 1825 if (page_is_managed(opa)) { 1826 om = PHYS_TO_VM_PAGE(opa); 1827 pmap_remove_entry(pmap, om, va); 1828 } 1829 if (mpte != NULL) { 1830 mpte->wire_count--; 1831 KASSERT(mpte->wire_count > 0, 1832 ("pmap_enter: missing reference to page table page," 1833 " va: 0x%x", va)); 1834 } 1835 } else 1836 pmap->pm_stats.resident_count++; 1837 1838 /* 1839 * Enter on the PV list if part of our managed memory. Note that we 1840 * raise IPL while manipulating pv_table since pmap_enter can be 1841 * called at interrupt time. 
1842 */ 1843 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 1844 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 1845 ("pmap_enter: managed mapping within the clean submap")); 1846 pmap_insert_entry(pmap, va, mpte, m, wired); 1847 } 1848 /* 1849 * Increment counters 1850 */ 1851 if (wired) 1852 pmap->pm_stats.wired_count++; 1853 1854validate: 1855 if ((access & VM_PROT_WRITE) != 0) 1856 m->md.pv_flags |= PV_TABLE_MOD | PV_TABLE_REF; 1857 rw = init_pte_prot(va, m, prot); 1858 1859#ifdef PMAP_DEBUG 1860 printf("pmap_enter: va: 0x%08x -> pa: 0x%08x\n", va, pa); 1861#endif 1862 /* 1863 * Now validate mapping with desired protection/wiring. 1864 */ 1865 newpte = mips_paddr_to_tlbpfn(pa) | rw | PTE_V; 1866 1867 if (is_cacheable_mem(pa)) 1868 newpte |= PTE_CACHE; 1869 else 1870 newpte |= PTE_UNCACHED; 1871 1872 if (wired) 1873 newpte |= PTE_W; 1874 1875 if (is_kernel_pmap(pmap)) { 1876 newpte |= PTE_G; 1877 } 1878 1879 /* 1880 * if the mapping or permission bits are different, we need to 1881 * update the pte. 1882 */ 1883 if (origpte != newpte) { 1884 if (origpte & PTE_V) { 1885 *pte = newpte; 1886 if (page_is_managed(opa) && (opa != pa)) { 1887 if (om->md.pv_flags & PV_TABLE_REF) 1888 vm_page_flag_set(om, PG_REFERENCED); 1889 om->md.pv_flags &= 1890 ~(PV_TABLE_REF | PV_TABLE_MOD); 1891 } 1892 if (origpte & PTE_M) { 1893 KASSERT((origpte & PTE_RW), 1894 ("pmap_enter: modified page not writable:" 1895 " va: 0x%x, pte: 0x%lx", va, origpte)); 1896 if (page_is_managed(opa)) 1897 vm_page_dirty(om); 1898 } 1899 } else { 1900 *pte = newpte; 1901 } 1902 } 1903 pmap_update_page(pmap, va, newpte); 1904 1905 /* 1906 * Sync I & D caches for executable pages. Do this only if the the 1907 * target pmap belongs to the current process. Otherwise, an 1908 * unresolvable TLB miss may occur. 1909 */ 1910 if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) && 1911 (prot & VM_PROT_EXECUTE)) { 1912 mips_icache_sync_range(va, NBPG); 1913 mips_dcache_wbinv_range(va, NBPG); 1914 } 1915 vm_page_unlock_queues(); 1916 PMAP_UNLOCK(pmap); 1917} 1918 1919/* 1920 * this code makes some *MAJOR* assumptions: 1921 * 1. Current pmap & pmap exists. 1922 * 2. Not wired. 1923 * 3. Read access. 1924 * 4. No page table pages. 1925 * but is *MUCH* faster than pmap_enter... 1926 */ 1927 1928void 1929pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 1930{ 1931 1932 PMAP_LOCK(pmap); 1933 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); 1934 PMAP_UNLOCK(pmap); 1935} 1936 1937static vm_page_t 1938pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 1939 vm_prot_t prot, vm_page_t mpte) 1940{ 1941 pt_entry_t *pte; 1942 vm_offset_t pa; 1943 1944 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 1945 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 1946 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 1947 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1948 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1949 1950 /* 1951 * In the case that a page table page is not resident, we are 1952 * creating it here. 
1953 */
1954	if (va < VM_MAXUSER_ADDRESS) {
1955		unsigned ptepindex;
1956		vm_offset_t pteva;
1957
1958		/*
1959		 * Calculate the page table page index.
1960		 */
1961		ptepindex = va >> SEGSHIFT;
1962		if (mpte && (mpte->pindex == ptepindex)) {
1963			mpte->wire_count++;
1964		} else {
1965			/*
1966			 * Get the page directory entry.
1967			 */
1968			pteva = (vm_offset_t)pmap->pm_segtab[ptepindex];
1969
1970			/*
1971			 * If the page table page is mapped, we just
1972			 * increment the hold count, and activate it.
1973			 */
1974			if (pteva) {
1975				if (pmap->pm_ptphint &&
1976				    (pmap->pm_ptphint->pindex == ptepindex)) {
1977					mpte = pmap->pm_ptphint;
1978				} else {
1979					mpte = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva));
1980					pmap->pm_ptphint = mpte;
1981				}
1982				mpte->wire_count++;
1983			} else {
1984				mpte = _pmap_allocpte(pmap, ptepindex,
1985				    M_NOWAIT);
1986				if (mpte == NULL)
1987					return (mpte);
1988			}
1989		}
1990	} else {
1991		mpte = NULL;
1992	}
1993
1994	pte = pmap_pte(pmap, va);
1995	if (pmap_pte_v(pte)) {
1996		if (mpte != NULL) {
1997			mpte->wire_count--;
1998			mpte = NULL;
1999		}
2000		return (mpte);
2001	}
2002
2003	/*
2004	 * Enter on the PV list if part of our managed memory.
2005	 */
2006	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
2007	    !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2008		if (mpte != NULL) {
2009			pmap_unwire_pte_hold(pmap, mpte);
2010			mpte = NULL;
2011		}
2012		return (mpte);
2013	}
2014
2015	/*
2016	 * Increment counters
2017	 */
2018	pmap->pm_stats.resident_count++;
2019
2020	pa = VM_PAGE_TO_PHYS(m);
2021
2022	/*
2023	 * Now validate mapping with RO protection
2024	 */
2025	*pte = mips_paddr_to_tlbpfn(pa) | PTE_V;
2026
2027	if (is_cacheable_mem(pa))
2028		*pte |= PTE_CACHE;
2029	else
2030		*pte |= PTE_UNCACHED;
2031
2032	if (is_kernel_pmap(pmap))
2033		*pte |= PTE_G;
2034	else {
2035		*pte |= PTE_RO;
2036		/*
2037		 * Sync the I & D caches.  Do this only if the target pmap
2038		 * belongs to the current process.  Otherwise, an
2039		 * unresolvable TLB miss may occur. */
2040		if (pmap == &curproc->p_vmspace->vm_pmap) {
2041			va &= ~PAGE_MASK;
2042			mips_icache_sync_range(va, NBPG);
2043			mips_dcache_wbinv_range(va, NBPG);
2044		}
2045	}
2046	return (mpte);
2047}
2048
2049/*
2050 * Make a temporary mapping for a physical address.  This is only intended
2051 * to be used for panic dumps.
2052 */
2053void *
2054pmap_kenter_temporary(vm_paddr_t pa, int i)
2055{
2056	vm_offset_t va;
2057
2058	if (i != 0)
2059		printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
2060		    __func__);
2061
2062#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2063	if (need_wired_tlb_page_pool) {
2064		va = pmap_map_fpage(pa, &fpages_shared[PMAP_FPAGE_KENTER_TEMP],
2065		    TRUE);
2066	} else
2067#endif
2068	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
2069		va = MIPS_PHYS_TO_CACHED(pa);
2070	} else {
2071		int cpu;
2072		struct local_sysmaps *sysm;
2073
2074		cpu = PCPU_GET(cpuid);
2075		sysm = &sysmap_lmem[cpu];
2076		/* Since this is for the debugger, no locks or any other fun */
2077		sysm->CMAP1 = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2078		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2079		sysm->valid1 = 1;
2080		va = (vm_offset_t)sysm->CADDR1;
2081	}
2082	return ((void *)va);
2083}
2084
2085void
2086pmap_kenter_temporary_free(vm_paddr_t pa)
2087{
2088	int cpu;
2089	struct local_sysmaps *sysm;
2090
2091	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
2092		/* nothing to do for this case */
2093		return;
2094	}
2095	cpu = PCPU_GET(cpuid);
2096	sysm = &sysmap_lmem[cpu];
2097	if (sysm->valid1) {
2098		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2099		sysm->CMAP1 = 0;
2100		sysm->valid1 = 0;
2101	}
2102}
2103
2104/*
2105 * The code that was here has moved to the machine-independent
2106 * vm_map_pmap_enter().
2107 */
2108
2109/*
2110 * Maps a sequence of resident pages belonging to the same object.
2111 * The sequence begins with the given page m_start.  This page is
2112 * mapped at the given virtual address start.  Each subsequent page is
2113 * mapped at a virtual address that is offset from start by the same
2114 * amount as the page is offset from m_start within the object.  The
2115 * last page in the sequence is the page with the largest offset from
2116 * m_start that can be mapped at a virtual address less than the given
2117 * virtual address end.  Not every virtual page between start and end
2118 * is mapped; only those for which a resident page exists with the
2119 * corresponding offset from m_start are mapped.
2120 */
2121void
2122pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2123    vm_page_t m_start, vm_prot_t prot)
2124{
2125	vm_page_t m, mpte;
2126	vm_pindex_t diff, psize;
2127
2128	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
2129	psize = atop(end - start);
2130	mpte = NULL;
2131	m = m_start;
2132	PMAP_LOCK(pmap);
2133	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2134		mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
2135		    prot, mpte);
2136		m = TAILQ_NEXT(m, listq);
2137	}
2138	PMAP_UNLOCK(pmap);
2139}
2140
2141/*
2142 * pmap_object_init_pt preloads the ptes for a given object
2143 * into the specified pmap.  This eliminates the blast of soft
2144 * faults on process startup and immediately after an mmap.
2145 */
2146void
2147pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2148    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2149{
2150	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2151	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2152	    ("pmap_object_init_pt: non-device object"));
2153}
2154
2155/*
2156 * Routine:	pmap_change_wiring
2157 * Function:	Change the wiring attribute for a map/virtual-address
2158 *		pair.
2159 * In/out conditions:
2160 *		The mapping must already exist in the pmap.
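 *
 * A minimal usage sketch (hypothetical caller, not from this file),
 * assuming pmap_enter() has already established a mapping at va:
 *
 *	pmap_change_wiring(pmap, va, TRUE);	wire the page at va
 *	pmap_change_wiring(pmap, va, FALSE);	unwire it again
 *
 * Only pm_stats.wired_count and the PTE_W bit change; as noted in the
 * body, no TLB invalidation is needed.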
2161 */
2162void
2163pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
2164{
2165	register pt_entry_t *pte;
2166
2167	if (pmap == NULL)
2168		return;
2169
2170	PMAP_LOCK(pmap);
2171	pte = pmap_pte(pmap, va);
2172
2173	if (wired && !pmap_pte_w(pte))
2174		pmap->pm_stats.wired_count++;
2175	else if (!wired && pmap_pte_w(pte))
2176		pmap->pm_stats.wired_count--;
2177
2178	/*
2179	 * Wiring is not a hardware characteristic, so there is no need to
2180	 * invalidate the TLB.
2181	 */
2182	pmap_pte_set_w(pte, wired);
2183	PMAP_UNLOCK(pmap);
2184}
2185
2186/*
2187 * Copy the range specified by src_addr/len
2188 * from the source map to the range dst_addr/len
2189 * in the destination map.
2190 *
2191 * This routine is only advisory and need not do anything.
2192 */
2193
2194void
2195pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2196    vm_size_t len, vm_offset_t src_addr)
2197{
2198}
2199
2200/*
2201 * pmap_zero_page zeros the specified hardware page by mapping
2202 * the page into KVM and using bzero to clear its contents.
2203 */
2204void
2205pmap_zero_page(vm_page_t m)
2206{
2207	vm_offset_t va;
2208	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2209
2210#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2211	if (need_wired_tlb_page_pool) {
2212		struct fpage *fp1;
2213		struct sysmaps *sysmaps;
2214
2215		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
2216		mtx_lock(&sysmaps->lock);
2217		sched_pin();
2218
2219		fp1 = &sysmaps->fp[PMAP_FPAGE1];
2220		va = pmap_map_fpage(phys, fp1, FALSE);
2221		bzero((caddr_t)va, PAGE_SIZE);
2222		pmap_unmap_fpage(phys, fp1);
2223		sched_unpin();
2224		mtx_unlock(&sysmaps->lock);
2225		/*
2226		 * Should the cache be flushed here?
2227		 */
2228	} else
2229#endif
2230	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
2231
2232		va = MIPS_PHYS_TO_UNCACHED(phys);
2233
2234		bzero((caddr_t)va, PAGE_SIZE);
2235		mips_dcache_wbinv_range(va, PAGE_SIZE);
2236	} else {
2237		int cpu;
2238		struct local_sysmaps *sysm;
2239
2240		cpu = PCPU_GET(cpuid);
2241		sysm = &sysmap_lmem[cpu];
2242		PMAP_LGMEM_LOCK(sysm);
2243		sched_pin();
2244		sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2245		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2246		sysm->valid1 = 1;
2247		bzero(sysm->CADDR1, PAGE_SIZE);
2248		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2249		sysm->CMAP1 = 0;
2250		sysm->valid1 = 0;
2251		sched_unpin();
2252		PMAP_LGMEM_UNLOCK(sysm);
2253	}
2254
2255}
2256
2257/*
2258 * pmap_zero_page_area zeros the specified hardware page by mapping
2259 * the page into KVM and using bzero to clear its contents.
2260 *
2261 * off and size may not cover an area beyond a single hardware page.
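 *
 * A quick illustration (hypothetical values, not from this file): with
 * 4 KB pages, off = 0x800 and size = 0x400 zero bytes 0x800 through
 * 0xbff of the page, while off = 0xe00 with size = 0x400 would run past
 * the page boundary and is therefore not allowed.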
2262 */ 2263void 2264pmap_zero_page_area(vm_page_t m, int off, int size) 2265{ 2266 vm_offset_t va; 2267 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2268 2269#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 2270 if (need_wired_tlb_page_pool) { 2271 struct fpage *fp1; 2272 struct sysmaps *sysmaps; 2273 2274 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 2275 mtx_lock(&sysmaps->lock); 2276 sched_pin(); 2277 2278 fp1 = &sysmaps->fp[PMAP_FPAGE1]; 2279 va = pmap_map_fpage(phys, fp1, FALSE); 2280 bzero((caddr_t)va + off, size); 2281 pmap_unmap_fpage(phys, fp1); 2282 2283 sched_unpin(); 2284 mtx_unlock(&sysmaps->lock); 2285 } else 2286#endif 2287 if (phys < MIPS_KSEG0_LARGEST_PHYS) { 2288 va = MIPS_PHYS_TO_UNCACHED(phys); 2289 bzero((char *)(caddr_t)va + off, size); 2290 mips_dcache_wbinv_range(va + off, size); 2291 } else { 2292 int cpu; 2293 struct local_sysmaps *sysm; 2294 2295 cpu = PCPU_GET(cpuid); 2296 sysm = &sysmap_lmem[cpu]; 2297 PMAP_LGMEM_LOCK(sysm); 2298 sched_pin(); 2299 sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2300 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); 2301 sysm->valid1 = 1; 2302 bzero((char *)sysm->CADDR1 + off, size); 2303 pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); 2304 sysm->CMAP1 = 0; 2305 sysm->valid1 = 0; 2306 sched_unpin(); 2307 PMAP_LGMEM_UNLOCK(sysm); 2308 } 2309} 2310 2311void 2312pmap_zero_page_idle(vm_page_t m) 2313{ 2314 vm_offset_t va; 2315 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2316 2317#ifdef VM_ALLOC_WIRED_TLB_PG_POOL 2318 if (need_wired_tlb_page_pool) { 2319 sched_pin(); 2320 va = pmap_map_fpage(phys, &fpages_shared[PMAP_FPAGE3], FALSE); 2321 bzero((caddr_t)va, PAGE_SIZE); 2322 pmap_unmap_fpage(phys, &fpages_shared[PMAP_FPAGE3]); 2323 sched_unpin(); 2324 } else 2325#endif 2326 if (phys < MIPS_KSEG0_LARGEST_PHYS) { 2327 va = MIPS_PHYS_TO_UNCACHED(phys); 2328 bzero((caddr_t)va, PAGE_SIZE); 2329 mips_dcache_wbinv_range(va, PAGE_SIZE); 2330 } else { 2331 int cpu; 2332 struct local_sysmaps *sysm; 2333 2334 cpu = PCPU_GET(cpuid); 2335 sysm = &sysmap_lmem[cpu]; 2336 PMAP_LGMEM_LOCK(sysm); 2337 sched_pin(); 2338 sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; 2339 pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); 2340 sysm->valid1 = 1; 2341 bzero(sysm->CADDR1, PAGE_SIZE); 2342 pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); 2343 sysm->CMAP1 = 0; 2344 sysm->valid1 = 0; 2345 sched_unpin(); 2346 PMAP_LGMEM_UNLOCK(sysm); 2347 } 2348 2349} 2350 2351/* 2352 * pmap_copy_page copies the specified (machine independent) 2353 * page by mapping the page into virtual memory and using 2354 * bcopy to copy the page, one machine dependent page at a 2355 * time. 
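 *
 * A minimal usage sketch (hypothetical caller, not from this file):
 *
 *	pmap_copy_page(src_m, dst_m);
 *
 * copies exactly PAGE_SIZE bytes; the fpage, KSEG0, and temporary-map
 * cases below differ only in how the two pages become addressable.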
2356 */
2357void
2358pmap_copy_page(vm_page_t src, vm_page_t dst)
2359{
2360	vm_offset_t va_src, va_dst;
2361	vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src);
2362	vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst);
2363
2364
2365#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2366	if (need_wired_tlb_page_pool) {
2367		struct fpage *fp1, *fp2;
2368		struct sysmaps *sysmaps;
2369
2370		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
2371		mtx_lock(&sysmaps->lock);
2372		sched_pin();
2373
2374		fp1 = &sysmaps->fp[PMAP_FPAGE1];
2375		fp2 = &sysmaps->fp[PMAP_FPAGE2];
2376
2377		va_src = pmap_map_fpage(phy_src, fp1, FALSE);
2378		va_dst = pmap_map_fpage(phy_dst, fp2, FALSE);
2379
2380		bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2381
2382		pmap_unmap_fpage(phy_src, fp1);
2383		pmap_unmap_fpage(phy_dst, fp2);
2384		sched_unpin();
2385		mtx_unlock(&sysmaps->lock);
2386
2387		/*
2388		 * Should the cache be flushed here?
2389		 */
2390	} else
2391#endif
2392	{
2393		if ((phy_src < MIPS_KSEG0_LARGEST_PHYS) && (phy_dst < MIPS_KSEG0_LARGEST_PHYS)) {
2394			/* easy case, both can be accessed via KSEG0 */
2395			va_src = MIPS_PHYS_TO_CACHED(phy_src);
2396			va_dst = MIPS_PHYS_TO_CACHED(phy_dst);
2397			bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2398		} else {
2399			int cpu;
2400			struct local_sysmaps *sysm;
2401
2402			cpu = PCPU_GET(cpuid);
2403			sysm = &sysmap_lmem[cpu];
2404			PMAP_LGMEM_LOCK(sysm);
2405			sched_pin();
2406			if (phy_src < MIPS_KSEG0_LARGEST_PHYS) {
2407				/* one side needs mapping - dest */
2408				va_src = MIPS_PHYS_TO_CACHED(phy_src);
2409				sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2410				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2);
2411				sysm->valid2 = 1;
2412				va_dst = (vm_offset_t)sysm->CADDR2;
2413			} else if (phy_dst < MIPS_KSEG0_LARGEST_PHYS) {
2414				/* one side needs mapping - src */
2415				va_dst = MIPS_PHYS_TO_CACHED(phy_dst);
2416				sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2417				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2418				va_src = (vm_offset_t)sysm->CADDR1;
2419				sysm->valid1 = 1;
2420			} else {
2421				/* both need mapping */
2422				sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2423				sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2424				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2425				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2);
2426				sysm->valid1 = sysm->valid2 = 1;
2427				va_src = (vm_offset_t)sysm->CADDR1;
2428				va_dst = (vm_offset_t)sysm->CADDR2;
2429			}
2430			bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2431			if (sysm->valid1) {
2432				pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2433				sysm->CMAP1 = 0;
2434				sysm->valid1 = 0;
2435			}
2436			if (sysm->valid2) {
2437				pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR2);
2438				sysm->CMAP2 = 0;
2439				sysm->valid2 = 0;
2440			}
2441			sched_unpin();
2442			PMAP_LGMEM_UNLOCK(sysm);
2443		}
2444	}
2445}
2446
2447/*
2448 * Returns true if the pmap's pv is one of the first
2449 * 16 pvs linked to from this page.  This count may
2450 * be changed upwards or downwards in the future; it
2451 * is only necessary that true be returned for a small
2452 * subset of pmaps for proper page aging.
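 *
 * A sketch of the intended use (hypothetical caller, not from this
 * file):
 *
 *	if (pmap_page_exists_quick(pmap, m))
 *		...m is almost certainly still mapped by pmap...
 *
 * Note that FALSE only means pmap was not found among the first 16 pv
 * entries, not that no mapping exists.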
2453 */
2454boolean_t
2455pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2456{
2457	pv_entry_t pv;
2458	int loops = 0;
2459
2460	if (m->flags & PG_FICTITIOUS)
2461		return (FALSE);
2462
2463	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2464	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2465		if (pv->pv_pmap == pmap) {
2466			return (TRUE);
2467		}
2468		loops++;
2469		if (loops >= 16)
2470			break;
2471	}
2472	return (FALSE);
2473}
2474
2475/*
2476 * Remove all pages from the specified address space; this aids
2477 * process exit speeds.  Also, this code is special cased for the
2478 * current process only, but can have the more generic (and
2479 * slightly slower) mode enabled.  This is much faster than
2480 * pmap_remove in the case of running down an entire address
2481 * space.
2482 */
2483void
2484pmap_remove_pages(pmap_t pmap)
2485{
2486	pt_entry_t *pte, tpte;
2487	pv_entry_t pv, npv;
2488	vm_page_t m;
2489
2490	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2491		printf("warning: pmap_remove_pages called with non-current pmap\n");
2492		return;
2493	}
2494	vm_page_lock_queues();
2495	PMAP_LOCK(pmap);
2496	sched_pin();
2497	/* XXX: should this loop use TAILQ_FOREACH_SAFE? */
2498	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2499	    pv;
2500	    pv = npv) {
2501
2502		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2503		if (!pmap_pte_v(pte))
2504			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2505		tpte = *pte;
2506
2507		/*
2508		 * We cannot remove wired pages from a process' mapping at this time.
2509		 */
2510		if (tpte & PTE_W) {
2511			npv = TAILQ_NEXT(pv, pv_plist);
2512			continue;
2513		}
2514		*pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2515
2516		m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(tpte));
2517
2518		KASSERT(m < &vm_page_array[vm_page_array_size],
2519		    ("pmap_remove_pages: bad tpte %lx", tpte));
2520
2521		pv->pv_pmap->pm_stats.resident_count--;
2522
2523		/*
2524		 * Update the vm_page_t clean and reference bits.
2525		 */
2526		if (tpte & PTE_M) {
2527			vm_page_dirty(m);
2528		}
2529		npv = TAILQ_NEXT(pv, pv_plist);
2530		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2531
2532		m->md.pv_list_count--;
2533		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2534		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
2535			vm_page_flag_clear(m, PG_WRITEABLE);
2536		}
2537		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2538		free_pv_entry(pv);
2539	}
2540	sched_unpin();
2541	pmap_invalidate_all(pmap);
2542	PMAP_UNLOCK(pmap);
2543	vm_page_unlock_queues();
2544}
2545
2546/*
2547 * pmap_testbit tests bits in pte's.
2548 * Note that the testbit/changebit routines are inline,
2549 * and a lot of things compile-time evaluate.
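 *
 * For example, pmap_is_modified() below uses pmap_testbit(m, PTE_M) to
 * ask whether any mapping of m has the modified bit set, and
 * pmap_clear_modify() uses pmap_changebit(m, PTE_M, FALSE) to clear it
 * in every mapping of the page.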
2550 */
2551static boolean_t
2552pmap_testbit(vm_page_t m, int bit)
2553{
2554	pv_entry_t pv;
2555	pt_entry_t *pte;
2556	boolean_t rv = FALSE;
2557
2558	if (m->flags & PG_FICTITIOUS)
2559		return (rv);
2560
2561	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2562		return (rv);
2563
2564	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2565	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2566#if defined(PMAP_DIAGNOSTIC)
2567		if (!pv->pv_pmap) {
2568			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2569			continue;
2570		}
2571#endif
2572		PMAP_LOCK(pv->pv_pmap);
2573		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2574		rv = (*pte & bit) != 0;
2575		PMAP_UNLOCK(pv->pv_pmap);
2576		if (rv)
2577			break;
2578	}
2579	return (rv);
2580}
2581
2582/*
2583 * This routine is used to modify bits in ptes.
2584 */
2585static __inline void
2586pmap_changebit(vm_page_t m, int bit, boolean_t setem)
2587{
2588	register pv_entry_t pv;
2589	register pt_entry_t *pte;
2590
2591	if (m->flags & PG_FICTITIOUS)
2592		return;
2593
2594	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2595	/*
2596	 * Loop over all current mappings, setting/clearing as appropriate.
2597	 * If setting RO, do we need to clear the VAC?
2598	 */
2599	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2600#if defined(PMAP_DIAGNOSTIC)
2601		if (!pv->pv_pmap) {
2602			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2603			continue;
2604		}
2605#endif
2606
2607		PMAP_LOCK(pv->pv_pmap);
2608		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2609
2610		if (setem) {
2611			*(int *)pte |= bit;
2612			pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2613		} else {
2614			vm_offset_t pbits = *(vm_offset_t *)pte;
2615
2616			if (pbits & bit) {
2617				if (bit == PTE_RW) {
2618					if (pbits & PTE_M) {
2619						vm_page_dirty(m);
2620					}
2621					*(int *)pte = (pbits & ~(PTE_M | PTE_RW)) |
2622					    PTE_RO;
2623				} else {
2624					*(int *)pte = pbits & ~bit;
2625				}
2626				pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2627			}
2628		}
2629		PMAP_UNLOCK(pv->pv_pmap);
2630	}
2631	if (!setem && bit == PTE_RW)
2632		vm_page_flag_clear(m, PG_WRITEABLE);
2633}
2634
2635/*
2636 * pmap_page_wired_mappings:
2637 *
2638 *	Return the number of managed mappings to the given physical page
2639 *	that are wired.
2640 */
2641int
2642pmap_page_wired_mappings(vm_page_t m)
2643{
2644	pv_entry_t pv;
2645	int count;
2646
2647	count = 0;
2648	if ((m->flags & PG_FICTITIOUS) != 0)
2649		return (count);
2650	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2651	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
2652		if (pv->pv_wired)
2653			count++;
2654	return (count);
2655}
2656
2657/*
2658 * Clear the write and modified bits in each of the given page's mappings.
2659 */
2660void
2661pmap_remove_write(vm_page_t m)
2662{
2663	pv_entry_t pv, npv;
2664	vm_offset_t va;
2665	pt_entry_t *pte;
2666
2667	if ((m->flags & PG_WRITEABLE) == 0)
2668		return;
2669
2670	/*
2671	 * Loop over all current mappings, setting/clearing as appropriate.
2672	 */
2673	for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) {
2674		npv = TAILQ_NEXT(pv, pv_list);
2675		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2676
2677		if ((pte == NULL) || !mips_pg_v(*pte))
2678			panic("pmap_remove_write: page on pv_list has no pte");
2679
2680		va = pv->pv_va;
2681		pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE,
2682		    VM_PROT_READ | VM_PROT_EXECUTE);
2683	}
2684	vm_page_flag_clear(m, PG_WRITEABLE);
2685}
2686
2687/*
2688 * pmap_ts_referenced:
2689 *
2690 *	Return the count of reference bits for a page, clearing all of them.
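 *
 *	On this port the reference state lives in the machine-dependent
 *	pv_flags (PV_TABLE_REF) rather than in per-pte bits, so the
 *	count returned below is always either 0 or 1.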
2691 */
2692int
2693pmap_ts_referenced(vm_page_t m)
2694{
2695	if (m->flags & PG_FICTITIOUS)
2696		return (0);
2697
2698	if (m->md.pv_flags & PV_TABLE_REF) {
2699		m->md.pv_flags &= ~PV_TABLE_REF;
2700		return (1);
2701	}
2702	return (0);
2703}
2704
2705/*
2706 * pmap_is_modified:
2707 *
2708 *	Return whether or not the specified physical page was modified
2709 *	in any physical maps.
2710 */
2711boolean_t
2712pmap_is_modified(vm_page_t m)
2713{
2714	if (m->flags & PG_FICTITIOUS)
2715		return (FALSE);
2716
2717	if (m->md.pv_flags & PV_TABLE_MOD)
2718		return (TRUE);
2719	else
2720		return (pmap_testbit(m, PTE_M));
2721}
2722
2723/* N/C */
2724
2725/*
2726 * pmap_is_prefaultable:
2727 *
2728 *	Return whether or not the specified virtual address is eligible
2729 *	for prefault.
2730 */
2731boolean_t
2732pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2733{
2734	pt_entry_t *pte;
2735	boolean_t rv;
2736
2737	rv = FALSE;
2738	PMAP_LOCK(pmap);
2739	if (*pmap_pde(pmap, addr)) {
2740		pte = pmap_pte(pmap, addr);
2741		rv = (*pte == 0);
2742	}
2743	PMAP_UNLOCK(pmap);
2744	return (rv);
2745}
2746
2747/*
2748 * Clear the modify bits on the specified physical page.
2749 */
2750void
2751pmap_clear_modify(vm_page_t m)
2752{
2753	if (m->flags & PG_FICTITIOUS)
2754		return;
2755	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2756	if (m->md.pv_flags & PV_TABLE_MOD) {
2757		pmap_changebit(m, PTE_M, FALSE);
2758		m->md.pv_flags &= ~PV_TABLE_MOD;
2759	}
2760}
2761
2762/*
2763 * pmap_clear_reference:
2764 *
2765 *	Clear the reference bit on the specified physical page.
2766 */
2767void
2768pmap_clear_reference(vm_page_t m)
2769{
2770	if (m->flags & PG_FICTITIOUS)
2771		return;
2772
2773	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2774	if (m->md.pv_flags & PV_TABLE_REF) {
2775		m->md.pv_flags &= ~PV_TABLE_REF;
2776	}
2777}
2778
2779/*
2780 * Miscellaneous support routines follow
2781 */
2782
2783/*
2784 * Map a set of physical memory pages into the kernel virtual
2785 * address space.  Return a pointer to where it is mapped.  This
2786 * routine is intended to be used for mapping device memory,
2787 * NOT real memory.
2788 */
2789
2796void *
2797pmap_mapdev(vm_offset_t pa, vm_size_t size)
2798{
2799	vm_offset_t va, tmpva, offset;
2800
2801	/*
2802	 * KSEG1 maps only the first 512M of the physical address space;
2803	 * for pa > 0x20000000 we must create a proper mapping using pmap_kenter().
2804	 */
2805	if (pa + size < MIPS_KSEG0_LARGEST_PHYS)
2806		return ((void *)MIPS_PHYS_TO_KSEG1(pa));
2807	else {
2808		offset = pa & PAGE_MASK;
2809		size = roundup(size, PAGE_SIZE);
2810
2811		va = kmem_alloc_nofault(kernel_map, size);
2812		if (!va)
2813			panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2814		for (tmpva = va; size > 0;) {
2815			pmap_kenter(tmpva, pa);
2816			size -= PAGE_SIZE;
2817			tmpva += PAGE_SIZE;
2818			pa += PAGE_SIZE;
2819		}
2820	}
2821
2822	return ((void *)(va + offset));
2823}
2824
2825void
2826pmap_unmapdev(vm_offset_t va, vm_size_t size)
2827{
2828}
2829
2830/*
2831 * Perform the pmap work for mincore.
2832 */
2833int
2834pmap_mincore(pmap_t pmap, vm_offset_t addr)
2835{
2836
2837	pt_entry_t *ptep, pte;
2838	vm_page_t m;
2839	int val = 0;
2840
2841	PMAP_LOCK(pmap);
2842	ptep = pmap_pte(pmap, addr);
2843	pte = (ptep != NULL) ? *ptep : 0;
2844	PMAP_UNLOCK(pmap);
2845
2846	if (mips_pg_v(pte)) {
2847		vm_offset_t pa;
2848
2849		val = MINCORE_INCORE;
2850		pa = mips_tlbpfn_to_paddr(pte);
2851		if (!page_is_managed(pa))
2852			return (val);
2853
2854		m = PHYS_TO_VM_PAGE(pa);
2855
2856		/*
2857		 * Modified by us
2858		 */
2859		if (pte & PTE_M)
2860			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2861		/*
2862		 * Modified by someone
2863		 */
2864		else {
2865			vm_page_lock_queues();
2866			if (m->dirty || pmap_is_modified(m))
2867				val |= MINCORE_MODIFIED_OTHER;
2868			vm_page_unlock_queues();
2869		}
2870		/*
2871		 * Referenced by us or someone
2872		 */
2873		vm_page_lock_queues();
2874		if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
2875			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2876			vm_page_flag_set(m, PG_REFERENCED);
2877		}
2878		vm_page_unlock_queues();
2879	}
2880	return (val);
2881}
2882
2883void
2884pmap_activate(struct thread *td)
2885{
2886	pmap_t pmap, oldpmap;
2887	struct proc *p = td->td_proc;
2888
2889	critical_enter();
2890
2891	pmap = vmspace_pmap(p->p_vmspace);
2892	oldpmap = PCPU_GET(curpmap);
2893
2894	if (oldpmap)
2895		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2896	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2897	pmap_asid_alloc(pmap);
2898	if (td == curthread) {
2899		PCPU_SET(segbase, pmap->pm_segtab);
2900		MachSetPID(pmap->pm_asid[PCPU_GET(cpuid)].asid);
2901	}
2902	PCPU_SET(curpmap, pmap);
2903	critical_exit();
2904}
2905
2906/*
2907 * Increase the starting virtual address of the given mapping if a
2908 * different alignment might result in more superpage mappings.
2909 */
2910void
2911pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2912    vm_offset_t *addr, vm_size_t size)
2913{
2914	vm_offset_t superpage_offset;
2915
2916	if (size < NBSEG)
2917		return;
2918	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
2919		offset += ptoa(object->pg_color);
2920	superpage_offset = offset & SEGOFSET;
2921	if (size - ((NBSEG - superpage_offset) & SEGOFSET) < NBSEG ||
2922	    (*addr & SEGOFSET) == superpage_offset)
2923		return;
2924	if ((*addr & SEGOFSET) < superpage_offset)
2925		*addr = (*addr & ~SEGOFSET) + superpage_offset;
2926	else
2927		*addr = ((*addr + SEGOFSET) & ~SEGOFSET) + superpage_offset;
2928}
2929
2930int pmap_pid_dump(int pid);
2931
2932int
2933pmap_pid_dump(int pid)
2934{
2935	pmap_t pmap;
2936	struct proc *p;
2937	int npte = 0;
2938	int index;
2939
2940	sx_slock(&allproc_lock);
2941	LIST_FOREACH(p, &allproc, p_list) {
2942		if (p->p_pid != pid)
2943			continue;
2944
2945		if (p->p_vmspace) {
2946			int i, j;
2947
2948			printf("vmspace is %p\n",
2949			    p->p_vmspace);
2950			index = 0;
2951			pmap = vmspace_pmap(p->p_vmspace);
2952			printf("pmap asid:%x generation:%x\n",
2953			    pmap->pm_asid[0].asid,
2954			    pmap->pm_asid[0].gen);
2955			for (i = 0; i < NUSERPGTBLS; i++) {
2956				pd_entry_t *pde;
2957				pt_entry_t *pte;
2958				unsigned base = i << SEGSHIFT;
2959
2960				pde = &pmap->pm_segtab[i];
2961				if (pde && pmap_pde_v(pde)) {
2962					for (j = 0; j < 1024; j++) {
2963						unsigned va = base +
2964						    (j << PAGE_SHIFT);
2965
2966						pte = pmap_pte(pmap, va);
2967						if (pte && pmap_pte_v(pte)) {
2968							vm_offset_t pa;
2969							vm_page_t m;
2970
2971							pa = mips_tlbpfn_to_paddr(*pte);
2972							m = PHYS_TO_VM_PAGE(pa);
2973							printf("va: 0x%x, pa: 0x%x, h: %d, w: %d, f: 0x%x",
2974							    va, pa,
2975							    m->hold_count,
2976							    m->wire_count,
2977							    m->flags);
2978							npte++;
2979							index++;
2980							if (index >= 2) {
2981								index = 0;
2982								printf("\n");
2983							} else {
2984								printf(" ");
2985							}
2986						}
2987					}
2988				}
2989			}
2990		} else {
printf("Process pid:%d has no vm_space\n", pid); 2992 } 2993 break; 2994 } 2995 sx_sunlock(&allproc_lock); 2996 return npte; 2997} 2998 2999 3000#if defined(DEBUG) 3001 3002static void pads(pmap_t pm); 3003void pmap_pvdump(vm_offset_t pa); 3004 3005/* print address space of pmap*/ 3006static void 3007pads(pmap_t pm) 3008{ 3009 unsigned va, i, j; 3010 pt_entry_t *ptep; 3011 3012 if (pm == kernel_pmap) 3013 return; 3014 for (i = 0; i < NPTEPG; i++) 3015 if (pm->pm_segtab[i]) 3016 for (j = 0; j < NPTEPG; j++) { 3017 va = (i << SEGSHIFT) + (j << PAGE_SHIFT); 3018 if (pm == kernel_pmap && va < KERNBASE) 3019 continue; 3020 if (pm != kernel_pmap && 3021 va >= VM_MAXUSER_ADDRESS) 3022 continue; 3023 ptep = pmap_pte(pm, va); 3024 if (pmap_pte_v(ptep)) 3025 printf("%x:%x ", va, *(int *)ptep); 3026 } 3027 3028} 3029 3030void 3031pmap_pvdump(vm_offset_t pa) 3032{ 3033 register pv_entry_t pv; 3034 vm_page_t m; 3035 3036 printf("pa %x", pa); 3037 m = PHYS_TO_VM_PAGE(pa); 3038 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3039 pv = TAILQ_NEXT(pv, pv_list)) { 3040 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3041 pads(pv->pv_pmap); 3042 } 3043 printf(" "); 3044} 3045 3046/* N/C */ 3047#endif 3048 3049 3050/* 3051 * Allocate TLB address space tag (called ASID or TLBPID) and return it. 3052 * It takes almost as much or more time to search the TLB for a 3053 * specific ASID and flush those entries as it does to flush the entire TLB. 3054 * Therefore, when we allocate a new ASID, we just take the next number. When 3055 * we run out of numbers, we flush the TLB, increment the generation count 3056 * and start over. ASID zero is reserved for kernel use. 3057 */ 3058static void 3059pmap_asid_alloc(pmap) 3060 pmap_t pmap; 3061{ 3062 if (pmap->pm_asid[PCPU_GET(cpuid)].asid != PMAP_ASID_RESERVED && 3063 pmap->pm_asid[PCPU_GET(cpuid)].gen == PCPU_GET(asid_generation)); 3064 else { 3065 if (PCPU_GET(next_asid) == pmap_max_asid) { 3066 MIPS_TBIAP(); 3067 PCPU_SET(asid_generation, 3068 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK); 3069 if (PCPU_GET(asid_generation) == 0) { 3070 PCPU_SET(asid_generation, 1); 3071 } 3072 PCPU_SET(next_asid, 1); /* 0 means invalid */ 3073 } 3074 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid); 3075 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation); 3076 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1); 3077 } 3078 3079#ifdef DEBUG 3080 if (pmapdebug & (PDB_FOLLOW | PDB_TLBPID)) { 3081 if (curproc) 3082 printf("pmap_asid_alloc: curproc %d '%s' ", 3083 curproc->p_pid, curproc->p_comm); 3084 else 3085 printf("pmap_asid_alloc: curproc <none> "); 3086 printf("segtab %p asid %d\n", pmap->pm_segtab, 3087 pmap->pm_asid[PCPU_GET(cpuid)].asid); 3088 } 3089#endif 3090} 3091 3092int 3093page_is_managed(vm_offset_t pa) 3094{ 3095 vm_offset_t pgnum = mips_btop(pa); 3096 3097 if (pgnum >= first_page && (pgnum < (first_page + vm_page_array_size))) { 3098 vm_page_t m; 3099 3100 m = PHYS_TO_VM_PAGE(pa); 3101 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) 3102 return 1; 3103 } 3104 return 0; 3105} 3106 3107static int 3108init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot) 3109{ 3110 int rw = 0; 3111 3112 if (!(prot & VM_PROT_WRITE)) 3113 rw = PTE_ROPAGE; 3114 else { 3115 if (va >= VM_MIN_KERNEL_ADDRESS) { 3116 /* 3117 * Don't bother to trap on kernel writes, just 3118 * record page as dirty. 
3119 */
3120			rw = PTE_RWPAGE;
3121			vm_page_dirty(m);
3122		} else if ((m->md.pv_flags & PV_TABLE_MOD) ||
3123		    m->dirty == VM_PAGE_BITS_ALL)
3124			rw = PTE_RWPAGE;
3125		else
3126			rw = PTE_CWPAGE;
3127		vm_page_flag_set(m, PG_WRITEABLE);
3128	}
3129	return (rw);
3130}
3131
3132/*
3133 * pmap_page_is_free:
3134 *
3135 *	Called when a page is freed to allow pmap to clean up
3136 *	any extra state associated with the page.  In this case
3137 *	clear modified/referenced bits.
3138 */
3139void
3140pmap_page_is_free(vm_page_t m)
3141{
3142
3143	m->md.pv_flags = 0;
3144}
3145
3146/*
3147 * pmap_set_modified:
3148 *
3149 *	Sets the page modified and reference bits for the specified page.
3150 */
3151void
3152pmap_set_modified(vm_offset_t pa)
3153{
3154
3155	PHYS_TO_VM_PAGE(pa)->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD);
3156}
3157
3158#include <machine/db_machdep.h>
3159
3160/*
3161 * Dump the translation buffer (TLB) in readable form.
3162 */
3163
3164void
3165db_dump_tlb(int first, int last)
3166{
3167	struct tlb tlb;
3168	int tlbno;
3169
3170	tlbno = first;
3171
3172	while (tlbno <= last) {
3173		MachTLBRead(tlbno, &tlb);
3174		if (tlb.tlb_lo0 & PTE_V || tlb.tlb_lo1 & PTE_V) {
3175			printf("TLB %2d vad 0x%08x ", tlbno, (tlb.tlb_hi & 0xffffff00));
3176		} else {
3177			printf("TLB*%2d vad 0x%08x ", tlbno, (tlb.tlb_hi & 0xffffff00));
3178		}
3179		printf("0=0x%08x ", pfn_to_vad(tlb.tlb_lo0));
3180		printf("%c", tlb.tlb_lo0 & PTE_M ? 'M' : ' ');
3181		printf("%c", tlb.tlb_lo0 & PTE_G ? 'G' : ' ');
3182		printf(" atr %x ", (tlb.tlb_lo0 >> 3) & 7);
3183		printf("1=0x%08x ", pfn_to_vad(tlb.tlb_lo1));
3184		printf("%c", tlb.tlb_lo1 & PTE_M ? 'M' : ' ');
3185		printf("%c", tlb.tlb_lo1 & PTE_G ? 'G' : ' ');
3186		printf(" atr %x ", (tlb.tlb_lo1 >> 3) & 7);
3187		printf(" sz=%x pid=%x\n", tlb.tlb_mask,
3188		    (tlb.tlb_hi & 0x000000ff)
3189		    );
3190		tlbno++;
3191	}
3192}
3193
3194#ifdef DDB
3195#include <sys/kernel.h>
3196#include <ddb/ddb.h>
3197
3198DB_SHOW_COMMAND(tlb, ddb_dump_tlb)
3199{
3200	db_dump_tlb(0, num_tlbentries - 1);
3201}
3202
3203#endif
3204
3205/*
3206 * Routine:	pmap_kextract
3207 * Function:
3208 *	Extract the physical page address associated with the
3209 *	given virtual address.
3210 */
3211/* PMAP_INLINE */ vm_offset_t
3212pmap_kextract(vm_offset_t va)
3213{
3214	vm_offset_t pa = 0;
3215
3216	if (va < MIPS_CACHED_MEMORY_ADDR) {
3217		/* user virtual address */
3218		pt_entry_t *ptep;
3219
3220		if (curproc && curproc->p_vmspace) {
3221			ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3222			if (ptep)
3223				pa = mips_tlbpfn_to_paddr(*ptep) |
3224				    (va & PAGE_MASK);
3225		}
3226	} else if (va >= MIPS_CACHED_MEMORY_ADDR &&
3227	    va < MIPS_UNCACHED_MEMORY_ADDR)
3228		pa = MIPS_CACHED_TO_PHYS(va);
3229	else if (va >= MIPS_UNCACHED_MEMORY_ADDR &&
3230	    va < MIPS_KSEG2_START)
3231		pa = MIPS_UNCACHED_TO_PHYS(va);
3232#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
3233	else if (need_wired_tlb_page_pool && ((va >= VM_MIN_KERNEL_ADDRESS) &&
3234	    (va < (VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET))))
3235		pa = MIPS_CACHED_TO_PHYS(va);
3236#endif
3237	else if (va >= MIPS_KSEG2_START && va < VM_MAX_KERNEL_ADDRESS) {
3238		pt_entry_t *ptep;
3239
3240		/* Is the kernel pmap initialized?
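		 *
		 * For orientation (standard MIPS32 layout, an editor's
		 * note): KSEG0 windows the first 512MB of physical
		 * memory cached from 0x80000000, and KSEG1 windows it
		 * uncached from 0xa0000000, so e.g. va 0x80001000
		 * decodes to pa 0x1000 via MIPS_CACHED_TO_PHYS() above.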
		 */
3241		if (kernel_pmap->pm_active) {
3242			if (va >= (vm_offset_t)virtual_sys_start) {
3243				/* It's inside the virtual address range */
3244				ptep = pmap_pte(kernel_pmap, va);
3245				if (ptep)
3246					pa = mips_tlbpfn_to_paddr(*ptep) |
3247					    (va & PAGE_MASK);
3248			} else {
3249				int i;
3250
3251				/*
3252				 * It's inside the special mapping area; I
3253				 * don't think this should happen, but if
3254				 * it does I want it to all work right :-)
3255				 * Note: if it does happen, we assume the
3256				 * caller has the lock?  FIXME, this needs
3257				 * to be checked - RRS.
3258				 */
3259				for (i = 0; i < MAXCPU; i++) {
3260					if ((sysmap_lmem[i].valid1) && ((vm_offset_t)sysmap_lmem[i].CADDR1 == va)) {
3261						pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP1);
3262						break;
3263					}
3264					if ((sysmap_lmem[i].valid2) && ((vm_offset_t)sysmap_lmem[i].CADDR2 == va)) {
3265						pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP2);
3266						break;
3267					}
3268				}
3269			}
3270		}
3271	}
3272	return (pa);
3273}
3274
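/*
 * Editor's addition, not part of the original file: a minimal sketch
 * showing how pmap_kextract() round-trips a KSEG0 address.  It relies
 * only on symbols already used above (MIPS_PHYS_TO_CACHED, printf) and
 * an arbitrarily chosen low physical address, and is compiled out
 * unless PMAP_DEBUG is defined.
 */
#ifdef PMAP_DEBUG
static void
pmap_kextract_check(void)
{
	vm_paddr_t pa = 0x1000;		/* hypothetical low physical page */
	vm_offset_t va = MIPS_PHYS_TO_CACHED(pa);

	/* KSEG0 addresses decode without consulting any page table. */
	if (pmap_kextract(va) != pa)
		printf("pmap_kextract_check: 0x%08x did not round-trip\n",
		    (unsigned)va);
}
#endif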