pmap.c revision 207155
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
 */

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this module is called upon to
 * provide software-use-only maps which may or may not be stored in the
 * same form as hardware maps.  These pseudo-maps are used to store
 * intermediate results from copy operations to and from address spaces.
 *
 * Since the information managed by this module is also stored by the
 * logical address mapping module, this module may throw away valid
 * virtual-to-physical mappings at almost any time.  However,
 * invalidations of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which make
 * virtual-to-physical map invalidates expensive, this module may delay
 * invalidate or reduced protection operations until such time as they
 * are actually necessary.  This module is given full information as to
 * which processors are currently using which maps, and to when physical
 * maps must be made correct.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 207155 2010-04-24 17:32:52Z alc $");

#include "opt_ddb.h"
#include "opt_msgbuf.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <sys/pcpu.h>
#include <sys/sched.h>
#ifdef SMP
#include <sys/smp.h>
#endif

#include <machine/cache.h>
#include <machine/md_var.h>

#if defined(DIAGNOSTIC)
#define	PMAP_DIAGNOSTIC
#endif

#undef PMAP_DEBUG

#ifndef PMAP_SHPGPERPROC
#define	PMAP_SHPGPERPROC 200
#endif

#if !defined(PMAP_DIAGNOSTIC)
#define	PMAP_INLINE __inline
#else
#define	PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)		(&((m)->pm_segtab[(vm_offset_t)(v) >> SEGSHIFT]))
#define	segtab_pde(m, v)	(m[(vm_offset_t)(v) >> SEGSHIFT])

#define	pmap_pte_w(pte)		((*(int *)pte & PTE_W) != 0)
#define	pmap_pde_v(pte)		((*(int *)pte) != 0)
#define	pmap_pte_m(pte)		((*(int *)pte & PTE_M) != 0)
#define	pmap_pte_v(pte)		((*(int *)pte & PTE_V) != 0)

#define	pmap_pte_set_w(pte, v)	((v) ? (*(int *)pte |= PTE_W) : (*(int *)pte &= ~PTE_W))
#define	pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))

#define	MIPS_SEGSIZE		(1L << SEGSHIFT)
#define	mips_segtrunc(va)	((va) & ~(MIPS_SEGSIZE-1))
#define	pmap_TLB_invalidate_all() MIPS_TBIAP()
#define	pmap_va_asid(pmap, va)	((va) | ((pmap)->pm_asid[PCPU_GET(cpuid)].asid << VMTLB_PID_SHIFT))
#define	is_kernel_pmap(x)	((x) == kernel_pmap)

struct pmap kernel_pmap_store;
pd_entry_t *kernel_segmap;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

static int nkpt;
unsigned pmap_max_asid;		/* max ASID supported by the system */

#define	PMAP_ASID_RESERVED	0

vm_offset_t kernel_vm_end;

static struct tlb tlbstash[MAXCPU][MIPS_MAX_TLB_ENTRIES];

static void pmap_asid_alloc(pmap_t pmap);

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;

static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);

static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
static boolean_t pmap_testbit(vm_page_t m, int bit);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte,
    vm_page_t m, boolean_t wired);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
    vm_offset_t va, vm_page_t m);
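/*
 * Illustrative sketch (not used by the code): how a virtual address is
 * decomposed by the two-level lookup macros above.  The segment table
 * index comes from the bits above SEGSHIFT, and the PTE index from the
 * page bits below it, so a lookup is roughly:
 *
 *	pd_entry_t pde = pmap->pm_segtab[va >> SEGSHIFT];
 *	pt_entry_t pte = ((pt_entry_t *)pde)[(va >> PAGE_SHIFT) & (NPTEPG - 1)];
 *
 * The real code goes through pmap_segmap()/pmap_pte() below, which also
 * handle the case of an absent second-level page.
 */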
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
static void pmap_TLB_invalidate_kernel(vm_offset_t);
static void pmap_TLB_update_kernel(vm_offset_t, pt_entry_t);

#ifdef SMP
static void pmap_invalidate_page_action(void *arg);
static void pmap_invalidate_all_action(void *arg);
static void pmap_update_page_action(void *arg);
#endif

struct local_sysmaps {
	struct mtx lock;
	vm_offset_t base;
	uint16_t valid1, valid2;
};

/*
 * This structure is for large memory above 512Meg.  In 32-bit mode we
 * cannot simply use the direct-mapped MIPS_KSEG0_TO_PHYS() macros, since
 * we can't see that memory through KSEG0 and must map it in when we need
 * to access it.  In 64-bit mode this goes away.
 */
static struct local_sysmaps sysmap_lmem[MAXCPU];
caddr_t virtual_sys_start = (caddr_t)0;

#define	PMAP_LMEM_MAP1(va, phys)				\
	int cpu;						\
	struct local_sysmaps *sysm;				\
	pt_entry_t *pte, npte;					\
								\
	cpu = PCPU_GET(cpuid);					\
	sysm = &sysmap_lmem[cpu];				\
	PMAP_LGMEM_LOCK(sysm);					\
	intr = intr_disable();					\
	sched_pin();						\
	va = sysm->base;					\
	npte = mips_paddr_to_tlbpfn(phys) |			\
	    PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;		\
	pte = pmap_pte(kernel_pmap, va);			\
	*pte = npte;						\
	sysm->valid1 = 1;

#define	PMAP_LMEM_MAP2(va1, phys1, va2, phys2)			\
	int cpu;						\
	struct local_sysmaps *sysm;				\
	pt_entry_t *pte, npte;					\
								\
	cpu = PCPU_GET(cpuid);					\
	sysm = &sysmap_lmem[cpu];				\
	PMAP_LGMEM_LOCK(sysm);					\
	intr = intr_disable();					\
	sched_pin();						\
	va1 = sysm->base;					\
	va2 = sysm->base + PAGE_SIZE;				\
	npte = mips_paddr_to_tlbpfn(phys1) |			\
	    PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;		\
	pte = pmap_pte(kernel_pmap, va1);			\
	*pte = npte;						\
	npte = mips_paddr_to_tlbpfn(phys2) |			\
	    PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;		\
	pte = pmap_pte(kernel_pmap, va2);			\
	*pte = npte;						\
	sysm->valid1 = 1;					\
	sysm->valid2 = 1;

#define	PMAP_LMEM_UNMAP()					\
	pte = pmap_pte(kernel_pmap, sysm->base);		\
	*pte = PTE_G;						\
	pmap_TLB_invalidate_kernel(sysm->base);			\
	sysm->valid1 = 0;					\
	pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);	\
	*pte = PTE_G;						\
	pmap_TLB_invalidate_kernel(sysm->base + PAGE_SIZE);	\
	sysm->valid2 = 0;					\
	sched_unpin();						\
	intr_restore(intr);					\
	PMAP_LGMEM_UNLOCK(sysm);

pd_entry_t
pmap_segmap(pmap_t pmap, vm_offset_t va)
{
	if (pmap->pm_segtab)
		return (pmap->pm_segtab[(vm_offset_t)(va) >> SEGSHIFT]);
	else
		return ((pd_entry_t)0);
}
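/*
 * Usage sketch for the PMAP_LMEM_* macros above (this is the pattern
 * pmap_zero_page() and friends follow later in this file): the macros
 * open with a declaration list, so they must appear first in a block,
 * and the caller must provide a "register_t intr;" for the saved
 * interrupt state:
 *
 *	register_t intr;
 *	...
 *	PMAP_LMEM_MAP1(va, phys);
 *	bzero((caddr_t)va, PAGE_SIZE);
 *	mips_dcache_wbinv_range(va, PAGE_SIZE);
 *	PMAP_LMEM_UNMAP();
 */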
/*
 * Routine:	pmap_pte
 * Function:
 *	Extract the page table entry associated with the given
 *	map/virtual_address pair.
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pdeaddr;

	if (pmap) {
		pdeaddr = (pt_entry_t *)pmap_segmap(pmap, va);
		if (pdeaddr) {
			return pdeaddr + vad_to_pte_offset(va);
		}
	}
	return ((pt_entry_t *)0);
}

vm_offset_t
pmap_steal_memory(vm_size_t size)
{
	vm_size_t bank_size;
	vm_offset_t pa, va;

	size = round_page(size);

	bank_size = phys_avail[1] - phys_avail[0];
	while (size > bank_size) {
		int i;

		for (i = 0; phys_avail[i + 2]; i += 2) {
			phys_avail[i] = phys_avail[i + 2];
			phys_avail[i + 1] = phys_avail[i + 3];
		}
		phys_avail[i] = 0;
		phys_avail[i + 1] = 0;
		if (!phys_avail[0])
			panic("pmap_steal_memory: out of memory");
		bank_size = phys_avail[1] - phys_avail[0];
	}

	pa = phys_avail[0];
	phys_avail[0] += size;
	if (pa >= MIPS_KSEG0_LARGEST_PHYS) {
		panic("Out of memory below 512Meg?");
	}
	va = MIPS_PHYS_TO_KSEG0(pa);
	bzero((caddr_t)va, size);
	return va;
}

/*
 * Bootstrap the system enough to run with virtual memory.  This
 * assumes that the phys_avail array has been initialized.
 */
void
pmap_bootstrap(void)
{
	pt_entry_t *pgtab;
	pt_entry_t *pte;
	int i, j;
	int memory_larger_than_512meg = 0;

	/* Sort. */
again:
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		/*
		 * Keep the memory aligned on page boundary.
		 */
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);

		if (phys_avail[i + 1] >= MIPS_KSEG0_LARGEST_PHYS)
			memory_larger_than_512meg++;
		if (i < 2)
			continue;
		if (phys_avail[i - 2] > phys_avail[i]) {
			vm_paddr_t ptemp[2];

			ptemp[0] = phys_avail[i + 0];
			ptemp[1] = phys_avail[i + 1];

			phys_avail[i + 0] = phys_avail[i - 2];
			phys_avail[i + 1] = phys_avail[i - 1];

			phys_avail[i - 2] = ptemp[0];
			phys_avail[i - 1] = ptemp[1];
			goto again;
		}
	}

	/*
	 * Copy the phys_avail[] array before we start stealing memory from it.
	 */
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		physmem_desc[i] = phys_avail[i];
		physmem_desc[i + 1] = phys_avail[i + 1];
	}

	Maxmem = atop(phys_avail[i - 1]);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			vm_paddr_t size;

			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
			    (uintmax_t) phys_avail[i],
			    (uintmax_t) phys_avail[i + 1] - 1,
			    (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
		}
		printf("Maxmem is 0x%0lx\n", ptoa(Maxmem));
	}
	/*
	 * Steal the message buffer from the beginning of memory.
	 */
	msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE);
	msgbufinit(msgbufp, MSGBUF_SIZE);

	/*
	 * Steal thread0 kstack.
	 */
	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);

	virtual_avail = VM_MIN_KERNEL_ADDRESS;
	virtual_end = VM_MAX_KERNEL_ADDRESS;

#ifdef SMP
	/*
	 * Steal some virtual address space to map the pcpu area.
	 */
	virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2);
	pcpup = (struct pcpu *)virtual_avail;
	virtual_avail += PAGE_SIZE * 2;

	/*
	 * Initialize the wired TLB entry mapping the pcpu region for
	 * the BSP at 'pcpup'.
	 * Up until this point we were operating with the 'pcpup' for
	 * the BSP pointing to a virtual address in KSEG0, so there was
	 * no need for a TLB mapping.
	 */
	mips_pcpu_tlb_init(PCPU_ADDR(0));

	if (bootverbose)
		printf("pcpu is available at virtual address %p.\n", pcpup);
#endif

	/*
	 * Steal some virtual space that will not be in kernel_segmap.  This
	 * VA space will be used to map in kernel pages that are outside the
	 * 512Meg region.  Note that we only do this steal when we do have
	 * memory in this region; that way, systems with smaller memory
	 * don't "steal" any VA ranges :-)
	 */
	if (memory_larger_than_512meg) {
		for (i = 0; i < MAXCPU; i++) {
			sysmap_lmem[i].base = virtual_avail;
			virtual_avail += PAGE_SIZE * 2;
			sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
			PMAP_LGMEM_LOCK_INIT(&sysmap_lmem[i]);
		}
	}
	virtual_sys_start = (caddr_t)virtual_avail;
	/*
	 * Allocate segment table for the kernel
	 */
	kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);

	/*
	 * Allocate second level page tables for the kernel
	 */
	nkpt = NKPT;
	if (memory_larger_than_512meg) {
		/*
		 * If we have a large memory system we CANNOT afford to hit
		 * pmap_growkernel() and allocate memory, since we MAY end
		 * up with a page that is NOT mappable.  For that reason we
		 * grab more up front.  Normally NKPT is 120 (YMMV, see
		 * pmap.h); this gives us 480Meg of kernel virtual addresses
		 * at the cost of 120 pages (each page gets us 4 Meg).
		 * Since the kernel starts at virtual_avail, we can use this
		 * to calculate how many entries are left from there to the
		 * end of the segmap; we want to allocate all of it, which
		 * would be somewhere above 0xC0000000 - 0xFFFFFFFF, which
		 * results in about 256 entries or so instead of the 120.
		 */
		nkpt = (PAGE_SIZE / sizeof(pd_entry_t)) -
		    (virtual_avail >> SEGSHIFT);
	}
	pgtab = (pt_entry_t *)pmap_steal_memory(PAGE_SIZE * nkpt);

	/*
	 * The R[4-7]?00 stores only one copy of the Global bit in the
	 * translation lookaside buffer for each 2 page entry.  Thus invalid
	 * entries must have the Global bit set so that when Entry LO and
	 * Entry HI G bits are ANDed together they will produce a global bit
	 * to store in the TLB.
	 */
	for (i = 0, pte = pgtab; i < (nkpt * NPTEPG); i++, pte++)
		*pte = PTE_G;

	/*
	 * The segment table contains the KVA of the pages in the second
	 * level page table.
	 */
	for (i = 0, j = (virtual_avail >> SEGSHIFT); i < nkpt; i++, j++)
		kernel_segmap[j] = (pd_entry_t)(pgtab + (i * NPTEPG));

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence (XXX and which no longer exists).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	kernel_pmap->pm_segtab = kernel_segmap;
	kernel_pmap->pm_active = ~0;
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
	kernel_pmap->pm_asid[0].gen = 0;
	pmap_max_asid = VMNUM_PIDS;
	MachSetPID(0);
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_list_count = 0;
	m->md.pv_flags = 0;
}
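/*
 * Worked numbers for the NKPT sizing comment in pmap_bootstrap() above,
 * assuming 4K pages and 4-byte PTEs: one second-level page holds
 * NPTEPG = PAGE_SIZE / sizeof(pt_entry_t) = 4096 / 4 = 1024 PTEs and
 * therefore maps 1024 * 4096 bytes = 4 Meg of KVA.  The default NKPT of
 * 120 pages thus covers 120 * 4 Meg = 480 Meg, while a full segment
 * table page (1024 entries) would cover the whole 4 Gig address space.
 */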
/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap system
 * needs to map virtual memory.
 * pmap_init has been enhanced to support, in a fairly consistent way,
 * discontiguous physical memory.
 */
void
pmap_init(void)
{

	/*
	 * Initialize the address space (zone) for the pv entries.  Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
	pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
	pv_entry_high_water = 9 * (pv_entry_max / 10);
	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
}

/***************************************************
 * Low level helper routines.....
 ***************************************************/

#if defined(PMAP_DIAGNOSTIC)

/*
 * This code checks for non-writeable/modified pages.
 * This should be an invalid condition.
 */
static int
pmap_nw_modified(pt_entry_t pte)
{
	if ((pte & (PTE_M | PTE_RO)) == (PTE_M | PTE_RO))
		return (1);
	else
		return (0);
}

#endif

static void
pmap_invalidate_all(pmap_t pmap)
{
#ifdef SMP
	smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *)pmap);
}

static void
pmap_invalidate_all_action(void *arg)
{
	pmap_t pmap = (pmap_t)arg;

#endif

	if (pmap->pm_active & PCPU_GET(cpumask)) {
		pmap_TLB_invalidate_all();
	} else
		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
}

struct pmap_invalidate_page_arg {
	pmap_t pmap;
	vm_offset_t va;
};

static __inline void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
#ifdef SMP
	struct pmap_invalidate_page_arg arg;

	arg.pmap = pmap;
	arg.va = va;

	smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *)&arg);
}

static void
pmap_invalidate_page_action(void *arg)
{
	pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap;
	vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va;

#endif

	if (is_kernel_pmap(pmap)) {
		pmap_TLB_invalidate_kernel(va);
		return;
	}
	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
		return;
	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
		return;
	}
	va = pmap_va_asid(pmap, (va & ~PAGE_MASK));
	mips_TBIS(va);
}

static void
pmap_TLB_invalidate_kernel(vm_offset_t va)
{
	u_int32_t pid;

	MachTLBGetPID(pid);
	va = va | (pid << VMTLB_PID_SHIFT);
	mips_TBIS(va);
}

struct pmap_update_page_arg {
	pmap_t pmap;
	vm_offset_t va;
	pt_entry_t pte;
};

void
pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
{
#ifdef SMP
	struct pmap_update_page_arg arg;

	arg.pmap = pmap;
	arg.va = va;
	arg.pte = pte;

	smp_rendezvous(0, pmap_update_page_action, 0, (void *)&arg);
}

static void
pmap_update_page_action(void *arg)
{
	pmap_t pmap = ((struct pmap_update_page_arg *)arg)->pmap;
	vm_offset_t va = ((struct pmap_update_page_arg *)arg)->va;
	pt_entry_t pte = ((struct pmap_update_page_arg *)arg)->pte;

#endif
	if (is_kernel_pmap(pmap)) {
		pmap_TLB_update_kernel(va, pte);
		return;
	}
	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
		return;
	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
		return;
	}
	va = pmap_va_asid(pmap, (va & ~PAGE_MASK));
	MachTLBUpdate(va, pte);
}
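/*
 * Note on the unusual #ifdef SMP bracketing above: in the SMP case each
 * pmap_*() wrapper ends inside the #ifdef and the corresponding
 * *_action() handler opens there, so the shared tail compiles either as
 * the wrapper body (UP) or as the rendezvous handler body (SMP).
 *
 * Illustrative sketch of the per-ASID invalidate those tails perform
 * (this mirrors the pmap_va_asid() macro; EntryHi holds VPN | ASID):
 *
 *	va = (va & ~PAGE_MASK) |
 *	    (pmap->pm_asid[PCPU_GET(cpuid)].asid << VMTLB_PID_SHIFT);
 *	mips_TBIS(va);	\* drop the TLB entry tagged with that ASID *\
 */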
static void
pmap_TLB_update_kernel(vm_offset_t va, pt_entry_t pte)
{
	u_int32_t pid;

	va &= ~PAGE_MASK;

	MachTLBGetPID(pid);
	va = va | (pid << VMTLB_PID_SHIFT);

	MachTLBUpdate(va, pte);
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated with the given
 *	map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte;
	vm_offset_t retval = 0;

	PMAP_LOCK(pmap);
	pte = pmap_pte(pmap, va);
	if (pte) {
		retval = mips_tlbpfn_to_paddr(*pte) | (va & PAGE_MASK);
	}
	PMAP_UNLOCK(pmap);
	return retval;
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page with the given
 *	pmap and virtual address pair if that mapping permits the given
 *	protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t pte;
	vm_page_t m;

	m = NULL;
	vm_page_lock_queues();
	PMAP_LOCK(pmap);

	pte = *pmap_pte(pmap, va);
	if (pte != 0 && pmap_pte_v(&pte) &&
	    ((pte & PTE_RW) || (prot & VM_PROT_WRITE) == 0)) {
		m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pte));
		vm_page_hold(m);
	}
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * add a wired page to the kva
 */
 /* PMAP_INLINE */ void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
	register pt_entry_t *pte;
	pt_entry_t npte, opte;

#ifdef PMAP_DEBUG
	printf("pmap_kenter: va: 0x%08x -> pa: 0x%08x\n", va, pa);
#endif
	npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W;

	if (is_cacheable_mem(pa))
		npte |= PTE_CACHE;
	else
		npte |= PTE_UNCACHED;

	pte = pmap_pte(kernel_pmap, va);
	opte = *pte;
	*pte = npte;

	pmap_update_page(kernel_pmap, va, npte);
}

/*
 * remove a page from the kernel pagetables
 */
 /* PMAP_INLINE */ void
pmap_kremove(vm_offset_t va)
{
	register pt_entry_t *pte;

	/*
	 * Write back all caches from the page being destroyed
	 */
	mips_dcache_wbinv_range_index(va, PAGE_SIZE);

	pte = pmap_pte(kernel_pmap, va);
	*pte = PTE_G;
	pmap_invalidate_page(kernel_pmap, va);
}
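/*
 * PTE bit roles as used by pmap_kenter()/pmap_kremove() above (MIPS
 * software/hardware bits; see the machine pte definitions): PTE_V marks
 * the entry valid, PTE_RW writable, PTE_G global (matched regardless of
 * ASID, required for kernel mappings), PTE_W is the software "wired"
 * bit, and PTE_CACHE/PTE_UNCACHED select the cache attribute.  An
 * invalidated kernel PTE is set to bare PTE_G rather than 0 so the G
 * bit stays consistent across each even/odd TLB entry pair.
 */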
/*
 * Used to map a range of physical addresses into kernel virtual
 * address space.
 *
 * The value passed in '*virt' is a suggested virtual address for the
 * mapping.  Architectures which can support a direct-mapped physical
 * to virtual region can return the appropriate address within that
 * region, leaving '*virt' unchanged.  Other architectures should map
 * the pages starting at '*virt' and update '*virt' with the first
 * usable address after the mapped region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
	vm_offset_t va, sva;

	va = sva = *virt;
	while (start < end) {
		pmap_kenter(va, start);
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	*virt = va;
	return (sva);
}

/*
 * Add a list of wired pages to the kva; this routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page *must* be wired.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	int i;
	vm_offset_t origva = va;

	for (i = 0; i < count; i++) {
		pmap_flush_pvcache(m[i]);
		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
		va += PAGE_SIZE;
	}

	mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count);
}

/*
 * this routine jerks page mappings from the kernel -- it is meant only
 * for temporary mappings.
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	/*
	 * No need to wb/inv caches here,
	 * pmap_kremove will do it for us
	 */

	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

/*
 * Revision 1.507
 *
 * Simplify the reference counting of page table pages.  Specifically,
 * use the page table page's wired count rather than its hold count to
 * contain the reference count.
 */

/*
 * This routine unholds page table pages, and if the hold count drops
 * to zero, then it decrements the wire count.
 */
static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
{
	vm_offset_t pteva;

	/*
	 * unmap the page table page
	 */
	pteva = (vm_offset_t)pmap->pm_segtab[m->pindex];
	if (pteva >= VM_MIN_KERNEL_ADDRESS) {
		pmap_kremove(pteva);
		kmem_free(kernel_map, pteva, PAGE_SIZE);
	} else {
		KASSERT(MIPS_IS_KSEG0_ADDR(pteva),
		    ("_pmap_unwire_pte_hold: 0x%0lx is not in kseg0",
		    (long)pteva));
	}

	pmap->pm_segtab[m->pindex] = 0;
	--pmap->pm_stats.resident_count;

	if (pmap->pm_ptphint == m)
		pmap->pm_ptphint = NULL;

	/*
	 * If the page is finally unwired, simply free it.
	 */
	vm_page_free_zero(m);
	atomic_subtract_int(&cnt.v_wire_count, 1);
	return (1);
}

static PMAP_INLINE int
pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
{
	--m->wire_count;
	if (m->wire_count == 0)
		return (_pmap_unwire_pte_hold(pmap, m));
	else
		return (0);
}
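/*
 * Reference-counting note for the routines above and pmap_unuse_pt()
 * below: every valid mapping installed under a page table page holds
 * one reference in that page's wire_count (see the Revision 1.507 note
 * above).  When pmap_unwire_pte_hold() drops the count to zero, the
 * page table page itself is unmapped and freed.
 */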
/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{
	unsigned ptepindex;
	pd_entry_t pteva;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);

	if (mpte == NULL) {
		ptepindex = (va >> SEGSHIFT);
		if (pmap->pm_ptphint &&
		    (pmap->pm_ptphint->pindex == ptepindex)) {
			mpte = pmap->pm_ptphint;
		} else {
			pteva = *pmap_pde(pmap, va);
			mpte = PHYS_TO_VM_PAGE(vtophys(pteva));
			pmap->pm_ptphint = mpte;
		}
	}
	return pmap_unwire_pte_hold(pmap, mpte);
}

void
pmap_pinit0(pmap_t pmap)
{
	int i;

	PMAP_LOCK_INIT(pmap);
	pmap->pm_segtab = kernel_segmap;
	pmap->pm_active = 0;
	pmap->pm_ptphint = NULL;
	for (i = 0; i < MAXCPU; i++) {
		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
		pmap->pm_asid[i].gen = 0;
	}
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(pmap_t pmap)
{
	vm_offset_t ptdva;
	vm_paddr_t ptdpa;
	vm_page_t ptdpg;
	int i;
	int req;

	PMAP_LOCK_INIT(pmap);

	req = VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
	    VM_ALLOC_ZERO;

	/*
	 * allocate the page directory page
	 */
	while ((ptdpg = vm_page_alloc(NULL, NUSERPGTBLS, req)) == NULL)
		VM_WAIT;

	ptdpg->valid = VM_PAGE_BITS_ALL;

	ptdpa = VM_PAGE_TO_PHYS(ptdpg);
	if (ptdpa < MIPS_KSEG0_LARGEST_PHYS) {
		ptdva = MIPS_PHYS_TO_KSEG0(ptdpa);
	} else {
		ptdva = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
		if (ptdva == 0)
			panic("pmap_pinit: unable to allocate kva");
		pmap_kenter(ptdva, ptdpa);
	}

	pmap->pm_segtab = (pd_entry_t *)ptdva;
	if ((ptdpg->flags & PG_ZERO) == 0)
		bzero(pmap->pm_segtab, PAGE_SIZE);

	pmap->pm_active = 0;
	pmap->pm_ptphint = NULL;
	for (i = 0; i < MAXCPU; i++) {
		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
		pmap->pm_asid[i].gen = 0;
	}
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);

	return (1);
}

/*
 * this routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
{
	vm_offset_t pteva, ptepa;
	vm_page_t m;
	int req;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	req = VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ;
	/*
	 * Find or fabricate a new pagetable page
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, req)) == NULL) {
		if (flags & M_WAITOK) {
			PMAP_UNLOCK(pmap);
			vm_page_unlock_queues();
			VM_WAIT;
			vm_page_lock_queues();
			PMAP_LOCK(pmap);
		}
		/*
		 * Indicate the need to retry.  While waiting, the page
		 * table page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	KASSERT(m->queue == PQ_NONE,
	    ("_pmap_allocpte: %p->queue != PQ_NONE", m));

	/*
	 * Map the pagetable page into the process address space, if it
	 * isn't already there.
	 */

	pmap->pm_stats.resident_count++;

	ptepa = VM_PAGE_TO_PHYS(m);
	if (ptepa < MIPS_KSEG0_LARGEST_PHYS) {
		pteva = MIPS_PHYS_TO_KSEG0(ptepa);
	} else {
		pteva = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
		if (pteva == 0)
			panic("_pmap_allocpte: unable to allocate kva");
		pmap_kenter(pteva, ptepa);
	}

	pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva;

	/*
	 * Set the page table hint
	 */
	pmap->pm_ptphint = m;

	/*
	 * Kernel page tables are allocated in pmap_bootstrap() or
	 * pmap_growkernel().
	 */
	if (is_kernel_pmap(pmap))
		panic("_pmap_allocpte() called for kernel pmap\n");

	m->valid = VM_PAGE_BITS_ALL;
	vm_page_flag_clear(m, PG_ZERO);

	return (m);
}

static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
{
	unsigned ptepindex;
	vm_offset_t pteva;
	vm_page_t m;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = va >> SEGSHIFT;
retry:
	/*
	 * Get the page directory entry
	 */
	pteva = (vm_offset_t)pmap->pm_segtab[ptepindex];

	/*
	 * If the page table page is mapped, we just increment the hold
	 * count, and activate it.
	 */
	if (pteva) {
		/*
		 * In order to get the page table page, try the hint first.
		 */
		if (pmap->pm_ptphint &&
		    (pmap->pm_ptphint->pindex == ptepindex)) {
			m = pmap->pm_ptphint;
		} else {
			m = PHYS_TO_VM_PAGE(vtophys(pteva));
			pmap->pm_ptphint = m;
		}
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has been
		 * deallocated.
		 */
		m = _pmap_allocpte(pmap, ptepindex, flags);
		if (m == NULL && (flags & M_WAITOK))
			goto retry;
	}
	return m;
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Revision 1.397
 * - Merged pmap_release and pmap_release_free_page.  When pmap_release is
 *   called only the page directory page(s) can be left in the pmap pte
 *   object, since all page table pages will have been freed by
 *   pmap_remove_pages and pmap_remove.  In addition, there can only be one
 *   reference to the pmap and the page directory is wired, so the page(s)
 *   can never be busy.  So all there is to do is clear the magic mappings
 *   from the page directory and free the page(s).
 */
/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_offset_t ptdva;
	vm_page_t ptdpg;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));

	ptdva = (vm_offset_t)pmap->pm_segtab;
	ptdpg = PHYS_TO_VM_PAGE(vtophys(ptdva));

	if (ptdva >= VM_MIN_KERNEL_ADDRESS) {
		pmap_kremove(ptdva);
		kmem_free(kernel_map, ptdva, PAGE_SIZE);
	} else {
		KASSERT(MIPS_IS_KSEG0_ADDR(ptdva),
		    ("pmap_release: 0x%0lx is not in kseg0", (long)ptdva));
	}

	ptdpg->wire_count--;
	atomic_subtract_int(&cnt.v_wire_count, 1);
	vm_page_free_zero(ptdpg);
	PMAP_LOCK_DESTROY(pmap);
}

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_offset_t ptppaddr;
	vm_page_t nkpg;
	pt_entry_t *pte;
	int i, req;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	if (kernel_vm_end == 0) {
		kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
		nkpt = 0;
		while (segtab_pde(kernel_segmap, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
			    ~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
		}
	}
	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		if (segtab_pde(kernel_segmap, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
			    ~(PAGE_SIZE * NPTEPG - 1);
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}
		/*
		 * This index is bogus, but out of the way
		 */
		req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
		nkpg = vm_page_alloc(NULL, nkpt, req);
		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");

		nkpt++;

		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
		if (ptppaddr >= MIPS_KSEG0_LARGEST_PHYS) {
			/*
			 * We need to do something here, but I am not sure
			 * what.  We can access anything in the 0 - 512Meg
			 * region, but if we get a page to go in the kernel
			 * segmap that is outside of that, we really need to
			 * have another mapping beyond the temporary ones I
			 * have.  Not sure how to do this yet.  FIXME FIXME.
			 */
			panic("Gak, can't handle a k-page table outside of lower 512Meg");
		}
		pte = (pt_entry_t *)MIPS_PHYS_TO_KSEG0(ptppaddr);
		segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte;

		/*
		 * The R[4-7]?00 stores only one copy of the Global bit in
		 * the translation lookaside buffer for each 2 page entry.
		 * Thus invalid entries must have the Global bit set so that
		 * when Entry LO and Entry HI G bits are ANDed together they
		 * will produce a global bit to store in the TLB.
		 */
		for (i = 0; i < NPTEPG; i++, pte++)
			*pte = PTE_G;

		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
		    ~(PAGE_SIZE * NPTEPG - 1);
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}
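/*
 * Rounding sketch for pmap_growkernel() above: one second-level page
 * maps PAGE_SIZE * NPTEPG bytes of KVA (4 Meg with 4K pages), so
 *
 *	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 *
 * advances addr to the next such boundary above it, and the loop adds
 * one page table page per 4 Meg step until kernel_vm_end covers it.
 */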
/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{

	pv_entry_count--;
	uma_zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system when needed.
 * the memory allocation is performed bypassing the malloc code because
 * of the possibility of allocations at interrupt time.
 */
static pv_entry_t
get_pv_entry(pmap_t locked_pmap)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	struct vpgqueues *vpq;
	pt_entry_t *pte, oldpte;
	pmap_t pmap;
	pv_entry_t allocated_pv, next_pv, pv;
	vm_offset_t va;
	vm_page_t m;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
	if (allocated_pv != NULL) {
		pv_entry_count++;
		if (pv_entry_count > pv_entry_high_water)
			pagedaemon_wakeup();
		else
			return (allocated_pv);
	}
	/*
	 * Reclaim pv entries: At first, destroy mappings to inactive
	 * pages.  After that, if a pv entry is still needed, destroy
	 * mappings to active pages.
	 */
	if (ratecheck(&lastprint, &printinterval))
		printf("Approaching the limit on PV entries, "
		    "increase the vm.pmap.shpgperproc tunable.\n");
	vpq = &vm_page_queues[PQ_INACTIVE];
retry:
	TAILQ_FOREACH(m, &vpq->pl, pageq) {
		if (m->hold_count || m->busy)
			continue;
		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
			va = pv->pv_va;
			pmap = pv->pv_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
				continue;
			pmap->pm_stats.resident_count--;
			pte = pmap_pte(pmap, va);
			KASSERT(pte != NULL, ("pte"));
			oldpte = loadandclear((u_int *)pte);
			if (is_kernel_pmap(pmap))
				*pte = PTE_G;
			KASSERT((oldpte & PTE_W) == 0,
			    ("wired pte for unwired page"));
			if (m->md.pv_flags & PV_TABLE_REF)
				vm_page_flag_set(m, PG_REFERENCED);
			if (oldpte & PTE_M)
				vm_page_dirty(m);
			pmap_invalidate_page(pmap, va);
			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
			m->md.pv_list_count--;
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
			if (TAILQ_EMPTY(&m->md.pv_list)) {
				vm_page_flag_clear(m, PG_WRITEABLE);
				m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
			}
			pmap_unuse_pt(pmap, va, pv->pv_ptem);
			if (pmap != locked_pmap)
				PMAP_UNLOCK(pmap);
			if (allocated_pv == NULL)
				allocated_pv = pv;
			else
				free_pv_entry(pv);
		}
	}
	if (allocated_pv == NULL) {
		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
			vpq = &vm_page_queues[PQ_ACTIVE];
			goto retry;
		}
		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
	}
	return (allocated_pv);
}

/*
 * Revision 1.370
 *
 * Move pmap_collect() out of the machine-dependent code, rename it
 * to reflect its new location, and add page queue and flag locking.
 *
 * Notes: (1) alpha, i386, and ia64 had identical implementations
 * of pmap_collect() in terms of machine-independent interfaces;
 * (2) sparc64 doesn't require it; (3) powerpc had it as a TODO.
 *
 * MIPS implementation was identical to alpha [Junos 8.2]
 */
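/*
 * Sizing sketch for the pv entry zone (see pmap_init() above):
 *
 *	pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
 *	pv_entry_high_water = 9 * (pv_entry_max / 10);
 *
 * get_pv_entry() begins reclaiming mappings once the count passes the
 * high-water mark; vm.pmap.shpgperproc is the tunable named in the
 * printf above.
 */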
/*
 * If it is the first entry on the list, it is actually in the header
 * and we must copy the following entry up to the header.  Otherwise we
 * must search the list for the entry.  In either case we free the now
 * unused entry.
 */
static void
pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
			if (pmap == pv->pv_pmap && va == pv->pv_va)
				break;
		}
	} else {
		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
			if (va == pv->pv_va)
				break;
		}
	}

	KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
	TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count--;
	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
		vm_page_flag_clear(m, PG_WRITEABLE);

	TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
	free_pv_entry(pv);
}

/*
 * Create a pv entry for page at pa for (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m,
    boolean_t wired)
{
	pv_entry_t pv;

	pv = get_pv_entry(pmap);
	pv->pv_va = va;
	pv->pv_pmap = pmap;
	pv->pv_ptem = mpte;
	pv->pv_wired = wired;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count++;
}
/*
 * Conditionally create a pv entry.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
    vm_page_t m)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (pv_entry_count < pv_entry_high_water &&
	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
		pv_entry_count++;
		pv->pv_va = va;
		pv->pv_pmap = pmap;
		pv->pv_ptem = mpte;
		pv->pv_wired = FALSE;
		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count++;
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * pmap_remove_pte: do the things to unmap a page in a process
 */
static int
pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
{
	pt_entry_t oldpte;
	vm_page_t m;
	vm_offset_t pa;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	oldpte = loadandclear((u_int *)ptq);
	if (is_kernel_pmap(pmap))
		*ptq = PTE_G;

	if (oldpte & PTE_W)
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;
	pa = mips_tlbpfn_to_paddr(oldpte);

	if (page_is_managed(pa)) {
		m = PHYS_TO_VM_PAGE(pa);
		if (oldpte & PTE_M) {
#if defined(PMAP_DIAGNOSTIC)
			if (pmap_nw_modified(oldpte)) {
				printf(
				    "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
				    va, oldpte);
			}
#endif
			vm_page_dirty(m);
		}
		if (m->md.pv_flags & PV_TABLE_REF)
			vm_page_flag_set(m, PG_REFERENCED);
		m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);

		pmap_remove_entry(pmap, m, va);
	}
	return pmap_unuse_pt(pmap, va, NULL);
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(struct pmap *pmap, vm_offset_t va)
{
	register pt_entry_t *ptq;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	ptq = pmap_pte(pmap, va);

	/*
	 * if there is no pte for this address, just skip it!!!
	 */
	if (!ptq || !pmap_pte_v(ptq)) {
		return;
	}

	/*
	 * Write back all caches from the page being destroyed
	 */
	mips_dcache_wbinv_range_index(va, PAGE_SIZE);

	/*
	 * get a local va for mappings for this pmap.
	 */
	(void)pmap_remove_pte(pmap, ptq, va);
	pmap_invalidate_page(pmap, va);

	return;
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly rounded to the
 * page size.
 */
void
pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va, nva;

	if (pmap == NULL)
		return;

	if (pmap->pm_stats.resident_count == 0)
		return;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);

	/*
	 * special handling of removing one page.  a very common operation
	 * and easy to short circuit some code.
	 */
	if ((sva + PAGE_SIZE) == eva) {
		pmap_remove_page(pmap, sva);
		goto out;
	}
	for (va = sva; va < eva; va = nva) {
		if (!*pmap_pde(pmap, va)) {
			nva = mips_segtrunc(va + MIPS_SEGSIZE);
			continue;
		}
		pmap_remove_page(pmap, va);
		nva = va + PAGE_SIZE;
	}

out:
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}

/*
 * Routine:	pmap_remove_all
 * Function:
 *	Removes this physical page from all physical maps in which it
 *	resides.  Reflects back modify bits to the pager.
 *
 * Notes:
 *	Original versions of this routine were very inefficient because
 *	they iteratively called pmap_remove (slow...)
 */
void
pmap_remove_all(vm_page_t m)
{
	register pv_entry_t pv;
	register pt_entry_t *pte, tpte;

	KASSERT((m->flags & PG_FICTITIOUS) == 0,
	    ("pmap_remove_all: page %p is fictitious", m));
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);

	if (m->md.pv_flags & PV_TABLE_REF)
		vm_page_flag_set(m, PG_REFERENCED);

	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		PMAP_LOCK(pv->pv_pmap);

		/*
		 * If it's the last mapping, write back all caches from
		 * the page being destroyed.
		 */
		if (m->md.pv_list_count == 1)
			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);

		pv->pv_pmap->pm_stats.resident_count--;

		pte = pmap_pte(pv->pv_pmap, pv->pv_va);

		tpte = loadandclear((u_int *)pte);
		if (is_kernel_pmap(pv->pv_pmap))
			*pte = PTE_G;

		if (tpte & PTE_W)
			pv->pv_pmap->pm_stats.wired_count--;

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (tpte & PTE_M) {
#if defined(PMAP_DIAGNOSTIC)
			if (pmap_nw_modified(tpte)) {
				printf(
				    "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
				    pv->pv_va, tpte);
			}
#endif
			vm_page_dirty(m);
		}
		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);

		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
		PMAP_UNLOCK(pv->pv_pmap);
		free_pv_entry(pv);
	}

	vm_page_flag_clear(m, PG_WRITEABLE);
	m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
}

/*
 * Set the physical protection on the specified range of this map as
 * requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	pt_entry_t *pte;

	if (pmap == NULL)
		return;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}
	if (prot & VM_PROT_WRITE)
		return;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	while (sva < eva) {
		pt_entry_t pbits, obits;
		vm_page_t m;
		vm_offset_t pa;

		/*
		 * If segment table entry is empty, skip this segment.
		 */
		if (!*pmap_pde(pmap, sva)) {
			sva = mips_segtrunc(sva + MIPS_SEGSIZE);
			continue;
		}
		/*
		 * If pte is invalid, skip this page
		 */
		pte = pmap_pte(pmap, sva);
		if (!pmap_pte_v(pte)) {
			sva += PAGE_SIZE;
			continue;
		}
retry:
		obits = pbits = *pte;
		pa = mips_tlbpfn_to_paddr(pbits);

		if (page_is_managed(pa)) {
			m = PHYS_TO_VM_PAGE(pa);
			if (m->md.pv_flags & PV_TABLE_REF) {
				vm_page_flag_set(m, PG_REFERENCED);
				m->md.pv_flags &= ~PV_TABLE_REF;
			}
			if (pbits & PTE_M) {
				vm_page_dirty(m);
				m->md.pv_flags &= ~PV_TABLE_MOD;
			}
		}
		pbits = (pbits & ~PTE_M) | PTE_RO;

		if (pbits != *pte) {
			if (!atomic_cmpset_int((u_int *)pte, obits, pbits))
				goto retry;
			pmap_update_page(pmap, sva, pbits);
		}
		sva += PAGE_SIZE;
	}
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}

/*
 * Insert the given physical page (p) at the specified virtual address
 * (v) in the target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning that the related
 * pte can not be reclaimed.
 *
 * NB: This is the only routine which MAY NOT lazy-evaluate or lose
 * information.  That is, this routine must actually insert this page
 * into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
    vm_prot_t prot, boolean_t wired)
{
	vm_offset_t pa, opa;
	register pt_entry_t *pte;
	pt_entry_t origpte, newpte;
	vm_page_t mpte, om;
	int rw = 0;

	if (pmap == NULL)
		return;

	va &= ~PAGE_MASK;
#ifdef PMAP_DIAGNOSTIC
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");
#endif

	mpte = NULL;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);

	/*
	 * In the case that a page table page is not resident, we are
	 * creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		mpte = pmap_allocpte(pmap, va, M_WAITOK);
	}
	pte = pmap_pte(pmap, va);

	/*
	 * Page Directory table entry not valid, we need a new PT page
	 */
	if (pte == NULL) {
		panic("pmap_enter: invalid page directory, pdir=%p, va=%p\n",
		    (void *)pmap->pm_segtab, (void *)va);
	}
	pa = VM_PAGE_TO_PHYS(m);
	om = NULL;
	origpte = *pte;
	opa = mips_tlbpfn_to_paddr(origpte);

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if ((origpte & PTE_V) && (opa == pa)) {
		/*
		 * Wiring change, just update stats.  We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is
		 * wired, the PT page will be also.
		 */
		if (wired && ((origpte & PTE_W) == 0))
			pmap->pm_stats.wired_count++;
		else if (!wired && (origpte & PTE_W))
			pmap->pm_stats.wired_count--;

#if defined(PMAP_DIAGNOSTIC)
		if (pmap_nw_modified(origpte)) {
			printf(
			    "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
			    va, origpte);
		}
#endif

		/*
		 * Remove extra pte reference
		 */
		if (mpte)
			mpte->wire_count--;

		/*
		 * We might be turning off write access to the page, so we
		 * go ahead and sense modify status.
		 */
		if (page_is_managed(opa)) {
			om = m;
		}
		goto validate;
	}
	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		if (origpte & PTE_W)
			pmap->pm_stats.wired_count--;

		if (page_is_managed(opa)) {
			om = PHYS_TO_VM_PAGE(opa);
			pmap_remove_entry(pmap, om, va);
		}
		if (mpte != NULL) {
			mpte->wire_count--;
			KASSERT(mpte->wire_count > 0,
			    ("pmap_enter: missing reference to page table page,"
			    " va: %p", (void *)va));
		}
	} else
		pmap->pm_stats.resident_count++;

	/*
	 * Enter on the PV list if part of our managed memory.  Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */
	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("pmap_enter: managed mapping within the clean submap"));
		pmap_insert_entry(pmap, va, mpte, m, wired);
	}
	/*
	 * Increment counters
	 */
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	if ((access & VM_PROT_WRITE) != 0)
		m->md.pv_flags |= PV_TABLE_MOD | PV_TABLE_REF;
	rw = init_pte_prot(va, m, prot);

#ifdef PMAP_DEBUG
	printf("pmap_enter: va: 0x%08x -> pa: 0x%08x\n", va, pa);
#endif
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	newpte = mips_paddr_to_tlbpfn(pa) | rw | PTE_V;

	if (is_cacheable_mem(pa))
		newpte |= PTE_CACHE;
	else
		newpte |= PTE_UNCACHED;

	if (wired)
		newpte |= PTE_W;

	if (is_kernel_pmap(pmap)) {
		newpte |= PTE_G;
	}

	/*
	 * if the mapping or permission bits are different, we need to
	 * update the pte.
	 */
	if (origpte != newpte) {
		if (origpte & PTE_V) {
			*pte = newpte;
			if (page_is_managed(opa) && (opa != pa)) {
				if (om->md.pv_flags & PV_TABLE_REF)
					vm_page_flag_set(om, PG_REFERENCED);
				om->md.pv_flags &=
				    ~(PV_TABLE_REF | PV_TABLE_MOD);
			}
			if (origpte & PTE_M) {
				KASSERT((origpte & PTE_RW),
				    ("pmap_enter: modified page not writable:"
				    " va: %p, pte: 0x%x", (void *)va, origpte));
				if (page_is_managed(opa))
					vm_page_dirty(om);
			}
		} else {
			*pte = newpte;
		}
	}
	pmap_update_page(pmap, va, newpte);

	/*
	 * Sync I & D caches for executable pages.  Do this only if the
	 * target pmap belongs to the current process.  Otherwise, an
	 * unresolvable TLB miss may occur.
	 */
	if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
	    (prot & VM_PROT_EXECUTE)) {
		mips_icache_sync_range(va, PAGE_SIZE);
		mips_dcache_wbinv_range(va, PAGE_SIZE);
	}
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}
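/*
 * Usage sketch for pmap_enter() above, roughly as the MI VM layer
 * drives it on a write fault (hypothetical values; vm_fault() is the
 * usual caller):
 *
 *	vm_page_t m = ...;	\* resident page to install *\
 *	pmap_enter(&curproc->p_vmspace->vm_pmap, va,
 *	    VM_PROT_WRITE, m, VM_PROT_READ | VM_PROT_WRITE, FALSE);
 *
 * 'access' carries the faulting access type (used above to pre-set the
 * PV_TABLE_MOD/REF flags), 'prot' the mapping protection, and 'wired'
 * whether the resulting PTE may be reclaimed.
 */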
/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
 */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{

	PMAP_LOCK(pmap);
	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
	PMAP_UNLOCK(pmap);
}

static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, vm_page_t mpte)
{
	pt_entry_t *pte;
	vm_offset_t pa;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * In the case that a page table page is not resident, we are
	 * creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		unsigned ptepindex;
		vm_offset_t pteva;

		/*
		 * Calculate pagetable page index
		 */
		ptepindex = va >> SEGSHIFT;
		if (mpte && (mpte->pindex == ptepindex)) {
			mpte->wire_count++;
		} else {
			/*
			 * Get the page directory entry
			 */
			pteva = (vm_offset_t)pmap->pm_segtab[ptepindex];

			/*
			 * If the page table page is mapped, we just
			 * increment the hold count, and activate it.
			 */
			if (pteva) {
				if (pmap->pm_ptphint &&
				    (pmap->pm_ptphint->pindex == ptepindex)) {
					mpte = pmap->pm_ptphint;
				} else {
					mpte = PHYS_TO_VM_PAGE(vtophys(pteva));
					pmap->pm_ptphint = mpte;
				}
				mpte->wire_count++;
			} else {
				mpte = _pmap_allocpte(pmap, ptepindex,
				    M_NOWAIT);
				if (mpte == NULL)
					return (mpte);
			}
		}
	} else {
		mpte = NULL;
	}

	pte = pmap_pte(pmap, va);
	if (pmap_pte_v(pte)) {
		if (mpte != NULL) {
			mpte->wire_count--;
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
	    !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
		if (mpte != NULL) {
			pmap_unwire_pte_hold(pmap, mpte);
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;

	pa = VM_PAGE_TO_PHYS(m);

	/*
	 * Now validate mapping with RO protection
	 */
	*pte = mips_paddr_to_tlbpfn(pa) | PTE_V;

	if (is_cacheable_mem(pa))
		*pte |= PTE_CACHE;
	else
		*pte |= PTE_UNCACHED;

	if (is_kernel_pmap(pmap))
		*pte |= PTE_G;
	else {
		*pte |= PTE_RO;
		/*
		 * Sync I & D caches.  Do this only if the target pmap
		 * belongs to the current process.  Otherwise, an
		 * unresolvable TLB miss may occur.
		 */
		if (pmap == &curproc->p_vmspace->vm_pmap) {
			va &= ~PAGE_MASK;
			mips_icache_sync_range(va, PAGE_SIZE);
			mips_dcache_wbinv_range(va, PAGE_SIZE);
		}
	}
	return (mpte);
}

/*
 * Make a temporary mapping for a physical address.  This is only intended
 * to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
	vm_offset_t va;
	register_t intr;

	if (i != 0)
		printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
		    __func__);
/*
 * Make a temporary mapping for a physical address. This is only intended
 * to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
	vm_offset_t va;
	register_t intr;

	if (i != 0)
		printf("%s: ERROR!!! More than one page of virtual address "
		    "mapping not supported\n", __func__);

	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
		va = MIPS_PHYS_TO_KSEG0(pa);
	} else {
		int cpu;
		struct local_sysmaps *sysm;
		pt_entry_t *pte, npte;

		/*
		 * If this is used other than for dumps, we may need to
		 * leave interrupts disabled on return. If crash dumps
		 * don't work when we get to this point, consider leaving
		 * interrupts disabled as a starting point.
		 */
		intr = intr_disable();
		cpu = PCPU_GET(cpuid);
		sysm = &sysmap_lmem[cpu];
		/* Since this is for the debugger, no locks or any other fun */
		npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G |
		    PTE_W | PTE_CACHE;
		pte = pmap_pte(kernel_pmap, sysm->base);
		*pte = npte;
		sysm->valid1 = 1;
		pmap_update_page(kernel_pmap, sysm->base, npte);
		va = sysm->base;
		intr_restore(intr);
	}
	return ((void *)va);
}

void
pmap_kenter_temporary_free(vm_paddr_t pa)
{
	int cpu;
	register_t intr;
	struct local_sysmaps *sysm;

	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
		/* nothing to do for this case */
		return;
	}
	cpu = PCPU_GET(cpuid);
	sysm = &sysmap_lmem[cpu];
	if (sysm->valid1) {
		pt_entry_t *pte;

		intr = intr_disable();
		pte = pmap_pte(kernel_pmap, sysm->base);
		*pte = PTE_G;
		pmap_invalidate_page(kernel_pmap, sysm->base);
		intr_restore(intr);
		sysm->valid1 = 0;
	}
}
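/*
 * A usage sketch (the caller is assumed, not taken from this file):
 * dump code brackets each page it writes with the pair above, one
 * page at a time since only index 0 is supported:
 *
 *	void *p;
 *
 *	p = pmap_kenter_temporary(pa, 0);
 *	// ... copy PAGE_SIZE bytes from p to the dump device ...
 *	pmap_kenter_temporary_free(pa);
 *
 * For pa below MIPS_KSEG0_LARGEST_PHYS both calls reduce to KSEG0
 * address arithmetic and touch no page-table state at all.
 */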
/*
 * Moved the code to Machine Independent
 * vm_map_pmap_enter()
 */

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start. This page is
 * mapped at the given virtual address start. Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object. The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end. Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_page_t m_start, vm_prot_t prot)
{
	vm_page_t m, mpte;
	vm_pindex_t diff, psize;

	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
	psize = atop(end - start);
	mpte = NULL;
	m = m_start;
	PMAP_LOCK(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
		    prot, mpte);
		m = TAILQ_NEXT(m, listq);
	}
	PMAP_UNLOCK(pmap);
}

/*
 * pmap_object_init_pt preloads the ptes for a given object
 * into the specified pmap. This eliminates the blast of soft
 * faults on process startup and immediately after an mmap.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
{
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
	    ("pmap_object_init_pt: non-device object"));
}

/*
 * Routine:	pmap_change_wiring
 * Function:	Change the wiring attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
	pt_entry_t *pte;

	if (pmap == NULL)
		return;

	PMAP_LOCK(pmap);
	pte = pmap_pte(pmap, va);

	if (wired && !pmap_pte_w(pte))
		pmap->pm_stats.wired_count++;
	else if (!wired && pmap_pte_w(pte))
		pmap->pm_stats.wired_count--;

	/*
	 * Wiring is not a hardware characteristic so there is no need to
	 * invalidate TLB.
	 */
	pmap_pte_set_w(pte, wired);
	PMAP_UNLOCK(pmap);
}

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
    vm_size_t len, vm_offset_t src_addr)
{
}

/*
 * pmap_zero_page zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 */
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t va;
	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
	register_t intr;

	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
		va = MIPS_PHYS_TO_KSEG0(phys);

		bzero((caddr_t)va, PAGE_SIZE);
		mips_dcache_wbinv_range(va, PAGE_SIZE);
	} else {
		PMAP_LMEM_MAP1(va, phys);

		bzero((caddr_t)va, PAGE_SIZE);
		mips_dcache_wbinv_range(va, PAGE_SIZE);

		PMAP_LMEM_UNMAP();
	}
}

/*
 * pmap_zero_page_area zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t va;
	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
	register_t intr;

	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
		va = MIPS_PHYS_TO_KSEG0(phys);
		bzero((char *)va + off, size);
		mips_dcache_wbinv_range(va + off, size);
	} else {
		PMAP_LMEM_MAP1(va, phys);

		bzero((char *)va + off, size);
		mips_dcache_wbinv_range(va + off, size);

		PMAP_LMEM_UNMAP();
	}
}

void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t va;
	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
	register_t intr;

	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
		va = MIPS_PHYS_TO_KSEG0(phys);
		bzero((caddr_t)va, PAGE_SIZE);
		mips_dcache_wbinv_range(va, PAGE_SIZE);
	} else {
		PMAP_LMEM_MAP1(va, phys);

		bzero((caddr_t)va, PAGE_SIZE);
		mips_dcache_wbinv_range(va, PAGE_SIZE);

		PMAP_LMEM_UNMAP();
	}
}
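/*
 * The three zeroing routines above share one pattern; a minimal
 * sketch, where phys is the page's physical address:
 *
 *	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
 *		va = MIPS_PHYS_TO_KSEG0(phys);	// direct-mapped window
 *		bzero((caddr_t)va, PAGE_SIZE);
 *		mips_dcache_wbinv_range(va, PAGE_SIZE);
 *	} else {
 *		PMAP_LMEM_MAP1(va, phys);	// temporary per-CPU slot
 *		bzero((caddr_t)va, PAGE_SIZE);
 *		mips_dcache_wbinv_range(va, PAGE_SIZE);
 *		PMAP_LMEM_UNMAP();
 *	}
 *
 * The mips_dcache_wbinv_range() call pushes the stores out of the
 * virtually indexed data cache so that other aliases of the same
 * physical page observe the zeroed contents.
 */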
/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t src, vm_page_t dst)
{
	vm_offset_t va_src, va_dst;
	vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src);
	vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst);
	register_t intr;

	if ((phy_src < MIPS_KSEG0_LARGEST_PHYS) &&
	    (phy_dst < MIPS_KSEG0_LARGEST_PHYS)) {
		/* easy case, all can be accessed via KSEG0 */
		/*
		 * Flush all caches for VA that are mapped to this page
		 * to make sure that data in SDRAM is up to date
		 */
		pmap_flush_pvcache(src);
		mips_dcache_wbinv_range_index(
		    MIPS_PHYS_TO_KSEG0(phy_dst), PAGE_SIZE);
		va_src = MIPS_PHYS_TO_KSEG0(phy_src);
		va_dst = MIPS_PHYS_TO_KSEG0(phy_dst);
		bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
	} else {
		PMAP_LMEM_MAP2(va_src, phy_src, va_dst, phy_dst);

		bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);

		PMAP_LMEM_UNMAP();
	}
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page. This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;

	if (m->flags & PG_FICTITIOUS)
		return (FALSE);

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			return (TRUE);
		}
		loops++;
		if (loops >= 16)
			break;
	}
	return (FALSE);
}

/*
 * Remove all pages from the specified address space;
 * this aids process exit speeds. Also, this code
 * is special cased for the current process only, but
 * can have the more generic (and slightly slower)
 * mode enabled. This is much faster than pmap_remove
 * in the case of running down an entire address space.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	pt_entry_t *pte, tpte;
	pv_entry_t pv, npv;
	vm_page_t m;

	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
		printf("warning: pmap_remove_pages called with non-current pmap\n");
		return;
	}
	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	sched_pin();
	/* XXX: should this use TAILQ_FOREACH_SAFE? */
	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
		if (!pmap_pte_v(pte))
			panic("pmap_remove_pages: page on pm_pvlist has no pte");
		tpte = *pte;

		/*
		 * We cannot remove wired pages from a process' mapping
		 * at this time.
		 */
		if (tpte & PTE_W) {
			npv = TAILQ_NEXT(pv, pv_plist);
			continue;
		}
		*pte = is_kernel_pmap(pmap) ? PTE_G : 0;

		m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(tpte));
		KASSERT(m != NULL,
		    ("pmap_remove_pages: bad tpte %x", tpte));

		pv->pv_pmap->pm_stats.resident_count--;

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (tpte & PTE_M) {
			vm_page_dirty(m);
		}
		npv = TAILQ_NEXT(pv, pv_plist);
		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);

		m->md.pv_list_count--;
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
			vm_page_flag_clear(m, PG_WRITEABLE);
		}
		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
		free_pv_entry(pv);
	}
	sched_unpin();
	pmap_invalidate_all(pmap);
	PMAP_UNLOCK(pmap);
	vm_page_unlock_queues();
}
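/*
 * What the XXX above asks about, sketched (an alternative form, not
 * the code used here): TAILQ_FOREACH_SAFE() latches the next element
 * before the body runs, which replaces the manual npv bookkeeping:
 *
 *	TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
 *		if (*pmap_pte(pv->pv_pmap, pv->pv_va) & PTE_W)
 *			continue;
 *		// ... unlink and free pv as above ...
 *	}
 *
 * Either form is safe as long as only the current entry is removed
 * inside the loop body.
 */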
/*
 * pmap_testbit tests bits in pte's
 * note that the testbit/changebit routines are inline,
 * and a lot of things compile-time evaluate.
 */
static boolean_t
pmap_testbit(vm_page_t m, int bit)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	boolean_t rv = FALSE;

	if (m->flags & PG_FICTITIOUS)
		return (rv);

	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
		return (rv);

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
#if defined(PMAP_DIAGNOSTIC)
		if (!pv->pv_pmap) {
			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
			continue;
		}
#endif
		PMAP_LOCK(pv->pv_pmap);
		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
		rv = (*pte & bit) != 0;
		PMAP_UNLOCK(pv->pv_pmap);
		if (rv)
			break;
	}
	return (rv);
}

/*
 * this routine is used to modify bits in ptes
 */
static __inline void
pmap_changebit(vm_page_t m, int bit, boolean_t setem)
{
	pv_entry_t pv;
	pt_entry_t *pte;

	if (m->flags & PG_FICTITIOUS)
		return;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	/*
	 * Loop over all current mappings, setting/clearing as apropos.
	 * If setting RO, do we need to clear the VAC?
	 */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
#if defined(PMAP_DIAGNOSTIC)
		if (!pv->pv_pmap) {
			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
			continue;
		}
#endif

		PMAP_LOCK(pv->pv_pmap);
		pte = pmap_pte(pv->pv_pmap, pv->pv_va);

		if (setem) {
			*(int *)pte |= bit;
			pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
		} else {
			vm_offset_t pbits = *(vm_offset_t *)pte;

			if (pbits & bit) {
				if (bit == PTE_RW) {
					if (pbits & PTE_M) {
						vm_page_dirty(m);
					}
					*(int *)pte = (pbits & ~(PTE_M | PTE_RW)) |
					    PTE_RO;
				} else {
					*(int *)pte = pbits & ~bit;
				}
				pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
			}
		}
		PMAP_UNLOCK(pv->pv_pmap);
	}
	if (!setem && bit == PTE_RW)
		vm_page_flag_clear(m, PG_WRITEABLE);
}

/*
 * pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	pv_entry_t pv;
	int count;

	count = 0;
	if ((m->flags & PG_FICTITIOUS) != 0)
		return (count);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
		if (pv->pv_wired)
			count++;
	return (count);
}
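/*
 * A representative combination of the two primitives above (an
 * illustration; the actual callers appear later in this file as
 * pmap_is_modified() and pmap_clear_modify()):
 *
 *	if (pmap_testbit(m, PTE_M))		// any mapping dirty?
 *		vm_page_dirty(m);
 *	pmap_changebit(m, PTE_M, FALSE);	// clear PTE_M everywhere
 *
 * Both routines walk m's pv list under the page queues lock and take
 * each owning pmap's lock around the PTE access, so the bits cannot
 * change underfoot.
 */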
/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	pv_entry_t pv, npv;
	vm_offset_t va;
	pt_entry_t *pte;

	if ((m->flags & PG_WRITEABLE) == 0)
		return;

	/*
	 * Loop over all current mappings, setting/clearing as apropos.
	 */
	for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) {
		npv = TAILQ_NEXT(pv, pv_list);
		pte = pmap_pte(pv->pv_pmap, pv->pv_va);

		if ((pte == NULL) || !mips_pg_v(*pte))
			panic("pmap_remove_write: page on pv_list has no pte");

		va = pv->pv_va;
		pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE,
		    VM_PROT_READ | VM_PROT_EXECUTE);
	}
	vm_page_flag_clear(m, PG_WRITEABLE);
}

/*
 * pmap_ts_referenced:
 *
 *	Return the count of reference bits for a page, clearing all of them.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	if (m->flags & PG_FICTITIOUS)
		return (0);

	if (m->md.pv_flags & PV_TABLE_REF) {
		m->md.pv_flags &= ~PV_TABLE_REF;
		return (1);
	}
	return (0);
}

/*
 * pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	if (m->flags & PG_FICTITIOUS)
		return (FALSE);

	if (m->md.pv_flags & PV_TABLE_MOD)
		return (TRUE);
	else
		return (pmap_testbit(m, PTE_M));
}

/* N/C */

/*
 * pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *pte;
	boolean_t rv;

	rv = FALSE;
	PMAP_LOCK(pmap);
	if (*pmap_pde(pmap, addr)) {
		pte = pmap_pte(pmap, addr);
		rv = (*pte == 0);
	}
	PMAP_UNLOCK(pmap);
	return (rv);
}

/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	if (m->flags & PG_FICTITIOUS)
		return;
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (m->md.pv_flags & PV_TABLE_MOD) {
		pmap_changebit(m, PTE_M, FALSE);
		m->md.pv_flags &= ~PV_TABLE_MOD;
	}
}

/*
 * pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{

	return ((m->flags & PG_FICTITIOUS) == 0 &&
	    (m->md.pv_flags & PV_TABLE_REF) != 0);
}

/*
 * pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	if (m->flags & PG_FICTITIOUS)
		return;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (m->md.pv_flags & PV_TABLE_REF) {
		m->md.pv_flags &= ~PV_TABLE_REF;
	}
}

/*
 * Miscellaneous support routines follow
 */
/*
 * Map a set of physical memory pages into the kernel virtual
 * address space. Return a pointer to where it is mapped. This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(vm_offset_t pa, vm_size_t size)
{
	vm_offset_t va, tmpva, offset;

	/*
	 * KSEG1 maps only the first 512M of physical address space. For
	 * pa > 0x20000000 we should make a proper mapping using pmap_kenter.
	 */
	if ((pa + size - 1) < MIPS_KSEG0_LARGEST_PHYS)
		return ((void *)MIPS_PHYS_TO_KSEG1(pa));
	else {
		offset = pa & PAGE_MASK;
		size = roundup(size + offset, PAGE_SIZE);

		va = kmem_alloc_nofault(kernel_map, size);
		if (!va)
			panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
		pa = trunc_page(pa);
		for (tmpva = va; size > 0;) {
			pmap_kenter(tmpva, pa);
			size -= PAGE_SIZE;
			tmpva += PAGE_SIZE;
			pa += PAGE_SIZE;
		}
	}

	return ((void *)(va + offset));
}

void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
	vm_offset_t base, offset, tmpva;

	/* If the address is within KSEG1 then there is nothing to do */
	if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END)
		return;

	base = trunc_page(va);
	offset = va & PAGE_MASK;
	size = roundup(size + offset, PAGE_SIZE);
	for (tmpva = base; tmpva < base + size; tmpva += PAGE_SIZE)
		pmap_kremove(tmpva);
	kmem_free(kernel_map, base, size);
}

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *ptep, pte;
	vm_page_t m;
	int val = 0;

	PMAP_LOCK(pmap);
	ptep = pmap_pte(pmap, addr);
	pte = (ptep != NULL) ? *ptep : 0;
	PMAP_UNLOCK(pmap);

	if (mips_pg_v(pte)) {
		vm_offset_t pa;

		val = MINCORE_INCORE;
		pa = mips_tlbpfn_to_paddr(pte);
		if (!page_is_managed(pa))
			return (val);

		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pte & PTE_M)
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		/*
		 * Modified by someone
		 */
		else {
			vm_page_lock_queues();
			if (m->dirty || pmap_is_modified(m))
				val |= MINCORE_MODIFIED_OTHER;
			vm_page_unlock_queues();
		}
		/*
		 * Referenced by us or someone
		 */
		vm_page_lock_queues();
		if ((m->flags & PG_REFERENCED) || pmap_is_referenced(m))
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
		vm_page_unlock_queues();
	}
	return (val);
}

void
pmap_activate(struct thread *td)
{
	pmap_t pmap, oldpmap;
	struct proc *p = td->td_proc;

	critical_enter();

	pmap = vmspace_pmap(p->p_vmspace);
	oldpmap = PCPU_GET(curpmap);

	if (oldpmap)
		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
	pmap_asid_alloc(pmap);
	if (td == curthread) {
		PCPU_SET(segbase, pmap->pm_segtab);
		MachSetPID(pmap->pm_asid[PCPU_GET(cpuid)].asid);
	}

	PCPU_SET(curpmap, pmap);
	critical_exit();
}

void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
}
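/*
 * Illustrative use of the pmap_mapdev()/pmap_unmapdev() pair above
 * (the device address is hypothetical):
 *
 *	void *regs;
 *
 *	regs = pmap_mapdev(0x1f000000, 0x1000);
 *	// ... device register accesses through regs ...
 *	pmap_unmapdev((vm_offset_t)regs, 0x1000);
 *
 * Because 0x1f000000 lies below MIPS_KSEG0_LARGEST_PHYS, this example
 * consumes no kernel_map space: pmap_mapdev() returns an uncached
 * KSEG1 address and pmap_unmapdev() is a no-op. Only physical ranges
 * beyond the 512M window take the pmap_kenter() path.
 */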
/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
	vm_offset_t superpage_offset;

	if (size < NBSEG)
		return;
	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
		offset += ptoa(object->pg_color);
	superpage_offset = offset & SEGOFSET;
	if (size - ((NBSEG - superpage_offset) & SEGOFSET) < NBSEG ||
	    (*addr & SEGOFSET) == superpage_offset)
		return;
	if ((*addr & SEGOFSET) < superpage_offset)
		*addr = (*addr & ~SEGOFSET) + superpage_offset;
	else
		*addr = ((*addr + SEGOFSET) & ~SEGOFSET) + superpage_offset;
}

/*
 * Increase the starting virtual address of the given mapping so
 * that it is aligned to not be the second page in a TLB entry.
 * This routine assumes that the length is appropriately-sized so
 * that the allocation does not share a TLB entry at all if required.
 */
void
pmap_align_tlb(vm_offset_t *addr)
{
	if ((*addr & PAGE_SIZE) == 0)
		return;
	*addr += PAGE_SIZE;
}

int pmap_pid_dump(int pid);

int
pmap_pid_dump(int pid)
{
	pmap_t pmap;
	struct proc *p;
	int npte = 0;
	int index;

	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		if (p->p_pid != pid)
			continue;

		if (p->p_vmspace) {
			int i, j;

			printf("vmspace is %p\n",
			    p->p_vmspace);
			index = 0;
			pmap = vmspace_pmap(p->p_vmspace);
			printf("pmap asid:%x generation:%x\n",
			    pmap->pm_asid[0].asid,
			    pmap->pm_asid[0].gen);
			for (i = 0; i < NUSERPGTBLS; i++) {
				pd_entry_t *pde;
				pt_entry_t *pte;
				unsigned base = i << SEGSHIFT;

				pde = &pmap->pm_segtab[i];
				if (pde && pmap_pde_v(pde)) {
					for (j = 0; j < 1024; j++) {
						vm_offset_t va = base +
						    (j << PAGE_SHIFT);

						pte = pmap_pte(pmap, va);
						if (pte && pmap_pte_v(pte)) {
							vm_offset_t pa;
							vm_page_t m;

							pa = mips_tlbpfn_to_paddr(*pte);
							m = PHYS_TO_VM_PAGE(pa);
							printf("va: %p, pt: %p, h: %d, w: %d, f: 0x%x",
							    (void *)va,
							    (void *)pa,
							    m->hold_count,
							    m->wire_count,
							    m->flags);
							npte++;
							index++;
							if (index >= 2) {
								index = 0;
								printf("\n");
							} else {
								printf(" ");
							}
						}
					}
				}
			}
		} else {
			printf("Process pid:%d has no vmspace\n", pid);
		}
		break;
	}
	sx_sunlock(&allproc_lock);
	return (npte);
}
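/*
 * Usage sketch: pmap_pid_dump() is intended to be called by hand from
 * the kernel debugger, e.g.
 *
 *	db> call pmap_pid_dump(42)
 *
 * (42 being whatever pid is of interest). It walks that process's
 * segment table and prints every valid PTE, two per line, along with
 * the backing page's hold count, wire count, and flags.
 */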
#if defined(DEBUG)

static void pads(pmap_t pm);
void pmap_pvdump(vm_offset_t pa);

/* Print the address space of a pmap. */
static void
pads(pmap_t pm)
{
	unsigned va, i, j;
	pt_entry_t *ptep;

	if (pm == kernel_pmap)
		return;
	for (i = 0; i < NPTEPG; i++)
		if (pm->pm_segtab[i])
			for (j = 0; j < NPTEPG; j++) {
				va = (i << SEGSHIFT) + (j << PAGE_SHIFT);
				if (pm == kernel_pmap && va < KERNBASE)
					continue;
				if (pm != kernel_pmap &&
				    va >= VM_MAXUSER_ADDRESS)
					continue;
				ptep = pmap_pte(pm, va);
				if (pmap_pte_v(ptep))
					printf("%x:%x ", va, *(int *)ptep);
			}
}

void
pmap_pvdump(vm_offset_t pa)
{
	pv_entry_t pv;
	vm_page_t m;

	printf("pa %x", pa);
	m = PHYS_TO_VM_PAGE(pa);
	for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
	    pv = TAILQ_NEXT(pv, pv_list)) {
		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
		pads(pv->pv_pmap);
	}
	printf(" ");
}

/* N/C */
#endif

/*
 * Allocate TLB address space tag (called ASID or TLBPID) and return it.
 * It takes almost as much or more time to search the TLB for a
 * specific ASID and flush those entries as it does to flush the entire TLB.
 * Therefore, when we allocate a new ASID, we just take the next number. When
 * we run out of numbers, we flush the TLB, increment the generation count
 * and start over. ASID zero is reserved for kernel use.
 */
static void
pmap_asid_alloc(pmap_t pmap)
{
	if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED ||
	    pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
		if (PCPU_GET(next_asid) == pmap_max_asid) {
			MIPS_TBIAP();
			PCPU_SET(asid_generation,
			    (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
			if (PCPU_GET(asid_generation) == 0) {
				PCPU_SET(asid_generation, 1);
			}
			PCPU_SET(next_asid, 1);	/* 0 means invalid */
		}
		pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
		pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
		PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
	}
}

int
page_is_managed(vm_offset_t pa)
{
	vm_offset_t pgnum = mips_btop(pa);

	if (pgnum >= first_page) {
		vm_page_t m;

		m = PHYS_TO_VM_PAGE(pa);
		if (m == NULL)
			return (0);
		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
			return (1);
	}
	return (0);
}

static int
init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	int rw = 0;

	if (!(prot & VM_PROT_WRITE))
		rw = PTE_ROPAGE;
	else {
		if (va >= VM_MIN_KERNEL_ADDRESS) {
			/*
			 * Don't bother to trap on kernel writes, just
			 * record page as dirty.
			 */
			rw = PTE_RWPAGE;
			vm_page_dirty(m);
		} else if ((m->md.pv_flags & PV_TABLE_MOD) ||
		    m->dirty == VM_PAGE_BITS_ALL)
			rw = PTE_RWPAGE;
		else
			rw = PTE_CWPAGE;
		vm_page_flag_set(m, PG_WRITEABLE);
	}
	return (rw);
}

/*
 * pmap_page_is_free:
 *
 *	Called when a page is freed to allow pmap to clean up
 *	any extra state associated with the page. In this case
 *	clear modified/referenced bits.
 */
void
pmap_page_is_free(vm_page_t m)
{

	m->md.pv_flags = 0;
}

/*
 * pmap_set_modified:
 *
 *	Sets the page modified and reference bits for the specified page.
 */
void
pmap_set_modified(vm_offset_t pa)
{

	PHYS_TO_VM_PAGE(pa)->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD);
}
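/*
 * A worked example of the rollover in pmap_asid_alloc() above,
 * assuming a hypothetical pmap_max_asid of 256: ASIDs 1 through 255
 * are handed out sequentially as address spaces are activated. When
 * next_asid reaches 256 the allocator does, in effect,
 *
 *	MIPS_TBIAP();		// drop non-global (user) TLB entries
 *	asid_generation++;	// masked by ASIDGEN_MASK, skipping 0
 *	next_asid = 1;		// ASID 0 stays reserved for the kernel
 *
 * and restarts. A pmap still holding an ASID from the old generation
 * fails the gen comparison on its next activation and receives a
 * fresh ASID, so stale translations can never be matched again.
 */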
/*
 * Routine:	pmap_kextract
 * Function:
 *	Extract the physical page address associated with the
 *	given virtual address.
 */
/* PMAP_INLINE */ vm_offset_t
pmap_kextract(vm_offset_t va)
{
	vm_offset_t pa = 0;

	if (va < MIPS_KSEG0_START) {
		/* user virtual address */
		pt_entry_t *ptep;

		if (curproc && curproc->p_vmspace) {
			ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
			if (ptep)
				pa = mips_tlbpfn_to_paddr(*ptep) |
				    (va & PAGE_MASK);
		}
	} else if (va >= MIPS_KSEG0_START &&
	    va < MIPS_KSEG1_START)
		pa = MIPS_KSEG0_TO_PHYS(va);
	else if (va >= MIPS_KSEG1_START &&
	    va < MIPS_KSEG2_START)
		pa = MIPS_KSEG1_TO_PHYS(va);
	else if (va >= MIPS_KSEG2_START && va < VM_MAX_KERNEL_ADDRESS) {
		pt_entry_t *ptep;

		/* Is the kernel pmap initialized? */
		if (kernel_pmap->pm_active) {
			/* It's inside the virtual address range */
			ptep = pmap_pte(kernel_pmap, va);
			if (ptep)
				pa = mips_tlbpfn_to_paddr(*ptep) |
				    (va & PAGE_MASK);
		}
	}
	return (pa);
}

void
pmap_flush_pvcache(vm_page_t m)
{
	pv_entry_t pv;

	if (m != NULL) {
		for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
		    pv = TAILQ_NEXT(pv, pv_list)) {
			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
		}
	}
}

void
pmap_save_tlb(void)
{
	int tlbno, cpu;

	cpu = PCPU_GET(cpuid);

	for (tlbno = 0; tlbno < num_tlbentries; ++tlbno)
		MachTLBRead(tlbno, &tlbstash[cpu][tlbno]);
}

#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(tlb, ddb_dump_tlb)
{
	int cpu, tlbno;
	struct tlb *tlb;

	if (have_addr)
		cpu = ((addr >> 4) % 16) * 10 + (addr % 16);
	else
		cpu = PCPU_GET(cpuid);

	if (cpu < 0 || cpu >= mp_ncpus) {
		db_printf("Invalid CPU %d\n", cpu);
		return;
	} else
		db_printf("CPU %d:\n", cpu);

	if (cpu == PCPU_GET(cpuid))
		pmap_save_tlb();

	for (tlbno = 0; tlbno < num_tlbentries; ++tlbno) {
		tlb = &tlbstash[cpu][tlbno];
		if (tlb->tlb_lo0 & PTE_V || tlb->tlb_lo1 & PTE_V) {
			printf("TLB %2d vad 0x%0lx ",
			    tlbno, (long)(tlb->tlb_hi & 0xffffff00));
		} else {
			printf("TLB*%2d vad 0x%0lx ",
			    tlbno, (long)(tlb->tlb_hi & 0xffffff00));
		}
		printf("0=0x%0lx ", pfn_to_vad((long)tlb->tlb_lo0));
		printf("%c", tlb->tlb_lo0 & PTE_V ? 'V' : '-');
		printf("%c", tlb->tlb_lo0 & PTE_M ? 'M' : '-');
		printf("%c", tlb->tlb_lo0 & PTE_G ? 'G' : '-');
		printf(" atr %x ", (tlb->tlb_lo0 >> 3) & 7);
		printf("1=0x%0lx ", pfn_to_vad((long)tlb->tlb_lo1));
		printf("%c", tlb->tlb_lo1 & PTE_V ? 'V' : '-');
		printf("%c", tlb->tlb_lo1 & PTE_M ? 'M' : '-');
		printf("%c", tlb->tlb_lo1 & PTE_G ? 'G' : '-');
		printf(" atr %x ", (tlb->tlb_lo1 >> 3) & 7);
		printf(" sz=%x pid=%x\n", tlb->tlb_mask,
		    (tlb->tlb_hi & 0x000000ff));
	}
}
#endif	/* DDB */
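/*
 * Usage sketch for the ddb(4) command above:
 *
 *	db> show tlb		// dump the current CPU's TLB
 *	db> show tlb 1		// dump CPU 1's stashed copy
 *
 * The current CPU is snapshotted on the spot via pmap_save_tlb();
 * another CPU's stash is only meaningful if that CPU has itself run
 * pmap_save_tlb(), e.g. while being stopped for the debugger.
 * Entries printed with a '*' have neither lo0 nor lo1 valid.
 */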