1/* $NetBSD: pmap.c,v 1.275 2011/07/12 07:51:34 mrg Exp $ */ 2/* 3 * 4 * Copyright (C) 1996-1999 Eduardo Horvath. 5 * All rights reserved. 6 * 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 */ 27 28#include <sys/cdefs.h> 29__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.275 2011/07/12 07:51:34 mrg Exp $"); 30 31#undef NO_VCACHE /* Don't forget the locked TLB in dostart */ 32#define HWREF 33 34#include "opt_ddb.h" 35#include "opt_multiprocessor.h" 36#include "opt_modular.h" 37 38#include <sys/param.h> 39#include <sys/malloc.h> 40#include <sys/queue.h> 41#include <sys/systm.h> 42#include <sys/msgbuf.h> 43#include <sys/pool.h> 44#include <sys/exec.h> 45#include <sys/core.h> 46#include <sys/kcore.h> 47#include <sys/proc.h> 48#include <sys/atomic.h> 49#include <sys/cpu.h> 50 51#include <sys/exec_aout.h> /* for MID_* */ 52 53#include <uvm/uvm.h> 54 55#include <machine/pcb.h> 56#include <machine/sparc64.h> 57#include <machine/ctlreg.h> 58#include <machine/promlib.h> 59#include <machine/kcore.h> 60#include <machine/bootinfo.h> 61 62#include <sparc64/sparc64/cache.h> 63 64#ifdef DDB 65#include <machine/db_machdep.h> 66#include <ddb/db_command.h> 67#include <ddb/db_sym.h> 68#include <ddb/db_variables.h> 69#include <ddb/db_extern.h> 70#include <ddb/db_access.h> 71#include <ddb/db_output.h> 72#else 73#define Debugger() 74#define db_printf printf 75#endif 76 77#define MEG (1<<20) /* 1MB */ 78#define KB (1<<10) /* 1KB */ 79 80paddr_t cpu0paddr; /* contigious phys memory preallocated for cpus */ 81 82/* These routines are in assembly to allow access thru physical mappings */ 83extern int64_t pseg_get_real(struct pmap *, vaddr_t); 84extern int pseg_set_real(struct pmap *, vaddr_t, int64_t, paddr_t); 85 86/* 87 * Diatribe on ref/mod counting: 88 * 89 * First of all, ref/mod info must be non-volatile. Hence we need to keep it 90 * in the pv_entry structure for each page. (We could bypass this for the 91 * vm_page, but that's a long story....) 92 * 93 * This architecture has nice, fast traps with lots of space for software bits 94 * in the TTE. To accelerate ref/mod counts we make use of these features. 95 * 96 * When we map a page initially, we place a TTE in the page table. It's 97 * inserted with the TLB_W and TLB_ACCESS bits cleared. If a page is really 98 * writable we set the TLB_REAL_W bit for the trap handler. 99 * 100 * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS 101 * bit in the approprate TTE in the page table. 
Whenever we take a protection 102 * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD 103 * bits to enable writing and mark the page as modified. 104 * 105 * This means that we may have ref/mod information all over the place. The 106 * pmap routines must traverse the page tables of all pmaps with a given page 107 * and collect/clear all the ref/mod information and copy it into the pv_entry. 108 */ 109 110#ifdef NO_VCACHE 111#define FORCE_ALIAS 1 112#else 113#define FORCE_ALIAS 0 114#endif 115 116#define PV_ALIAS 0x1LL 117#define PV_REF 0x2LL 118#define PV_MOD 0x4LL 119#define PV_NVC 0x8LL 120#define PV_NC 0x10LL 121#define PV_WE 0x20LL /* Debug -- this page was writable somtime */ 122#define PV_MASK (0x03fLL) 123#define PV_VAMASK (~(PAGE_SIZE - 1)) 124#define PV_MATCH(pv,va) (!(((pv)->pv_va ^ (va)) & PV_VAMASK)) 125#define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | \ 126 (((pv)->pv_va) & PV_MASK))) 127 128struct pool_cache pmap_cache; 129struct pool_cache pmap_pv_cache; 130 131pv_entry_t pmap_remove_pv(struct pmap *, vaddr_t, struct vm_page *); 132void pmap_enter_pv(struct pmap *, vaddr_t, paddr_t, struct vm_page *, 133 pv_entry_t); 134void pmap_page_cache(struct pmap *, paddr_t, int); 135 136/* 137 * First and last managed physical addresses. 138 * XXX only used for dumping the system. 139 */ 140paddr_t vm_first_phys, vm_num_phys; 141 142/* 143 * Here's the CPU TSB stuff. It's allocated in pmap_bootstrap. 144 */ 145int tsbsize; /* tsbents = 512 * 2^^tsbsize */ 146#define TSBENTS (512<<tsbsize) 147#define TSBSIZE (TSBENTS * 16) 148 149static struct pmap kernel_pmap_; 150struct pmap *const kernel_pmap_ptr = &kernel_pmap_; 151 152static int ctx_alloc(struct pmap *); 153static bool pmap_is_referenced_locked(struct vm_page *); 154 155static void ctx_free(struct pmap *, struct cpu_info *); 156 157/* 158 * Check if any MMU has a non-zero context 159 */ 160static inline bool 161pmap_has_ctx(struct pmap *p) 162{ 163 int i; 164 165 /* any context on any cpu? */ 166 for (i = 0; i < sparc_ncpus; i++) 167 if (p->pm_ctx[i] > 0) 168 return true; 169 170 return false; 171} 172 173#ifdef MULTIPROCESSOR 174#define pmap_ctx(PM) ((PM)->pm_ctx[cpu_number()]) 175#else 176#define pmap_ctx(PM) ((PM)->pm_ctx[0]) 177#endif 178 179/* 180 * Check if this pmap has a live mapping on some MMU. 181 */ 182static inline bool 183pmap_is_on_mmu(struct pmap *p) 184{ 185 /* The kernel pmap is always on all MMUs */ 186 if (p == pmap_kernel()) 187 return true; 188 189 return pmap_has_ctx(p); 190} 191 192/* 193 * Virtual and physical addresses of the start and end of kernel text 194 * and data segments. 195 */ 196vaddr_t ktext; 197paddr_t ktextp; 198vaddr_t ektext; 199paddr_t ektextp; 200vaddr_t kdata; 201paddr_t kdatap; 202vaddr_t ekdata; 203paddr_t ekdatap; 204 205/* 206 * Kernel 4MB pages. 
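 * The kernel text and data segments are covered by locked 4MB TLB entries
 * installed during early startup (the locked TLB in dostart/locore).
 * kernel_tlbs[]/kernel_tlb_slots below describe those entries; they are
 * consulted by pmap_kextract() and copied into the MP trampoline by
 * pmap_mp_init().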
207 */ 208extern struct tlb_entry *kernel_tlbs; 209extern int kernel_tlb_slots; 210 211static int npgs; 212 213vaddr_t vmmap; /* one reserved MI vpage for /dev/mem */ 214 215int phys_installed_size; /* Installed physical memory */ 216struct mem_region *phys_installed; 217 218paddr_t avail_start, avail_end; /* These are used by ps & family */ 219 220static int ptelookup_va(vaddr_t va); 221 222static inline void 223clrx(void *addr) 224{ 225 __asm volatile("clrx [%0]" : : "r" (addr) : "memory"); 226} 227 228static void 229tsb_invalidate(vaddr_t va, pmap_t pm) 230{ 231 struct cpu_info *ci; 232 int ctx; 233 bool kpm = (pm == pmap_kernel()); 234 int i; 235 int64_t tag; 236 237 i = ptelookup_va(va); 238#ifdef MULTIPROCESSOR 239 for (ci = cpus; ci != NULL; ci = ci->ci_next) { 240 if (!CPUSET_HAS(cpus_active, ci->ci_index)) 241 continue; 242#else 243 ci = curcpu(); 244#endif 245 ctx = pm->pm_ctx[ci->ci_index]; 246 if (kpm || ctx > 0) { 247 tag = TSB_TAG(0, ctx, va); 248 if (ci->ci_tsb_dmmu[i].tag == tag) { 249 clrx(&ci->ci_tsb_dmmu[i].data); 250 } 251 if (ci->ci_tsb_immu[i].tag == tag) { 252 clrx(&ci->ci_tsb_immu[i].data); 253 } 254 } 255#ifdef MULTIPROCESSOR 256 } 257#endif 258} 259 260struct prom_map *prom_map; 261int prom_map_size; 262 263#ifdef DEBUG 264struct { 265 int kernel; /* entering kernel mapping */ 266 int user; /* entering user mapping */ 267 int ptpneeded; /* needed to allocate a PT page */ 268 int pwchange; /* no mapping change, just wiring or protection */ 269 int wchange; /* no mapping change, just wiring */ 270 int mchange; /* was mapped but mapping to different page */ 271 int managed; /* a managed page */ 272 int firstpv; /* first mapping for this PA */ 273 int secondpv; /* second mapping for this PA */ 274 int ci; /* cache inhibited */ 275 int unmanaged; /* not a managed page */ 276 int flushes; /* cache flushes */ 277 int cachehit; /* new entry forced valid entry out */ 278} enter_stats; 279struct { 280 int calls; 281 int removes; 282 int flushes; 283 int tflushes; /* TLB flushes */ 284 int pidflushes; /* HW pid stolen */ 285 int pvfirst; 286 int pvsearch; 287} remove_stats; 288#define ENTER_STAT(x) do { enter_stats.x ++; } while (0) 289#define REMOVE_STAT(x) do { remove_stats.x ++; } while (0) 290 291#define PDB_CREATE 0x000001 292#define PDB_DESTROY 0x000002 293#define PDB_REMOVE 0x000004 294#define PDB_CHANGEPROT 0x000008 295#define PDB_ENTER 0x000010 296#define PDB_DEMAP 0x000020 /* used in locore */ 297#define PDB_REF 0x000040 298#define PDB_COPY 0x000080 299#define PDB_MMU_ALLOC 0x000100 300#define PDB_MMU_STEAL 0x000200 301#define PDB_CTX_ALLOC 0x000400 302#define PDB_CTX_STEAL 0x000800 303#define PDB_MMUREG_ALLOC 0x001000 304#define PDB_MMUREG_STEAL 0x002000 305#define PDB_CACHESTUFF 0x004000 306#define PDB_ALIAS 0x008000 307#define PDB_EXTRACT 0x010000 308#define PDB_BOOT 0x020000 309#define PDB_BOOT1 0x040000 310#define PDB_GROW 0x080000 311#define PDB_CTX_FLUSHALL 0x100000 312int pmapdebug = 0; 313/* Number of H/W pages stolen for page tables */ 314int pmap_pages_stolen = 0; 315 316#define BDPRINTF(n, f) if (pmapdebug & (n)) prom_printf f 317#define DPRINTF(n, f) if (pmapdebug & (n)) printf f 318#else 319#define ENTER_STAT(x) do { /* nothing */ } while (0) 320#define REMOVE_STAT(x) do { /* nothing */ } while (0) 321#define BDPRINTF(n, f) 322#define DPRINTF(n, f) 323#endif 324 325#define pv_check() 326 327static int pmap_get_page(paddr_t *); 328static void pmap_free_page(paddr_t, sparc64_cpuset_t); 329static void pmap_free_page_noflush(paddr_t); 330 331/* 332 * 
Global pmap locks. 333 */ 334static kmutex_t pmap_lock; 335static bool lock_available = false; 336 337/* 338 * Support for big page sizes. This maps the page size to the 339 * page bits. That is: these are the bits between 8K pages and 340 * larger page sizes that cause aliasing. 341 */ 342#define PSMAP_ENTRY(MASK, CODE) { .mask = MASK, .code = CODE } 343struct page_size_map page_size_map[] = { 344#ifdef DEBUG 345 PSMAP_ENTRY(0, PGSZ_8K & 0), /* Disable large pages */ 346#endif 347 PSMAP_ENTRY((4 * 1024 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_4M), 348 PSMAP_ENTRY((512 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_512K), 349 PSMAP_ENTRY((64 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_64K), 350 PSMAP_ENTRY((8 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_8K), 351 PSMAP_ENTRY(0, 0), 352}; 353 354/* 355 * This probably shouldn't be necessary, but it stops USIII machines from 356 * breaking in general, and not just for MULTIPROCESSOR. 357 */ 358#define USE_LOCKSAFE_PSEG_GETSET 359#if defined(USE_LOCKSAFE_PSEG_GETSET) 360 361static kmutex_t pseg_lock; 362 363static __inline__ int64_t 364pseg_get_locksafe(struct pmap *pm, vaddr_t va) 365{ 366 int64_t rv; 367 bool took_lock = lock_available /*&& pm == pmap_kernel()*/; 368 369 if (__predict_true(took_lock)) 370 mutex_enter(&pseg_lock); 371 rv = pseg_get_real(pm, va); 372 if (__predict_true(took_lock)) 373 mutex_exit(&pseg_lock); 374 return rv; 375} 376 377static __inline__ int 378pseg_set_locksafe(struct pmap *pm, vaddr_t va, int64_t data, paddr_t ptp) 379{ 380 int rv; 381 bool took_lock = lock_available /*&& pm == pmap_kernel()*/; 382 383 if (__predict_true(took_lock)) 384 mutex_enter(&pseg_lock); 385 rv = pseg_set_real(pm, va, data, ptp); 386 if (__predict_true(took_lock)) 387 mutex_exit(&pseg_lock); 388 return rv; 389} 390 391#define pseg_get(pm, va) pseg_get_locksafe(pm, va) 392#define pseg_set(pm, va, data, ptp) pseg_set_locksafe(pm, va, data, ptp) 393 394#else /* USE_LOCKSAFE_PSEG_GETSET */ 395 396#define pseg_get(pm, va) pseg_get_real(pm, va) 397#define pseg_set(pm, va, data, ptp) pseg_set_real(pm, va, data, ptp) 398 399#endif /* USE_LOCKSAFE_PSEG_GETSET */ 400 401/* 402 * Enter a TTE into the kernel pmap only. Don't do anything else. 403 * 404 * Use only during bootstrapping since it does no locking and 405 * can lose ref/mod info!!!! 406 * 407 */ 408static void pmap_enter_kpage(vaddr_t va, int64_t data) 409{ 410 paddr_t newp; 411 412 newp = 0UL; 413 while (pseg_set(pmap_kernel(), va, data, newp) & 1) { 414 if (!pmap_get_page(&newp)) { 415 prom_printf("pmap_enter_kpage: out of pages\n"); 416 panic("pmap_enter_kpage"); 417 } 418 419 ENTER_STAT(ptpneeded); 420 BDPRINTF(PDB_BOOT1, 421 ("pseg_set: pm=%p va=%p data=%lx newp %lx\n", 422 pmap_kernel(), va, (long)data, (long)newp)); 423#ifdef DEBUG 424 if (pmapdebug & PDB_BOOT1) 425 {int i; for (i=0; i<140000000; i++) ;} 426#endif 427 } 428} 429 430/* 431 * Check the bootargs to see if we need to enable bootdebug. 432 */ 433#ifdef DEBUG 434static void pmap_bootdebug(void) 435{ 436 const char *cp = prom_getbootargs(); 437 438 for (;;) 439 switch (*++cp) { 440 case '\0': 441 return; 442 case 'V': 443 pmapdebug |= PDB_BOOT|PDB_BOOT1; 444 break; 445 case 'D': 446 pmapdebug |= PDB_BOOT1; 447 break; 448 } 449} 450#endif 451 452 453/* 454 * Calculate the correct number of page colors to use. This should be the 455 * size of the E$/PAGE_SIZE. However, different CPUs can have different sized 456 * E$, so we need to take the GCM of the E$ size. 
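 * In practice we just walk the PROM "cpu" nodes and take the largest
 * ecache-size / ecache-associativity / PAGE_SIZE value we find.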
457 */ 458static int pmap_calculate_colors(void) 459{ 460 int node; 461 int size, assoc, color, maxcolor = 1; 462 463 for (node = prom_firstchild(prom_findroot()); node != 0; 464 node = prom_nextsibling(node)) { 465 char *name = prom_getpropstring(node, "device_type"); 466 if (strcmp("cpu", name) != 0) 467 continue; 468 469 /* Found a CPU, get the E$ info. */ 470 size = prom_getpropint(node, "ecache-size", -1); 471 if (size == -1) { 472 prom_printf("pmap_calculate_colors: node %x has " 473 "no ecache-size\n", node); 474 /* If we can't get the E$ size, skip the node */ 475 continue; 476 } 477 478 assoc = prom_getpropint(node, "ecache-associativity", 1); 479 color = size/assoc/PAGE_SIZE; 480 if (color > maxcolor) 481 maxcolor = color; 482 } 483 return (maxcolor); 484} 485 486static void pmap_alloc_bootargs(void) 487{ 488 char *v; 489 490 v = OF_claim(NULL, 2*PAGE_SIZE, PAGE_SIZE); 491 if ((v == NULL) || (v == (void*)-1)) 492 panic("Can't claim two pages of memory."); 493 494 memset(v, 0, 2*PAGE_SIZE); 495 496 cpu_args = (struct cpu_bootargs*)v; 497} 498 499#if defined(MULTIPROCESSOR) 500static void pmap_mp_init(void); 501 502static void 503pmap_mp_init(void) 504{ 505 pte_t *tp; 506 char *v; 507 int i; 508 509 extern void cpu_mp_startup(void); 510 511 if ((v = OF_claim(NULL, PAGE_SIZE, PAGE_SIZE)) == NULL) { 512 panic("pmap_mp_init: Cannot claim a page."); 513 } 514 515 memcpy(v, mp_tramp_code, mp_tramp_code_len); 516 *(u_long *)(v + mp_tramp_tlb_slots) = kernel_tlb_slots; 517 *(u_long *)(v + mp_tramp_func) = (u_long)cpu_mp_startup; 518 *(u_long *)(v + mp_tramp_ci) = (u_long)cpu_args; 519 tp = (pte_t *)(v + mp_tramp_code_len); 520 for (i = 0; i < kernel_tlb_slots; i++) { 521 tp[i].tag = kernel_tlbs[i].te_va; 522 tp[i].data = TSB_DATA(0, /* g */ 523 PGSZ_4M, /* sz */ 524 kernel_tlbs[i].te_pa, /* pa */ 525 1, /* priv */ 526 1, /* write */ 527 1, /* cache */ 528 1, /* aliased */ 529 1, /* valid */ 530 0 /* ie */); 531 tp[i].data |= TLB_L | TLB_CV; 532 DPRINTF(PDB_BOOT1, ("xtlb[%d]: Tag: %" PRIx64 " Data: %" 533 PRIx64 "\n", i, tp[i].tag, tp[i].data)); 534 } 535 536 for (i = 0; i < PAGE_SIZE; i += sizeof(long)) 537 flush(v + i); 538 539 cpu_spinup_trampoline = (vaddr_t)v; 540} 541#else 542#define pmap_mp_init() ((void)0) 543#endif 544 545paddr_t pmap_kextract(vaddr_t va); 546 547paddr_t 548pmap_kextract(vaddr_t va) 549{ 550 int i; 551 paddr_t paddr = (paddr_t)-1; 552 553 for (i = 0; i < kernel_tlb_slots; i++) { 554 if ((va & ~PAGE_MASK_4M) == kernel_tlbs[i].te_va) { 555 paddr = kernel_tlbs[i].te_pa + 556 (paddr_t)(va & PAGE_MASK_4M); 557 break; 558 } 559 } 560 561 if (i == kernel_tlb_slots) { 562 panic("pmap_kextract: Address %p is not from kernel space.\n" 563 "Data segment is too small?\n", (void*)va); 564 } 565 566 return (paddr); 567} 568 569/* 570 * Bootstrap kernel allocator, allocates from unused space in 4MB kernel 571 * data segment meaning that 572 * 573 * - Access to allocated memory will never generate a trap 574 * - Allocated chunks are never reclaimed or freed 575 * - Allocation calls do not change PROM memlists 576 */ 577static struct mem_region kdata_mem_pool; 578 579static void 580kdata_alloc_init(vaddr_t va_start, vaddr_t va_end) 581{ 582 vsize_t va_size = va_end - va_start; 583 584 kdata_mem_pool.start = va_start; 585 kdata_mem_pool.size = va_size; 586 587 BDPRINTF(PDB_BOOT, ("kdata_alloc_init(): %d bytes @%p.\n", va_size, 588 va_start)); 589} 590 591static vaddr_t 592kdata_alloc(vsize_t size, vsize_t align) 593{ 594 vaddr_t va; 595 vsize_t asize; 596 597 asize = 
roundup(kdata_mem_pool.start, align) - kdata_mem_pool.start; 598 599 kdata_mem_pool.start += asize; 600 kdata_mem_pool.size -= asize; 601 602 if (kdata_mem_pool.size < size) { 603 panic("kdata_alloc(): Data segment is too small.\n"); 604 } 605 606 va = kdata_mem_pool.start; 607 kdata_mem_pool.start += size; 608 kdata_mem_pool.size -= size; 609 610 BDPRINTF(PDB_BOOT, ("kdata_alloc(): Allocated %d@%p, %d free.\n", 611 size, (void*)va, kdata_mem_pool.size)); 612 613 return (va); 614} 615 616/* 617 * Unified routine for reading PROM properties. 618 */ 619static void 620pmap_read_memlist(const char *device, const char *property, void **ml, 621 int *ml_size, vaddr_t (* ml_alloc)(vsize_t, vsize_t)) 622{ 623 void *va; 624 int size, handle; 625 626 if ( (handle = prom_finddevice(device)) == 0) { 627 prom_printf("pmap_read_memlist(): No %s device found.\n", 628 device); 629 prom_halt(); 630 } 631 if ( (size = OF_getproplen(handle, property)) < 0) { 632 prom_printf("pmap_read_memlist(): %s/%s has no length.\n", 633 device, property); 634 prom_halt(); 635 } 636 if ( (va = (void*)(* ml_alloc)(size, sizeof(uint64_t))) == NULL) { 637 prom_printf("pmap_read_memlist(): Cannot allocate memlist.\n"); 638 prom_halt(); 639 } 640 if (OF_getprop(handle, property, va, size) <= 0) { 641 prom_printf("pmap_read_memlist(): Cannot read %s/%s.\n", 642 device, property); 643 prom_halt(); 644 } 645 646 *ml = va; 647 *ml_size = size; 648} 649 650/* 651 * This is called during bootstrap, before the system is really initialized. 652 * 653 * It's called with the start and end virtual addresses of the kernel. We 654 * bootstrap the pmap allocator now. We will allocate the basic structures we 655 * need to bootstrap the VM system here: the page frame tables, the TSB, and 656 * the free memory lists. 657 * 658 * Now all this is becoming a bit obsolete. maxctx is still important, but by 659 * separating the kernel text and data segments we really would need to 660 * provide the start and end of each segment. But we can't. The rodata 661 * segment is attached to the end of the kernel segment and has nothing to 662 * delimit its end. We could still pass in the beginning of the kernel and 663 * the beginning and end of the data segment but we could also just as easily 664 * calculate that all in here. 665 * 666 * To handle the kernel text, we need to do a reverse mapping of the start of 667 * the kernel, then traverse the free memory lists to find out how big it is. 668 */ 669 670void 671pmap_bootstrap(u_long kernelstart, u_long kernelend) 672{ 673#ifdef MODULAR 674 extern vaddr_t module_start, module_end; 675#endif 676 extern char etext[], data_start[]; /* start of data segment */ 677 extern int msgbufmapped; 678 struct mem_region *mp, *mp1, *avail, *orig; 679 int i, j, pcnt, msgbufsiz; 680 size_t s, sz; 681 int64_t data; 682 vaddr_t va, intstk; 683 uint64_t phys_msgbuf; 684 paddr_t newp = 0; 685 686 void *prom_memlist; 687 int prom_memlist_size; 688 689 BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\n")); 690 691 cache_setup_funcs(); 692 693 /* 694 * Calculate kernel size. 
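 * Both segments sit under locked 4MB mappings, so the virtual and physical
 * end addresses (ektext/ektextp, ekdata/ekdatap) are rounded up to 4MB
 * boundaries here.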
695 */ 696 ktext = kernelstart; 697 ktextp = pmap_kextract(ktext); 698 ektext = roundup((vaddr_t)etext, PAGE_SIZE_4M); 699 ektextp = roundup(pmap_kextract((vaddr_t)etext), PAGE_SIZE_4M); 700 701 kdata = (vaddr_t)data_start; 702 kdatap = pmap_kextract(kdata); 703 ekdata = roundup(kernelend, PAGE_SIZE_4M); 704 ekdatap = roundup(pmap_kextract(kernelend), PAGE_SIZE_4M); 705 706 BDPRINTF(PDB_BOOT, ("Virtual layout: text %lx-%lx, data %lx-%lx.\n", 707 ktext, ektext, kdata, ekdata)); 708 BDPRINTF(PDB_BOOT, ("Physical layout: text %lx-%lx, data %lx-%lx.\n", 709 ktextp, ektextp, kdatap, ekdatap)); 710 711 /* Initialize bootstrap allocator. */ 712 kdata_alloc_init(kernelend + 1 * 1024 * 1024, ekdata); 713 714#ifdef DEBUG 715 pmap_bootdebug(); 716#endif 717 718 pmap_alloc_bootargs(); 719 pmap_mp_init(); 720 721 /* 722 * set machine page size 723 */ 724 uvmexp.pagesize = NBPG; 725 uvmexp.ncolors = pmap_calculate_colors(); 726 uvm_setpagesize(); 727 728 /* 729 * Get hold or the message buffer. 730 */ 731 msgbufp = (struct kern_msgbuf *)(vaddr_t)MSGBUF_VA; 732/* XXXXX -- increase msgbufsiz for uvmhist printing */ 733 msgbufsiz = 4*PAGE_SIZE /* round_page(sizeof(struct msgbuf)) */; 734 BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\n", 735 (long)msgbufp, (long)msgbufsiz)); 736 if ((long)msgbufp != 737 (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz))) 738 prom_printf( 739 "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\n", 740 (void *)msgbufp, (long)phys_msgbuf); 741 phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN); 742 BDPRINTF(PDB_BOOT, 743 ("We should have the memory at %lx, let's map it in\n", 744 phys_msgbuf)); 745 if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, 746 -1/* sunos does this */) == -1) { 747 prom_printf("Failed to map msgbuf\n"); 748 } else { 749 BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\n", 750 (void *)msgbufp)); 751 } 752 msgbufmapped = 1; /* enable message buffer */ 753 initmsgbuf((void *)msgbufp, msgbufsiz); 754 755 /* 756 * Find out how much RAM we have installed. 757 */ 758 BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\n")); 759 pmap_read_memlist("/memory", "reg", &prom_memlist, &prom_memlist_size, 760 kdata_alloc); 761 phys_installed = prom_memlist; 762 phys_installed_size = prom_memlist_size / sizeof(*phys_installed); 763 764#ifdef DEBUG 765 if (pmapdebug & PDB_BOOT1) { 766 /* print out mem list */ 767 prom_printf("Installed physical memory:\n"); 768 for (i = 0; i < phys_installed_size; i++) { 769 prom_printf("memlist start %lx size %lx\n", 770 (u_long)phys_installed[i].start, 771 (u_long)phys_installed[i].size); 772 } 773 } 774#endif 775 776 BDPRINTF(PDB_BOOT1, ("Calculating physmem:")); 777 for (i = 0; i < phys_installed_size; i++) 778 physmem += btoc(phys_installed[i].size); 779 BDPRINTF(PDB_BOOT1, (" result %x or %d pages\n", 780 (int)physmem, (int)physmem)); 781 782 /* 783 * Calculate approx TSB size. This probably needs tweaking. 
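 * TSBENTS is 512 << tsbsize and each entry is 16 bytes, so
 *	tsbsize 0:  512 entries,  8KB per TSB
 *	tsbsize 1: 1024 entries, 16KB per TSB
 *	tsbsize 2: 2048 entries, 32KB per TSB
 * chosen below from the amount of installed memory (<64MB, <512MB, larger).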
784 */ 785 if (physmem < btoc(64 * 1024 * 1024)) 786 tsbsize = 0; 787 else if (physmem < btoc(512 * 1024 * 1024)) 788 tsbsize = 1; 789 else 790 tsbsize = 2; 791 792 /* 793 * Save the prom translations 794 */ 795 pmap_read_memlist("/virtual-memory", "translations", &prom_memlist, 796 &prom_memlist_size, kdata_alloc); 797 prom_map = prom_memlist; 798 prom_map_size = prom_memlist_size / sizeof(struct prom_map); 799 800#ifdef DEBUG 801 if (pmapdebug & PDB_BOOT) { 802 /* print out mem list */ 803 prom_printf("Prom xlations:\n"); 804 for (i = 0; i < prom_map_size; i++) { 805 prom_printf("start %016lx size %016lx tte %016lx\n", 806 (u_long)prom_map[i].vstart, 807 (u_long)prom_map[i].vsize, 808 (u_long)prom_map[i].tte); 809 } 810 prom_printf("End of prom xlations\n"); 811 } 812#endif 813 814 /* 815 * Here's a quick in-lined reverse bubble sort. It gets rid of 816 * any translations inside the kernel data VA range. 817 */ 818 for (i = 0; i < prom_map_size; i++) { 819 for (j = i; j < prom_map_size; j++) { 820 if (prom_map[j].vstart > prom_map[i].vstart) { 821 struct prom_map tmp; 822 823 tmp = prom_map[i]; 824 prom_map[i] = prom_map[j]; 825 prom_map[j] = tmp; 826 } 827 } 828 } 829#ifdef DEBUG 830 if (pmapdebug & PDB_BOOT) { 831 /* print out mem list */ 832 prom_printf("Prom xlations:\n"); 833 for (i = 0; i < prom_map_size; i++) { 834 prom_printf("start %016lx size %016lx tte %016lx\n", 835 (u_long)prom_map[i].vstart, 836 (u_long)prom_map[i].vsize, 837 (u_long)prom_map[i].tte); 838 } 839 prom_printf("End of prom xlations\n"); 840 } 841#endif 842 843 /* 844 * Allocate a ncpu*64KB page for the cpu_info & stack structure now. 845 */ 846 cpu0paddr = prom_alloc_phys(8 * PAGE_SIZE * sparc_ncpus, 8 * PAGE_SIZE); 847 if (cpu0paddr == 0) { 848 prom_printf("Cannot allocate cpu_infos\n"); 849 prom_halt(); 850 } 851 852 /* 853 * Now the kernel text segment is in its final location we can try to 854 * find out how much memory really is free. 855 */ 856 pmap_read_memlist("/memory", "available", &prom_memlist, 857 &prom_memlist_size, kdata_alloc); 858 orig = prom_memlist; 859 sz = prom_memlist_size; 860 pcnt = prom_memlist_size / sizeof(*orig); 861 862 BDPRINTF(PDB_BOOT1, ("Available physical memory:\n")); 863 avail = (struct mem_region*)kdata_alloc(sz, sizeof(uint64_t)); 864 for (i = 0; i < pcnt; i++) { 865 avail[i] = orig[i]; 866 BDPRINTF(PDB_BOOT1, ("memlist start %lx size %lx\n", 867 (u_long)orig[i].start, 868 (u_long)orig[i].size)); 869 } 870 BDPRINTF(PDB_BOOT1, ("End of available physical memory\n")); 871 872 BDPRINTF(PDB_BOOT, ("ktext %08lx[%08lx] - %08lx[%08lx] : " 873 "kdata %08lx[%08lx] - %08lx[%08lx]\n", 874 (u_long)ktext, (u_long)ktextp, 875 (u_long)ektext, (u_long)ektextp, 876 (u_long)kdata, (u_long)kdatap, 877 (u_long)ekdata, (u_long)ekdatap)); 878#ifdef DEBUG 879 if (pmapdebug & PDB_BOOT1) { 880 /* print out mem list */ 881 prom_printf("Available %lx physical memory before cleanup:\n", 882 (u_long)avail); 883 for (i = 0; i < pcnt; i++) { 884 prom_printf("memlist start %lx size %lx\n", 885 (u_long)avail[i].start, 886 (u_long)avail[i].size); 887 } 888 prom_printf("End of available physical memory before cleanup\n"); 889 prom_printf("kernel physical text size %08lx - %08lx\n", 890 (u_long)ktextp, (u_long)ektextp); 891 prom_printf("kernel physical data size %08lx - %08lx\n", 892 (u_long)kdatap, (u_long)ekdatap); 893 } 894#endif 895 /* 896 * Here's a another quick in-lined bubble sort. 
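 * This one orders the available regions by ascending start address so that
 * physical page zero, if present, ends up first and can be stripped by the
 * check just below.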
897 */ 898 for (i = 0; i < pcnt; i++) { 899 for (j = i; j < pcnt; j++) { 900 if (avail[j].start < avail[i].start) { 901 struct mem_region tmp; 902 tmp = avail[i]; 903 avail[i] = avail[j]; 904 avail[j] = tmp; 905 } 906 } 907 } 908 909 /* Throw away page zero if we have it. */ 910 if (avail->start == 0) { 911 avail->start += PAGE_SIZE; 912 avail->size -= PAGE_SIZE; 913 } 914 915 /* 916 * Now we need to remove the area we valloc'ed from the available 917 * memory lists. (NB: we may have already alloc'ed the entire space). 918 */ 919 npgs = 0; 920 for (mp = avail, i = 0; i < pcnt; i++, mp = &avail[i]) { 921 /* 922 * Now page align the start of the region. 923 */ 924 s = mp->start % PAGE_SIZE; 925 if (mp->size >= s) { 926 mp->size -= s; 927 mp->start += s; 928 } 929 /* 930 * And now align the size of the region. 931 */ 932 mp->size -= mp->size % PAGE_SIZE; 933 /* 934 * Check whether some memory is left here. 935 */ 936 if (mp->size == 0) { 937 memcpy(mp, mp + 1, 938 (pcnt - (mp - avail)) * sizeof *mp); 939 pcnt--; 940 mp--; 941 continue; 942 } 943 s = mp->start; 944 sz = mp->size; 945 npgs += btoc(sz); 946 for (mp1 = avail; mp1 < mp; mp1++) 947 if (s < mp1->start) 948 break; 949 if (mp1 < mp) { 950 memcpy(mp1 + 1, mp1, (char *)mp - (char *)mp1); 951 mp1->start = s; 952 mp1->size = sz; 953 } 954#ifdef DEBUG 955/* Clear all memory we give to the VM system. I want to make sure 956 * the PROM isn't using it for something, so this should break the PROM. 957 */ 958 959/* Calling pmap_zero_page() at this point also hangs some machines 960 * so don't do it at all. -- pk 26/02/2002 961 */ 962#if 0 963 { 964 paddr_t p; 965 for (p = mp->start; p < mp->start+mp->size; 966 p += PAGE_SIZE) 967 pmap_zero_page(p); 968 } 969#endif 970#endif /* DEBUG */ 971 /* 972 * In future we should be able to specify both allocated 973 * and free. 974 */ 975 BDPRINTF(PDB_BOOT1, ("uvm_page_physload(%lx, %lx)\n", 976 (long)mp->start, 977 (long)(mp->start + mp->size))); 978 uvm_page_physload( 979 atop(mp->start), 980 atop(mp->start+mp->size), 981 atop(mp->start), 982 atop(mp->start+mp->size), 983 VM_FREELIST_DEFAULT); 984 } 985 986#ifdef DEBUG 987 if (pmapdebug & PDB_BOOT) { 988 /* print out mem list */ 989 prom_printf("Available physical memory after cleanup:\n"); 990 for (i = 0; i < pcnt; i++) { 991 prom_printf("avail start %lx size %lx\n", 992 (long)avail[i].start, (long)avail[i].size); 993 } 994 prom_printf("End of available physical memory after cleanup\n"); 995 } 996#endif 997 /* 998 * Allocate and clear out pmap_kernel()->pm_segs[] 999 */ 1000 pmap_kernel()->pm_refs = 1; 1001 memset(&pmap_kernel()->pm_ctx, 0, sizeof(pmap_kernel()->pm_ctx)); 1002 1003 /* Throw away page zero */ 1004 do { 1005 pmap_get_page(&newp); 1006 } while (!newp); 1007 pmap_kernel()->pm_segs=(paddr_t *)(u_long)newp; 1008 pmap_kernel()->pm_physaddr = newp; 1009 1010 /* 1011 * finish filling out kernel pmap. 
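 * pm_segs/pm_physaddr now refer to the freshly allocated (and cleared) page
 * that serves as the kernel pmap's top-level segment table; the lower level
 * page tables are filled in on demand by pmap_enter_kpage()/pseg_set().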
1012 */ 1013 1014 BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\n", 1015 (long)pmap_kernel()->pm_physaddr)); 1016 /* 1017 * Tell pmap about our mesgbuf -- Hope this works already 1018 */ 1019#ifdef DEBUG 1020 BDPRINTF(PDB_BOOT1, ("Calling consinit()\n")); 1021 if (pmapdebug & PDB_BOOT1) 1022 consinit(); 1023 BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\n")); 1024#endif 1025 /* it's not safe to call pmap_enter so we need to do this ourselves */ 1026 va = (vaddr_t)msgbufp; 1027 prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, -1); 1028 while (msgbufsiz) { 1029 data = TSB_DATA(0 /* global */, 1030 PGSZ_8K, 1031 phys_msgbuf, 1032 1 /* priv */, 1033 1 /* Write */, 1034 1 /* Cacheable */, 1035 FORCE_ALIAS /* ALIAS -- Disable D$ */, 1036 1 /* valid */, 1037 0 /* IE */); 1038 pmap_enter_kpage(va, data); 1039 va += PAGE_SIZE; 1040 msgbufsiz -= PAGE_SIZE; 1041 phys_msgbuf += PAGE_SIZE; 1042 } 1043 BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\n")); 1044 1045 BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\n")); 1046 for (i = 0; i < prom_map_size; i++) 1047 if (prom_map[i].vstart && ((prom_map[i].vstart >> 32) == 0)) 1048 for (j = 0; j < prom_map[i].vsize; j += PAGE_SIZE) { 1049 int k; 1050 1051 for (k = 0; page_size_map[k].mask; k++) { 1052 if (((prom_map[i].vstart | 1053 prom_map[i].tte) & 1054 page_size_map[k].mask) == 0 && 1055 page_size_map[k].mask < 1056 prom_map[i].vsize) 1057 break; 1058 } 1059#ifdef DEBUG 1060 page_size_map[k].use++; 1061#endif 1062 /* Enter PROM map into pmap_kernel() */ 1063 pmap_enter_kpage(prom_map[i].vstart + j, 1064 (prom_map[i].tte + j) | TLB_EXEC | 1065 page_size_map[k].code); 1066 } 1067 BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\n")); 1068 1069 /* 1070 * Fix up start of kernel heap. 1071 */ 1072 vmmap = (vaddr_t)roundup(ekdata, 4*MEG); 1073 /* Let's keep 1 page of redzone after the kernel */ 1074 vmmap += PAGE_SIZE; 1075 { 1076 extern void main(void); 1077 vaddr_t u0va; 1078 paddr_t pa; 1079 1080 u0va = vmmap; 1081 1082 BDPRINTF(PDB_BOOT1, 1083 ("Inserting lwp0 USPACE into pmap_kernel() at %p\n", 1084 vmmap)); 1085 1086 while (vmmap < u0va + 2*USPACE) { 1087 int64_t data1; 1088 1089 if (!pmap_get_page(&pa)) 1090 panic("pmap_bootstrap: no pages"); 1091 prom_map_phys(pa, PAGE_SIZE, vmmap, -1); 1092 data1 = TSB_DATA(0 /* global */, 1093 PGSZ_8K, 1094 pa, 1095 1 /* priv */, 1096 1 /* Write */, 1097 1 /* Cacheable */, 1098 FORCE_ALIAS /* ALIAS -- Disable D$ */, 1099 1 /* valid */, 1100 0 /* IE */); 1101 pmap_enter_kpage(vmmap, data1); 1102 vmmap += PAGE_SIZE; 1103 } 1104 BDPRINTF(PDB_BOOT1, 1105 ("Done inserting stack 0 into pmap_kernel()\n")); 1106 1107 /* Now map in and initialize our cpu_info structure */ 1108#ifdef DIAGNOSTIC 1109 vmmap += PAGE_SIZE; /* redzone -- XXXX do we need one? */ 1110#endif 1111 if ((vmmap ^ INTSTACK) & VA_ALIAS_MASK) 1112 vmmap += PAGE_SIZE; /* Matchup virtual color for D$ */ 1113 intstk = vmmap; 1114 cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK); 1115 1116 BDPRINTF(PDB_BOOT1, 1117 ("Inserting cpu_info into pmap_kernel() at %p\n", 1118 cpus)); 1119 /* Now map in all 8 pages of interrupt stack/cpu_info */ 1120 pa = cpu0paddr; 1121 prom_map_phys(pa, 64*KB, vmmap, -1); 1122 1123 /* 1124 * Also map it in as the interrupt stack. 1125 * This lets the PROM see this if needed. 1126 * 1127 * XXXX locore.s does not flush these mappings 1128 * before installing the locked TTE. 
1129 */ 1130 prom_map_phys(pa, 64*KB, INTSTACK, -1); 1131 for (i = 0; i < 8; i++) { 1132 int64_t data1; 1133 1134 data1 = TSB_DATA(0 /* global */, 1135 PGSZ_8K, 1136 pa, 1137 1 /* priv */, 1138 1 /* Write */, 1139 1 /* Cacheable */, 1140 FORCE_ALIAS /* ALIAS -- Disable D$ */, 1141 1 /* valid */, 1142 0 /* IE */); 1143 pmap_enter_kpage(vmmap, data1); 1144 vmmap += PAGE_SIZE; 1145 pa += PAGE_SIZE; 1146 } 1147 BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\n")); 1148 1149 /* Initialize our cpu_info structure */ 1150 memset((void *)intstk, 0, 64 * KB); 1151 cpus->ci_self = cpus; 1152 cpus->ci_next = NULL; 1153 cpus->ci_curlwp = &lwp0; 1154 cpus->ci_flags = CPUF_PRIMARY; 1155 cpus->ci_cpuid = CPU_UPAID; 1156 cpus->ci_fplwp = NULL; 1157 cpus->ci_eintstack = NULL; 1158 cpus->ci_spinup = main; /* Call main when we're running. */ 1159 cpus->ci_paddr = cpu0paddr; 1160 cpus->ci_cpcb = (struct pcb *)u0va; 1161 cpus->ci_idepth = -1; 1162 memset(cpus->ci_intrpending, -1, sizeof(cpus->ci_intrpending)); 1163 1164 uvm_lwp_setuarea(&lwp0, u0va); 1165 lwp0.l_md.md_tf = (struct trapframe64*)(u0va + USPACE 1166 - sizeof(struct trapframe64)); 1167 1168 cpu0paddr += 64 * KB; 1169 1170 CPUSET_CLEAR(cpus_active); 1171 CPUSET_ADD(cpus_active, 0); 1172 1173 cpu_pmap_prepare(cpus, true); 1174 cpu_pmap_init(cpus); 1175 1176 /* The rest will be done at CPU attach time. */ 1177 BDPRINTF(PDB_BOOT1, 1178 ("Done inserting cpu_info into pmap_kernel()\n")); 1179 } 1180 1181 vmmap = (vaddr_t)reserve_dumppages((void *)(u_long)vmmap); 1182 1183#ifdef MODULAR 1184 /* 1185 * Reserve 16 MB of VA for module loading. Right now our full 1186 * GENERIC kernel is about 13 MB, so this looks good enough. 1187 * If we make this bigger, we should adjust the KERNEND and 1188 * associated defines in param.h. 1189 */ 1190 module_start = vmmap; 1191 vmmap += 16 * 1024*1024; 1192 module_end = vmmap; 1193#endif 1194 1195 /* 1196 * Set up bounds of allocatable memory for vmstat et al. 1197 */ 1198 avail_start = avail->start; 1199 for (mp = avail; mp->size; mp++) 1200 avail_end = mp->start+mp->size; 1201 1202 BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\n")); 1203 1204 BDPRINTF(PDB_BOOT, ("left kdata: %" PRId64 " @%" PRIx64 ".\n", 1205 kdata_mem_pool.size, kdata_mem_pool.start)); 1206} 1207 1208/* 1209 * Allocate TSBs for both mmus from the locked kernel data segment page. 1210 * This is run before the cpu itself is activated (or by the first cpu 1211 * itself) 1212 */ 1213void 1214cpu_pmap_prepare(struct cpu_info *ci, bool initial) 1215{ 1216 /* allocate our TSBs */ 1217 ci->ci_tsb_dmmu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE); 1218 ci->ci_tsb_immu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE); 1219 memset(ci->ci_tsb_dmmu, 0, TSBSIZE); 1220 memset(ci->ci_tsb_immu, 0, TSBSIZE); 1221 if (!initial) { 1222 KASSERT(ci != curcpu()); 1223 /* 1224 * Initially share ctxbusy with the boot cpu, the 1225 * cpu will replace it as soon as it runs (and can 1226 * probe the number of available contexts itself). 1227 * Untill then only context 0 (aka kernel) will be 1228 * referenced anyway. 1229 */ 1230 ci->ci_numctx = curcpu()->ci_numctx; 1231 ci->ci_ctxbusy = curcpu()->ci_ctxbusy; 1232 } 1233 1234 BDPRINTF(PDB_BOOT1, ("cpu %d: TSB allocated at %p/%p size %08x\n", 1235 ci->ci_index, ci->ci_tsb_dmmu, ci->ci_tsb_immu, TSBSIZE)); 1236} 1237 1238/* 1239 * Initialize the per CPU parts for the cpu running this code. 
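 * In particular, this allocates ci_ctxbusy and reserves context 0 for the
 * kernel pmap; user contexts are handed out starting at ci_pmap_next_ctx
 * (initially 1).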
1240 */ 1241void 1242cpu_pmap_init(struct cpu_info *ci) 1243{ 1244 size_t ctxsize; 1245 1246 /* 1247 * We delay initialising ci_ctx_lock here as LOCKDEBUG isn't 1248 * running for cpu0 yet.. 1249 */ 1250 ci->ci_pmap_next_ctx = 1; 1251#ifdef SUN4V 1252#error find out if we have 16 or 13 bit context ids 1253#else 1254 ci->ci_numctx = 0x2000; /* all SUN4U use 13 bit contexts */ 1255#endif 1256 ctxsize = sizeof(paddr_t)*ci->ci_numctx; 1257 ci->ci_ctxbusy = (paddr_t *)kdata_alloc(ctxsize, sizeof(uint64_t)); 1258 memset(ci->ci_ctxbusy, 0, ctxsize); 1259 LIST_INIT(&ci->ci_pmap_ctxlist); 1260 1261 /* mark kernel context as busy */ 1262 ci->ci_ctxbusy[0] = pmap_kernel()->pm_physaddr; 1263} 1264 1265/* 1266 * Initialize anything else for pmap handling. 1267 * Called during vm_init(). 1268 */ 1269void 1270pmap_init(void) 1271{ 1272 struct vm_page *pg; 1273 struct pglist pglist; 1274 uint64_t data; 1275 paddr_t pa; 1276 psize_t size; 1277 vaddr_t va; 1278 1279 BDPRINTF(PDB_BOOT1, ("pmap_init()\n")); 1280 1281 size = sizeof(struct pv_entry) * physmem; 1282 if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1, 1283 (paddr_t)PAGE_SIZE, (paddr_t)0, &pglist, 1, 0) != 0) 1284 panic("pmap_init: no memory"); 1285 1286 va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY); 1287 if (va == 0) 1288 panic("pmap_init: no memory"); 1289 1290 /* Map the pages */ 1291 TAILQ_FOREACH(pg, &pglist, pageq.queue) { 1292 pa = VM_PAGE_TO_PHYS(pg); 1293 pmap_zero_page(pa); 1294 data = TSB_DATA(0 /* global */, 1295 PGSZ_8K, 1296 pa, 1297 1 /* priv */, 1298 1 /* Write */, 1299 1 /* Cacheable */, 1300 FORCE_ALIAS /* ALIAS -- Disable D$ */, 1301 1 /* valid */, 1302 0 /* IE */); 1303 pmap_enter_kpage(va, data); 1304 va += PAGE_SIZE; 1305 } 1306 1307 /* 1308 * initialize the pmap pools. 1309 */ 1310 pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 1311 SPARC64_BLOCK_SIZE, 0, 0, "pmappl", NULL, IPL_NONE, NULL, NULL, 1312 NULL); 1313 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 1314 PR_LARGECACHE, "pv_entry", NULL, IPL_NONE, NULL, NULL, NULL); 1315 1316 vm_first_phys = avail_start; 1317 vm_num_phys = avail_end - avail_start; 1318 1319 mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE); 1320#if defined(USE_LOCKSAFE_PSEG_GETSET) 1321 mutex_init(&pseg_lock, MUTEX_SPIN, IPL_VM); 1322#endif 1323 lock_available = true; 1324} 1325 1326/* 1327 * How much virtual space is available to the kernel? 1328 */ 1329static vaddr_t kbreak; /* End of kernel VA */ 1330void 1331pmap_virtual_space(vaddr_t *start, vaddr_t *end) 1332{ 1333 1334 /* 1335 * Reserve one segment for kernel virtual memory 1336 */ 1337 /* Reserve two pages for pmap_copy_page && /dev/mem */ 1338 *start = kbreak = (vaddr_t)(vmmap + 2*PAGE_SIZE); 1339 *end = VM_MAX_KERNEL_ADDRESS; 1340 BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\n", *start, *end)); 1341} 1342 1343/* 1344 * Preallocate kernel page tables to a specified VA. 1345 * This simply loops through the first TTE for each 1346 * page table from the beginning of the kernel pmap, 1347 * reads the entry, and if the result is 1348 * zero (either invalid entry or no page table) it stores 1349 * a zero there, populating page tables in the process. 1350 * This is not the most efficient technique but i don't 1351 * expect it to be called that often. 
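 * pseg_set() indicates in bit 0 of its return value that a new page table
 * page is required; we keep handing it freshly allocated pages until the
 * store succeeds.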
1352 */ 1353vaddr_t 1354pmap_growkernel(vaddr_t maxkvaddr) 1355{ 1356 struct pmap *pm = pmap_kernel(); 1357 paddr_t pa; 1358 1359 if (maxkvaddr >= KERNEND) { 1360 printf("WARNING: cannot extend kernel pmap beyond %p to %p\n", 1361 (void *)KERNEND, (void *)maxkvaddr); 1362 return (kbreak); 1363 } 1364 DPRINTF(PDB_GROW, ("pmap_growkernel(%lx...%lx)\n", kbreak, maxkvaddr)); 1365 /* Align with the start of a page table */ 1366 for (kbreak &= (-1 << PDSHIFT); kbreak < maxkvaddr; 1367 kbreak += (1 << PDSHIFT)) { 1368 if (pseg_get(pm, kbreak) & TLB_V) 1369 continue; 1370 1371 pa = 0; 1372 while (pseg_set(pm, kbreak, 0, pa) & 1) { 1373 DPRINTF(PDB_GROW, 1374 ("pmap_growkernel: extending %lx\n", kbreak)); 1375 pa = 0; 1376 if (!pmap_get_page(&pa)) 1377 panic("pmap_growkernel: no pages"); 1378 ENTER_STAT(ptpneeded); 1379 } 1380 } 1381 return (kbreak); 1382} 1383 1384/* 1385 * Create and return a physical map. 1386 */ 1387struct pmap * 1388pmap_create(void) 1389{ 1390 struct pmap *pm; 1391 1392 DPRINTF(PDB_CREATE, ("pmap_create()\n")); 1393 1394 pm = pool_cache_get(&pmap_cache, PR_WAITOK); 1395 memset(pm, 0, sizeof *pm); 1396 DPRINTF(PDB_CREATE, ("pmap_create(): created %p\n", pm)); 1397 1398 mutex_init(&pm->pm_obj_lock, MUTEX_DEFAULT, IPL_NONE); 1399 uvm_obj_init(&pm->pm_obj, NULL, false, 1); 1400 uvm_obj_setlock(&pm->pm_obj, &pm->pm_obj_lock); 1401 1402 if (pm != pmap_kernel()) { 1403 while (!pmap_get_page(&pm->pm_physaddr)) { 1404 uvm_wait("pmap_create"); 1405 } 1406 pm->pm_segs = (paddr_t *)(u_long)pm->pm_physaddr; 1407 } 1408 DPRINTF(PDB_CREATE, ("pmap_create(%p): ctx %d\n", pm, pmap_ctx(pm))); 1409 return pm; 1410} 1411 1412/* 1413 * Add a reference to the given pmap. 1414 */ 1415void 1416pmap_reference(struct pmap *pm) 1417{ 1418 1419 atomic_inc_uint(&pm->pm_refs); 1420} 1421 1422/* 1423 * Retire the given pmap from service. 1424 * Should only be called if the map contains no valid mappings. 1425 */ 1426void 1427pmap_destroy(struct pmap *pm) 1428{ 1429#ifdef MULTIPROCESSOR 1430 struct cpu_info *ci; 1431 sparc64_cpuset_t pmap_cpus_active; 1432#else 1433#define pmap_cpus_active 0 1434#endif 1435 struct vm_page *pg, *nextpg; 1436 1437 if ((int)atomic_dec_uint_nv(&pm->pm_refs) > 0) { 1438 return; 1439 } 1440 DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm)); 1441#ifdef MULTIPROCESSOR 1442 CPUSET_CLEAR(pmap_cpus_active); 1443 for (ci = cpus; ci != NULL; ci = ci->ci_next) { 1444 /* XXXMRG: Move the lock inside one or both tests? 
*/ 1445 mutex_enter(&ci->ci_ctx_lock); 1446 if (CPUSET_HAS(cpus_active, ci->ci_index)) { 1447 if (pm->pm_ctx[ci->ci_index] > 0) { 1448 CPUSET_ADD(pmap_cpus_active, ci->ci_index); 1449 ctx_free(pm, ci); 1450 } 1451 } 1452 mutex_exit(&ci->ci_ctx_lock); 1453 } 1454#else 1455 if (pmap_ctx(pm)) { 1456 mutex_enter(&curcpu()->ci_ctx_lock); 1457 ctx_free(pm, curcpu()); 1458 mutex_exit(&curcpu()->ci_ctx_lock); 1459 } 1460#endif 1461 1462 /* we could be a little smarter and leave pages zeroed */ 1463 for (pg = TAILQ_FIRST(&pm->pm_obj.memq); pg != NULL; pg = nextpg) { 1464#ifdef DIAGNOSTIC 1465 struct vm_page_md *md = VM_PAGE_TO_MD(pg); 1466#endif 1467 1468 KASSERT((pg->flags & PG_MARKER) == 0); 1469 nextpg = TAILQ_NEXT(pg, listq.queue); 1470 TAILQ_REMOVE(&pm->pm_obj.memq, pg, listq.queue); 1471 KASSERT(md->mdpg_pvh.pv_pmap == NULL); 1472 dcache_flush_page_cpuset(VM_PAGE_TO_PHYS(pg), pmap_cpus_active); 1473 uvm_pagefree(pg); 1474 } 1475 pmap_free_page((paddr_t)(u_long)pm->pm_segs, pmap_cpus_active); 1476 1477 uvm_obj_destroy(&pm->pm_obj, false); 1478 mutex_destroy(&pm->pm_obj_lock); 1479 pool_cache_put(&pmap_cache, pm); 1480} 1481 1482/* 1483 * Copy the range specified by src_addr/len 1484 * from the source map to the range dst_addr/len 1485 * in the destination map. 1486 * 1487 * This routine is only advisory and need not do anything. 1488 */ 1489void 1490pmap_copy(struct pmap *dst_pmap, struct pmap *src_pmap, vaddr_t dst_addr, vsize_t len, vaddr_t src_addr) 1491{ 1492 1493 DPRINTF(PDB_CREATE, ("pmap_copy(%p, %p, %p, %lx, %p)\n", 1494 dst_pmap, src_pmap, (void *)(u_long)dst_addr, 1495 (u_long)len, (void *)(u_long)src_addr)); 1496} 1497 1498/* 1499 * Activate the address space for the specified process. If the 1500 * process is the current process, load the new MMU context. 1501 */ 1502void 1503pmap_activate(struct lwp *l) 1504{ 1505 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 1506 1507 if (pmap == pmap_kernel()) { 1508 return; 1509 } 1510 1511 /* 1512 * This is essentially the same thing that happens in cpu_switchto() 1513 * when the newly selected process is about to run, except that we 1514 * have to make sure to clean the register windows before we set 1515 * the new context. 1516 */ 1517 1518 if (l != curlwp) { 1519 return; 1520 } 1521 write_user_windows(); 1522 pmap_activate_pmap(pmap); 1523} 1524 1525void 1526pmap_activate_pmap(struct pmap *pmap) 1527{ 1528 1529 if (pmap_ctx(pmap) == 0) { 1530 (void) ctx_alloc(pmap); 1531 } 1532 dmmu_set_secondary_context(pmap_ctx(pmap)); 1533} 1534 1535/* 1536 * Deactivate the address space of the specified process. 1537 */ 1538void 1539pmap_deactivate(struct lwp *l) 1540{ 1541} 1542 1543/* 1544 * pmap_kenter_pa: [ INTERFACE ] 1545 * 1546 * Enter a va -> pa mapping into the kernel pmap without any 1547 * physical->virtual tracking. 1548 * 1549 * Note: no locking is necessary in this function. 1550 */ 1551void 1552pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1553{ 1554 pte_t tte; 1555 paddr_t ptp; 1556 struct pmap *pm = pmap_kernel(); 1557 int i; 1558 1559 KASSERT(va < INTSTACK || va > EINTSTACK); 1560 KASSERT(va < kdata || va > ekdata); 1561 1562 /* 1563 * Construct the TTE. 1564 */ 1565 1566 ENTER_STAT(unmanaged); 1567 if (pa & (PMAP_NVC|PMAP_NC)) { 1568 ENTER_STAT(ci); 1569 } 1570 1571 tte.data = TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */, 1572 (VM_PROT_WRITE & prot), 1573 !(pa & PMAP_NC), pa & (PMAP_NVC), 1, 0); 1574 /* We don't track mod/ref here. 
*/ 1575 if (prot & VM_PROT_WRITE) 1576 tte.data |= TLB_REAL_W|TLB_W; 1577 if (prot & VM_PROT_EXECUTE) 1578 tte.data |= TLB_EXEC; 1579 tte.data |= TLB_TSB_LOCK; /* wired */ 1580 ptp = 0; 1581 1582 retry: 1583 i = pseg_set(pm, va, tte.data, ptp); 1584 if (i & 1) { 1585 KASSERT((i & 4) == 0); 1586 ptp = 0; 1587 if (!pmap_get_page(&ptp)) 1588 panic("pmap_kenter_pa: no pages"); 1589 ENTER_STAT(ptpneeded); 1590 goto retry; 1591 } 1592 if (ptp && i == 0) { 1593 /* We allocated a spare page but didn't use it. Free it. */ 1594 printf("pmap_kenter_pa: freeing unused page %llx\n", 1595 (long long)ptp); 1596 pmap_free_page_noflush(ptp); 1597 } 1598#ifdef DEBUG 1599 i = ptelookup_va(va); 1600 if (pmapdebug & PDB_ENTER) 1601 prom_printf("pmap_kenter_pa: va=%08x data=%08x:%08x " 1602 "tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32), 1603 (int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]); 1604 if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) { 1605 prom_printf("pmap_kenter_pa: evicting entry tag=%x:%08x " 1606 "data=%08x:%08x tsb_dmmu[%d]=%08x\n", 1607 (int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag, 1608 (int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data, 1609 i, &curcpu()->ci_tsb_dmmu[i]); 1610 prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n", 1611 va, (int)(tte.data>>32), (int)tte.data, i, 1612 &curcpu()->ci_tsb_dmmu[i]); 1613 } 1614#endif 1615} 1616 1617/* 1618 * pmap_kremove: [ INTERFACE ] 1619 * 1620 * Remove a mapping entered with pmap_kenter_pa() starting at va, 1621 * for size bytes (assumed to be page rounded). 1622 */ 1623void 1624pmap_kremove(vaddr_t va, vsize_t size) 1625{ 1626 struct pmap *pm = pmap_kernel(); 1627 int64_t data; 1628 paddr_t pa; 1629 int rv; 1630 bool flush = FALSE; 1631 1632 KASSERT(va < INTSTACK || va > EINTSTACK); 1633 KASSERT(va < kdata || va > ekdata); 1634 1635 DPRINTF(PDB_DEMAP, ("pmap_kremove: start 0x%lx size %lx\n", va, size)); 1636 for (; size >= PAGE_SIZE; va += PAGE_SIZE, size -= PAGE_SIZE) { 1637 1638#ifdef DIAGNOSTIC 1639 /* 1640 * Is this part of the permanent 4MB mapping? 1641 */ 1642 if (va >= ktext && va < roundup(ekdata, 4*MEG)) 1643 panic("pmap_kremove: va=%08x in locked TLB", (u_int)va); 1644#endif 1645 1646 data = pseg_get(pm, va); 1647 if ((data & TLB_V) == 0) { 1648 continue; 1649 } 1650 1651 flush = TRUE; 1652 pa = data & TLB_PA_MASK; 1653 1654 /* 1655 * We need to flip the valid bit and 1656 * clear the access statistics. 1657 */ 1658 1659 rv = pseg_set(pm, va, 0, 0); 1660 if (rv & 1) 1661 panic("pmap_kremove: pseg_set needs spare, rv=%d\n", 1662 rv); 1663 DPRINTF(PDB_DEMAP, ("pmap_kremove: seg %x pdir %x pte %x\n", 1664 (int)va_to_seg(va), (int)va_to_dir(va), 1665 (int)va_to_pte(va))); 1666 REMOVE_STAT(removes); 1667 1668 tsb_invalidate(va, pm); 1669 REMOVE_STAT(tflushes); 1670 1671 /* 1672 * Here we assume nothing can get into the TLB 1673 * unless it has a PTE. 1674 */ 1675 1676 tlb_flush_pte(va, pm); 1677 dcache_flush_page_all(pa); 1678 } 1679 if (flush) 1680 REMOVE_STAT(flushes); 1681} 1682 1683/* 1684 * Insert physical page at pa into the given pmap at virtual address va. 1685 * Supports 64-bit pa so we can map I/O space. 
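 * Note that pmap_enter() may need to allocate page table pages and a pv
 * entry along the way; with PMAP_CANFAIL it returns ENOMEM when such an
 * allocation fails instead of panicking.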
1686 */ 1687 1688int 1689pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1690{ 1691 pte_t tte; 1692 int64_t data; 1693 paddr_t opa = 0, ptp; /* XXX: gcc */ 1694 pv_entry_t pvh, npv = NULL, freepv; 1695 struct vm_page *pg, *opg, *ptpg; 1696 int s, i, uncached = 0, error = 0; 1697 int size = PGSZ_8K; /* PMAP_SZ_TO_TTE(pa); */ 1698 bool wired = (flags & PMAP_WIRED) != 0; 1699 bool wasmapped = FALSE; 1700 bool dopv = TRUE; 1701 1702 /* 1703 * Is this part of the permanent mappings? 1704 */ 1705 KASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK); 1706 KASSERT(pm != pmap_kernel() || va < kdata || va > ekdata); 1707 1708 /* Grab a spare PV. */ 1709 freepv = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); 1710 if (__predict_false(freepv == NULL)) { 1711 if (flags & PMAP_CANFAIL) 1712 return (ENOMEM); 1713 panic("pmap_enter: no pv entries available"); 1714 } 1715 freepv->pv_next = NULL; 1716 1717 /* 1718 * If a mapping at this address already exists, check if we're 1719 * entering the same PA again. if it's different remove it. 1720 */ 1721 1722 mutex_enter(&pmap_lock); 1723 data = pseg_get(pm, va); 1724 if (data & TLB_V) { 1725 wasmapped = TRUE; 1726 opa = data & TLB_PA_MASK; 1727 if (opa != pa) { 1728 opg = PHYS_TO_VM_PAGE(opa); 1729 if (opg != NULL) { 1730 npv = pmap_remove_pv(pm, va, opg); 1731 } 1732 } 1733 } 1734 1735 /* 1736 * Construct the TTE. 1737 */ 1738 pg = PHYS_TO_VM_PAGE(pa); 1739 if (pg) { 1740 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1741 1742 pvh = &md->mdpg_pvh; 1743 uncached = (pvh->pv_va & (PV_ALIAS|PV_NVC)); 1744#ifdef DIAGNOSTIC 1745 if ((flags & VM_PROT_ALL) & ~prot) 1746 panic("pmap_enter: access_type exceeds prot"); 1747#endif 1748 /* 1749 * If we don't have the traphandler do it, 1750 * set the ref/mod bits now. 1751 */ 1752 if (flags & VM_PROT_ALL) 1753 pvh->pv_va |= PV_REF; 1754 if (flags & VM_PROT_WRITE) 1755 pvh->pv_va |= PV_MOD; 1756 1757 /* 1758 * make sure we have a pv entry ready if we need one. 1759 */ 1760 if (pvh->pv_pmap == NULL || (wasmapped && opa == pa)) { 1761 if (npv != NULL) { 1762 /* free it */ 1763 npv->pv_next = freepv; 1764 freepv = npv; 1765 npv = NULL; 1766 } 1767 if (wasmapped && opa == pa) { 1768 dopv = FALSE; 1769 } 1770 } else if (npv == NULL) { 1771 /* use the pre-allocated pv */ 1772 npv = freepv; 1773 freepv = freepv->pv_next; 1774 } 1775 ENTER_STAT(managed); 1776 } else { 1777 ENTER_STAT(unmanaged); 1778 dopv = FALSE; 1779 if (npv != NULL) { 1780 /* free it */ 1781 npv->pv_next = freepv; 1782 freepv = npv; 1783 npv = NULL; 1784 } 1785 } 1786 1787#ifndef NO_VCACHE 1788 if (pa & PMAP_NVC) 1789#endif 1790 uncached = 1; 1791 if (uncached) { 1792 ENTER_STAT(ci); 1793 } 1794 tte.data = TSB_DATA(0, size, pa, pm == pmap_kernel(), 1795 flags & VM_PROT_WRITE, !(pa & PMAP_NC), 1796 uncached, 1, pa & PMAP_LITTLE); 1797#ifdef HWREF 1798 if (prot & VM_PROT_WRITE) 1799 tte.data |= TLB_REAL_W; 1800 if (prot & VM_PROT_EXECUTE) 1801 tte.data |= TLB_EXEC; 1802#else 1803 /* If it needs ref accounting do nothing. 
*/ 1804 if (!(flags & VM_PROT_READ)) { 1805 mutex_exit(&pmap_lock); 1806 goto out; 1807 } 1808#endif 1809 if (flags & VM_PROT_EXECUTE) { 1810 if ((flags & (VM_PROT_READ|VM_PROT_WRITE)) == 0) 1811 tte.data |= TLB_EXEC_ONLY|TLB_EXEC; 1812 else 1813 tte.data |= TLB_EXEC; 1814 } 1815 if (wired) 1816 tte.data |= TLB_TSB_LOCK; 1817 ptp = 0; 1818 1819 retry: 1820 i = pseg_set(pm, va, tte.data, ptp); 1821 if (i & 4) { 1822 /* ptp used as L3 */ 1823 KASSERT(ptp != 0); 1824 KASSERT((i & 3) == 0); 1825 ptpg = PHYS_TO_VM_PAGE(ptp); 1826 if (ptpg) { 1827 ptpg->offset = (uint64_t)va & (0xfffffLL << 23); 1828 TAILQ_INSERT_TAIL(&pm->pm_obj.memq, ptpg, listq.queue); 1829 } else { 1830 KASSERT(pm == pmap_kernel()); 1831 } 1832 } 1833 if (i & 2) { 1834 /* ptp used as L2 */ 1835 KASSERT(ptp != 0); 1836 KASSERT((i & 4) == 0); 1837 ptpg = PHYS_TO_VM_PAGE(ptp); 1838 if (ptpg) { 1839 ptpg->offset = (((uint64_t)va >> 43) & 0x3ffLL) << 13; 1840 TAILQ_INSERT_TAIL(&pm->pm_obj.memq, ptpg, listq.queue); 1841 } else { 1842 KASSERT(pm == pmap_kernel()); 1843 } 1844 } 1845 if (i & 1) { 1846 KASSERT((i & 4) == 0); 1847 ptp = 0; 1848 if (!pmap_get_page(&ptp)) { 1849 mutex_exit(&pmap_lock); 1850 if (flags & PMAP_CANFAIL) { 1851 if (npv != NULL) { 1852 /* free it */ 1853 npv->pv_next = freepv; 1854 freepv = npv; 1855 } 1856 error = ENOMEM; 1857 goto out; 1858 } else { 1859 panic("pmap_enter: no pages"); 1860 } 1861 } 1862 ENTER_STAT(ptpneeded); 1863 goto retry; 1864 } 1865 if (ptp && i == 0) { 1866 /* We allocated a spare page but didn't use it. Free it. */ 1867 printf("pmap_enter: freeing unused page %llx\n", 1868 (long long)ptp); 1869 pmap_free_page_noflush(ptp); 1870 } 1871 if (dopv) { 1872 pmap_enter_pv(pm, va, pa, pg, npv); 1873 } 1874 1875 mutex_exit(&pmap_lock); 1876#ifdef DEBUG 1877 i = ptelookup_va(va); 1878 if (pmapdebug & PDB_ENTER) 1879 prom_printf("pmap_enter: va=%08x data=%08x:%08x " 1880 "tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32), 1881 (int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]); 1882 if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) { 1883 prom_printf("pmap_enter: evicting entry tag=%x:%08x " 1884 "data=%08x:%08x tsb_dmmu[%d]=%08x\n", 1885 (int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag, 1886 (int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data, i, 1887 &curcpu()->ci_tsb_dmmu[i]); 1888 prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n", 1889 va, (int)(tte.data>>32), (int)tte.data, i, 1890 &curcpu()->ci_tsb_dmmu[i]); 1891 } 1892#endif 1893 1894 if (flags & (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)) { 1895 1896 /* 1897 * preload the TSB with the new entry, 1898 * since we're going to need it immediately anyway. 1899 */ 1900 1901 KASSERT(pmap_ctx(pm)>=0); 1902 i = ptelookup_va(va); 1903 tte.tag = TSB_TAG(0, pmap_ctx(pm), va); 1904 s = splhigh(); 1905 if (wasmapped && pmap_is_on_mmu(pm)) { 1906 tsb_invalidate(va, pm); 1907 } 1908 if (flags & (VM_PROT_READ | VM_PROT_WRITE)) { 1909 curcpu()->ci_tsb_dmmu[i].tag = tte.tag; 1910 __asm volatile("" : : : "memory"); 1911 curcpu()->ci_tsb_dmmu[i].data = tte.data; 1912 } 1913 if (flags & VM_PROT_EXECUTE) { 1914 curcpu()->ci_tsb_immu[i].tag = tte.tag; 1915 __asm volatile("" : : : "memory"); 1916 curcpu()->ci_tsb_immu[i].data = tte.data; 1917 } 1918 1919 /* 1920 * it's only necessary to flush the TLB if this page was 1921 * previously mapped, but for some reason it's a lot faster 1922 * for the fork+exit microbenchmark if we always do it. 
1923 */ 1924 1925 KASSERT(pmap_ctx(pm)>=0); 1926#ifdef MULTIPROCESSOR 1927 if (wasmapped && pmap_is_on_mmu(pm)) 1928 tlb_flush_pte(va, pm); 1929 else 1930 sp_tlb_flush_pte(va, pmap_ctx(pm)); 1931#else 1932 tlb_flush_pte(va, pm); 1933#endif 1934 splx(s); 1935 } else if (wasmapped && pmap_is_on_mmu(pm)) { 1936 /* Force reload -- protections may be changed */ 1937 KASSERT(pmap_ctx(pm)>=0); 1938 tsb_invalidate(va, pm); 1939 tlb_flush_pte(va, pm); 1940 } 1941 1942 /* We will let the fast mmu miss interrupt load the new translation */ 1943 pv_check(); 1944 out: 1945 /* Catch up on deferred frees. */ 1946 for (; freepv != NULL; freepv = npv) { 1947 npv = freepv->pv_next; 1948 pool_cache_put(&pmap_pv_cache, freepv); 1949 } 1950 return error; 1951} 1952 1953void 1954pmap_remove_all(struct pmap *pm) 1955{ 1956#ifdef MULTIPROCESSOR 1957 struct cpu_info *ci; 1958 sparc64_cpuset_t pmap_cpus_active; 1959#endif 1960 1961 if (pm == pmap_kernel()) { 1962 return; 1963 } 1964 write_user_windows(); 1965 pm->pm_refs = 0; 1966 1967 /* 1968 * XXXMRG: pmap_destroy() does exactly the same dance here. 1969 * surely one of them isn't necessary? 1970 */ 1971#ifdef MULTIPROCESSOR 1972 CPUSET_CLEAR(pmap_cpus_active); 1973 for (ci = cpus; ci != NULL; ci = ci->ci_next) { 1974 /* XXXMRG: Move the lock inside one or both tests? */ 1975 mutex_enter(&ci->ci_ctx_lock); 1976 if (CPUSET_HAS(cpus_active, ci->ci_index)) { 1977 if (pm->pm_ctx[ci->ci_index] > 0) { 1978 CPUSET_ADD(pmap_cpus_active, ci->ci_index); 1979 ctx_free(pm, ci); 1980 } 1981 } 1982 mutex_exit(&ci->ci_ctx_lock); 1983 } 1984#else 1985 if (pmap_ctx(pm)) { 1986 mutex_enter(&curcpu()->ci_ctx_lock); 1987 ctx_free(pm, curcpu()); 1988 mutex_exit(&curcpu()->ci_ctx_lock); 1989 } 1990#endif 1991 1992 REMOVE_STAT(flushes); 1993 /* 1994 * XXXMRG: couldn't we do something less severe here, and 1995 * only flush the right context on each CPU? 1996 */ 1997 blast_dcache(); 1998} 1999 2000/* 2001 * Remove the given range of mapping entries. 2002 */ 2003void 2004pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva) 2005{ 2006 int64_t data; 2007 paddr_t pa; 2008 struct vm_page *pg; 2009 pv_entry_t pv, freepv = NULL; 2010 int rv; 2011 bool flush = FALSE; 2012 2013 /* 2014 * In here we should check each pseg and if there are no more entries, 2015 * free it. It's just that linear scans of 8K pages gets expensive. 2016 */ 2017 2018 KASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK); 2019 KASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata); 2020 2021 mutex_enter(&pmap_lock); 2022 DPRINTF(PDB_REMOVE, ("pmap_remove(pm=%p, va=%p, endva=%p):", pm, 2023 (void *)(u_long)va, (void *)(u_long)endva)); 2024 REMOVE_STAT(calls); 2025 2026 /* Now do the real work */ 2027 for (; va < endva; va += PAGE_SIZE) { 2028#ifdef DIAGNOSTIC 2029 /* 2030 * Is this part of the permanent 4MB mapping? 2031 */ 2032 if (pm == pmap_kernel() && va >= ktext && 2033 va < roundup(ekdata, 4*MEG)) 2034 panic("pmap_remove: va=%08llx in locked TLB", 2035 (long long)va); 2036#endif 2037 2038 data = pseg_get(pm, va); 2039 if ((data & TLB_V) == 0) { 2040 continue; 2041 } 2042 2043 flush = TRUE; 2044 /* First remove the pv entry, if there is one */ 2045 pa = data & TLB_PA_MASK; 2046 pg = PHYS_TO_VM_PAGE(pa); 2047 if (pg) { 2048 pv = pmap_remove_pv(pm, va, pg); 2049 if (pv != NULL) { 2050 /* free it */ 2051 pv->pv_next = freepv; 2052 freepv = pv; 2053 } 2054 } 2055 2056 /* 2057 * We need to flip the valid bit and 2058 * clear the access statistics. 
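 * Storing an all-zero TTE through pseg_set() accomplishes both at once.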
2059 */ 2060 2061 rv = pseg_set(pm, va, 0, 0); 2062 if (rv & 1) 2063 panic("pmap_remove: pseg_set needed spare, rv=%d!\n", 2064 rv); 2065 2066 DPRINTF(PDB_REMOVE, (" clearing seg %x pte %x\n", 2067 (int)va_to_seg(va), (int)va_to_pte(va))); 2068 REMOVE_STAT(removes); 2069 2070 if (pm != pmap_kernel() && !pmap_has_ctx(pm)) 2071 continue; 2072 2073 /* 2074 * if the pmap is being torn down, don't bother flushing, 2075 * we already have done so. 2076 */ 2077 2078 if (!pm->pm_refs) 2079 continue; 2080 2081 /* 2082 * Here we assume nothing can get into the TLB 2083 * unless it has a PTE. 2084 */ 2085 2086 KASSERT(pmap_ctx(pm)>=0); 2087 tsb_invalidate(va, pm); 2088 REMOVE_STAT(tflushes); 2089 tlb_flush_pte(va, pm); 2090 dcache_flush_page_all(pa); 2091 } 2092 if (flush && pm->pm_refs) 2093 REMOVE_STAT(flushes); 2094 DPRINTF(PDB_REMOVE, ("\n")); 2095 pv_check(); 2096 mutex_exit(&pmap_lock); 2097 2098 /* Catch up on deferred frees. */ 2099 for (; freepv != NULL; freepv = pv) { 2100 pv = freepv->pv_next; 2101 pool_cache_put(&pmap_pv_cache, freepv); 2102 } 2103} 2104 2105/* 2106 * Change the protection on the specified range of this pmap. 2107 */ 2108void 2109pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 2110{ 2111 paddr_t pa; 2112 int64_t data; 2113 struct vm_page *pg; 2114 pv_entry_t pv; 2115 int rv; 2116 2117 KASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK); 2118 KASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata); 2119 2120 if (prot == VM_PROT_NONE) { 2121 pmap_remove(pm, sva, eva); 2122 return; 2123 } 2124 2125 sva = trunc_page(sva); 2126 for (; sva < eva; sva += PAGE_SIZE) { 2127#ifdef DEBUG 2128 /* 2129 * Is this part of the permanent 4MB mapping? 2130 */ 2131 if (pm == pmap_kernel() && sva >= ktext && 2132 sva < roundup(ekdata, 4 * MEG)) { 2133 prom_printf("pmap_protect: va=%08x in locked TLB\n", 2134 sva); 2135 prom_abort(); 2136 return; 2137 } 2138#endif 2139 DPRINTF(PDB_CHANGEPROT, ("pmap_protect: va %p\n", 2140 (void *)(u_long)sva)); 2141 data = pseg_get(pm, sva); 2142 if ((data & TLB_V) == 0) { 2143 continue; 2144 } 2145 2146 pa = data & TLB_PA_MASK; 2147 DPRINTF(PDB_CHANGEPROT|PDB_REF, 2148 ("pmap_protect: va=%08x data=%08llx " 2149 "seg=%08x pte=%08x\n", 2150 (u_int)sva, (long long)pa, (int)va_to_seg(sva), 2151 (int)va_to_pte(sva))); 2152 2153 pg = PHYS_TO_VM_PAGE(pa); 2154 if (pg) { 2155 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2156 2157 /* Save REF/MOD info */ 2158 pv = &md->mdpg_pvh; 2159 if (data & TLB_ACCESS) 2160 pv->pv_va |= PV_REF; 2161 if (data & TLB_MODIFY) 2162 pv->pv_va |= PV_MOD; 2163 } 2164 2165 /* Just do the pmap and TSB, not the pv_list */ 2166 if ((prot & VM_PROT_WRITE) == 0) 2167 data &= ~(TLB_W|TLB_REAL_W); 2168 if ((prot & VM_PROT_EXECUTE) == 0) 2169 data &= ~(TLB_EXEC); 2170 2171 rv = pseg_set(pm, sva, data, 0); 2172 if (rv & 1) 2173 panic("pmap_protect: pseg_set needs spare! rv=%d\n", 2174 rv); 2175 2176 if (pm != pmap_kernel() && !pmap_has_ctx(pm)) 2177 continue; 2178 2179 KASSERT(pmap_ctx(pm)>=0); 2180 tsb_invalidate(sva, pm); 2181 tlb_flush_pte(sva, pm); 2182 } 2183 pv_check(); 2184} 2185 2186/* 2187 * Extract the physical page address associated 2188 * with the given map/virtual_address pair. 2189 */ 2190bool 2191pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap) 2192{ 2193 paddr_t pa; 2194 int64_t data = 0; 2195 2196 if (pm == pmap_kernel() && va >= kdata && va < roundup(ekdata, 4*MEG)) { 2197 /* Need to deal w/locked TLB entry specially. 
*/ 2198 pa = pmap_kextract(va); 2199 DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n", 2200 (u_long)va, (unsigned long long)pa)); 2201 if (pap != NULL) 2202 *pap = pa; 2203 return TRUE; 2204 } else if (pm == pmap_kernel() && va >= ktext && va < ektext) { 2205 /* Need to deal w/locked TLB entry specially. */ 2206 pa = pmap_kextract(va); 2207 DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n", 2208 (u_long)va, (unsigned long long)pa)); 2209 if (pap != NULL) 2210 *pap = pa; 2211 return TRUE; 2212 } else if (pm == pmap_kernel() && va >= INTSTACK && va < (INTSTACK + 64*KB)) { 2213 pa = (paddr_t)(curcpu()->ci_paddr - INTSTACK + va); 2214 DPRINTF(PDB_EXTRACT, ("pmap_extract (intstack): va=%lx pa=%llx\n", 2215 (u_long)va, (unsigned long long)pa)); 2216 if (pap != NULL) 2217 *pap = pa; 2218 return TRUE; 2219 } else { 2220 data = pseg_get(pm, va); 2221 pa = data & TLB_PA_MASK; 2222#ifdef DEBUG 2223 if (pmapdebug & PDB_EXTRACT) { 2224 paddr_t npa = ldxa((vaddr_t)&pm->pm_segs[va_to_seg(va)], 2225 ASI_PHYS_CACHED); 2226 printf("pmap_extract: va=%p segs[%ld]=%llx", 2227 (void *)(u_long)va, (long)va_to_seg(va), 2228 (unsigned long long)npa); 2229 if (npa) { 2230 npa = (paddr_t) 2231 ldxa((vaddr_t)&((paddr_t *)(u_long)npa) 2232 [va_to_dir(va)], 2233 ASI_PHYS_CACHED); 2234 printf(" segs[%ld][%ld]=%lx", 2235 (long)va_to_seg(va), 2236 (long)va_to_dir(va), (long)npa); 2237 } 2238 if (npa) { 2239 npa = (paddr_t) 2240 ldxa((vaddr_t)&((paddr_t *)(u_long)npa) 2241 [va_to_pte(va)], 2242 ASI_PHYS_CACHED); 2243 printf(" segs[%ld][%ld][%ld]=%lx", 2244 (long)va_to_seg(va), 2245 (long)va_to_dir(va), 2246 (long)va_to_pte(va), (long)npa); 2247 } 2248 printf(" pseg_get: %lx\n", (long)pa); 2249 } 2250#endif 2251 } 2252 if ((data & TLB_V) == 0) 2253 return (FALSE); 2254 if (pap != NULL) 2255 *pap = pa + (va & PGOFSET); 2256 return (TRUE); 2257} 2258 2259/* 2260 * Change protection on a kernel address. 2261 * This should only be called from MD code. 2262 */ 2263void 2264pmap_kprotect(vaddr_t va, vm_prot_t prot) 2265{ 2266 struct pmap *pm = pmap_kernel(); 2267 int64_t data; 2268 int rv; 2269 2270 data = pseg_get(pm, va); 2271 KASSERT(data & TLB_V); 2272 if (prot & VM_PROT_WRITE) { 2273 data |= (TLB_W|TLB_REAL_W); 2274 } else { 2275 data &= ~(TLB_W|TLB_REAL_W); 2276 } 2277 rv = pseg_set(pm, va, data, 0); 2278 if (rv & 1) 2279 panic("pmap_kprotect: pseg_set needs spare! rv=%d", rv); 2280 KASSERT(pmap_ctx(pm)>=0); 2281 tsb_invalidate(va, pm); 2282 tlb_flush_pte(va, pm); 2283} 2284 2285/* 2286 * Return the number bytes that pmap_dumpmmu() will dump. 2287 */ 2288int 2289pmap_dumpsize(void) 2290{ 2291 int sz; 2292 2293 sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)); 2294 sz += kernel_tlb_slots * sizeof(struct cpu_kcore_4mbseg); 2295 sz += phys_installed_size * sizeof(phys_ram_seg_t); 2296 2297 return btodb(sz + DEV_BSIZE - 1); 2298} 2299 2300/* 2301 * Write the mmu contents to the dump device. 2302 * This gets appended to the end of a crash dump since 2303 * there is no in-core copy of kernel memory mappings on a 4/4c machine. 2304 * 2305 * Write the core dump headers and MD data to the dump device. 
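 *
 * pmap_dumpsize() above accounts for exactly this much space.  As a rough
 * worked example (illustrative numbers only): with kernel_tlb_slots == 16
 * and phys_installed_size == 4 it sums
 *
 *	ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t))
 *	    + 16 * sizeof(struct cpu_kcore_4mbseg)
 *	    + 4 * sizeof(phys_ram_seg_t)
 *
 * and rounds the total up to whole DEV_BSIZE blocks with btodb().
 *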
2306 * We dump the following items: 2307 * 2308 * kcore_seg_t MI header defined in <sys/kcore.h>) 2309 * cpu_kcore_hdr_t MD header defined in <machine/kcore.h>) 2310 * phys_ram_seg_t[phys_installed_size] physical memory segments 2311 */ 2312int 2313pmap_dumpmmu(int (*dump)(dev_t, daddr_t, void *, size_t), daddr_t blkno) 2314{ 2315 kcore_seg_t *kseg; 2316 cpu_kcore_hdr_t *kcpu; 2317 phys_ram_seg_t memseg; 2318 struct cpu_kcore_4mbseg ktlb; 2319 int error = 0; 2320 int i; 2321 int buffer[dbtob(1) / sizeof(int)]; 2322 int *bp, *ep; 2323 2324#define EXPEDITE(p,n) do { \ 2325 int *sp = (void *)(p); \ 2326 int sz = (n); \ 2327 while (sz > 0) { \ 2328 *bp++ = *sp++; \ 2329 if (bp >= ep) { \ 2330 error = (*dump)(dumpdev, blkno, \ 2331 (void *)buffer, dbtob(1)); \ 2332 if (error != 0) \ 2333 return (error); \ 2334 ++blkno; \ 2335 bp = buffer; \ 2336 } \ 2337 sz -= 4; \ 2338 } \ 2339} while (0) 2340 2341 /* Setup bookkeeping pointers */ 2342 bp = buffer; 2343 ep = &buffer[sizeof(buffer) / sizeof(buffer[0])]; 2344 2345 /* Fill in MI segment header */ 2346 kseg = (kcore_seg_t *)bp; 2347 CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU); 2348 kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t)); 2349 2350 /* Fill in MD segment header (interpreted by MD part of libkvm) */ 2351 kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t))); 2352 kcpu->cputype = cputyp; 2353 kcpu->kernbase = (uint64_t)KERNBASE; 2354 kcpu->cpubase = (uint64_t)CPUINFO_VA; 2355 2356 /* Describe the locked text segment */ 2357 kcpu->ktextbase = (uint64_t)ktext; 2358 kcpu->ktextp = (uint64_t)ktextp; 2359 kcpu->ktextsz = (uint64_t)ektext - ktext; 2360 if (kcpu->ktextsz > 4*MEG) 2361 kcpu->ktextsz = 0; /* old version can not work */ 2362 2363 /* Describe locked data segment */ 2364 kcpu->kdatabase = (uint64_t)kdata; 2365 kcpu->kdatap = (uint64_t)kdatap; 2366 kcpu->kdatasz = (uint64_t)ekdatap - kdatap; 2367 2368 /* new version of locked segments description */ 2369 kcpu->newmagic = SPARC64_KCORE_NEWMAGIC; 2370 kcpu->num4mbsegs = kernel_tlb_slots; 2371 kcpu->off4mbsegs = ALIGN(sizeof(cpu_kcore_hdr_t)); 2372 2373 /* description of per-cpu mappings */ 2374 kcpu->numcpuinfos = sparc_ncpus; 2375 kcpu->percpusz = 64 * 1024; /* used to be 128k for some time */ 2376 kcpu->thiscpu = cpu_number(); /* which cpu is doing this dump */ 2377 kcpu->cpusp = cpu0paddr - 64 * 1024 * sparc_ncpus; 2378 2379 /* Now the memsegs */ 2380 kcpu->nmemseg = phys_installed_size; 2381 kcpu->memsegoffset = kcpu->off4mbsegs 2382 + kernel_tlb_slots * sizeof(struct cpu_kcore_4mbseg); 2383 2384 /* Now we need to point this at our kernel pmap. */ 2385 kcpu->nsegmap = STSZ; 2386 kcpu->segmapoffset = (uint64_t)pmap_kernel()->pm_physaddr; 2387 2388 /* Note: we have assumed everything fits in buffer[] so far... 
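	   from here on, the larger variable-sized items (the locked 4MB TLB
	   entries and the memsegs) go through the EXPEDITE() macro above,
	   which copies them into buffer[] 32 bits at a time and writes out a
	   full dbtob(1) block to the dump device whenever the buffer fills.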
*/ 2389 bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t))); 2390 2391 /* write locked kernel 4MB TLBs */ 2392 for (i = 0; i < kernel_tlb_slots; i++) { 2393 ktlb.va = kernel_tlbs[i].te_va; 2394 ktlb.pa = kernel_tlbs[i].te_pa; 2395 EXPEDITE(&ktlb, sizeof(ktlb)); 2396 } 2397 2398 /* write memsegs */ 2399 for (i = 0; i < phys_installed_size; i++) { 2400 memseg.start = phys_installed[i].start; 2401 memseg.size = phys_installed[i].size; 2402 EXPEDITE(&memseg, sizeof(phys_ram_seg_t)); 2403 } 2404 2405 if (bp != buffer) 2406 error = (*dump)(dumpdev, blkno++, (void *)buffer, dbtob(1)); 2407 2408 return (error); 2409} 2410 2411/* 2412 * Determine (non)existence of physical page 2413 */ 2414int 2415pmap_pa_exists(paddr_t pa) 2416{ 2417 int i; 2418 2419 /* Just go through physical memory list & see if we're there */ 2420 for (i = 0; i < phys_installed_size; i++) { 2421 if ((phys_installed[i].start <= pa) && 2422 (phys_installed[i].start + 2423 phys_installed[i].size >= pa)) 2424 return 1; 2425 } 2426 return 0; 2427} 2428 2429/* 2430 * Lookup the appropriate TSB entry. 2431 * 2432 * Here is the full official pseudo code: 2433 * 2434 */ 2435 2436#ifdef NOTYET 2437int64 GenerateTSBPointer( 2438 int64 va, /* Missing VA */ 2439 PointerType type, /* 8K_POINTER or 16K_POINTER */ 2440 int64 TSBBase, /* TSB Register[63:13] << 13 */ 2441 Boolean split, /* TSB Register[12] */ 2442 int TSBSize) /* TSB Register[2:0] */ 2443{ 2444 int64 vaPortion; 2445 int64 TSBBaseMask; 2446 int64 splitMask; 2447 2448 /* TSBBaseMask marks the bits from TSB Base Reg */ 2449 TSBBaseMask = 0xffffffffffffe000 << 2450 (split? (TSBsize + 1) : TSBsize); 2451 2452 /* Shift va towards lsb appropriately and */ 2453 /* zero out the original va page offset */ 2454 vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) & 2455 0xfffffffffffffff0; 2456 2457 if (split) { 2458 /* There's only one bit in question for split */ 2459 splitMask = 1 << (13 + TSBsize); 2460 if (type == 8K_POINTER) 2461 /* Make sure we're in the lower half */ 2462 vaPortion &= ~splitMask; 2463 else 2464 /* Make sure we're in the upper half */ 2465 vaPortion |= splitMask; 2466 } 2467 return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask); 2468} 2469#endif 2470/* 2471 * Of course, since we are not using a split TSB or variable page sizes, 2472 * we can optimize this a bit. 2473 * 2474 * The following only works for a unified 8K TSB. It will find the slot 2475 * for that particular va and return it. IT MAY BE FOR ANOTHER MAPPING! 
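 *
 * A worked example (assuming tsbsize == 0, i.e. a 512-entry TSB, and a
 * 16-byte pte_t): TSBBASEMASK is then 0xffffffffffffe000, so
 *
 *	ptelookup_va(va) == (((va >> 9) & 0x1ff0) / 16) == ((va >> 13) & 0x1ff)
 *
 * i.e. the slot is simply the low 9 bits of the 8K virtual page number,
 * and any two VAs 4MB apart collide in the same slot -- hence the entry
 * found there may indeed belong to another mapping.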
2476 */ 2477int 2478ptelookup_va(vaddr_t va) 2479{ 2480 long tsbptr; 2481#define TSBBASEMASK (0xffffffffffffe000LL << tsbsize) 2482 2483 tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK); 2484 return (tsbptr / sizeof(pte_t)); 2485} 2486 2487/* 2488 * Do whatever is needed to sync the MOD/REF flags 2489 */ 2490 2491bool 2492pmap_clear_modify(struct vm_page *pg) 2493{ 2494 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2495 pv_entry_t pv; 2496 int rv; 2497 int changed = 0; 2498#ifdef DEBUG 2499 int modified = 0; 2500 2501 DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify(%p)\n", pg)); 2502 2503 modified = pmap_is_modified(pg); 2504#endif 2505 mutex_enter(&pmap_lock); 2506 /* Clear all mappings */ 2507 pv = &md->mdpg_pvh; 2508#ifdef DEBUG 2509 if (pv->pv_va & PV_MOD) 2510 pv->pv_va |= PV_WE; /* Remember this was modified */ 2511#endif 2512 if (pv->pv_va & PV_MOD) { 2513 changed |= 1; 2514 pv->pv_va &= ~PV_MOD; 2515 } 2516#ifdef DEBUG 2517 if (pv->pv_next && !pv->pv_pmap) { 2518 printf("pmap_clear_modify: npv but no pmap for pv %p\n", pv); 2519 Debugger(); 2520 } 2521#endif 2522 if (pv->pv_pmap != NULL) { 2523 for (; pv; pv = pv->pv_next) { 2524 int64_t data; 2525 struct pmap *pmap = pv->pv_pmap; 2526 vaddr_t va = pv->pv_va & PV_VAMASK; 2527 2528 /* First clear the mod bit in the PTE and make it R/O */ 2529 data = pseg_get(pmap, va); 2530 KASSERT(data & TLB_V); 2531 /* Need to both clear the modify and write bits */ 2532 if (data & TLB_MODIFY) 2533 changed |= 1; 2534#ifdef HWREF 2535 data &= ~(TLB_MODIFY|TLB_W); 2536#else 2537 data &= ~(TLB_MODIFY|TLB_W|TLB_REAL_W); 2538#endif 2539 rv = pseg_set(pmap, va, data, 0); 2540 if (rv & 1) 2541 printf("pmap_clear_modify: pseg_set needs" 2542 " spare! rv=%d\n", rv); 2543 if (pmap_is_on_mmu(pmap)) { 2544 KASSERT(pmap_ctx(pmap)>=0); 2545 tsb_invalidate(va, pmap); 2546 tlb_flush_pte(va, pmap); 2547 } 2548 /* Then clear the mod bit in the pv */ 2549 if (pv->pv_va & PV_MOD) { 2550 changed |= 1; 2551 pv->pv_va &= ~PV_MOD; 2552 } 2553 } 2554 } 2555 pv_check(); 2556 mutex_exit(&pmap_lock); 2557#ifdef DEBUG 2558 if (pmap_is_modified(pg)) { 2559 printf("pmap_clear_modify(): %p still modified!\n", pg); 2560 Debugger(); 2561 } 2562 DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify: pg %p %s\n", pg, 2563 (changed ? 
"was modified" : "was not modified"))); 2564 if (modified != changed) { 2565 printf("pmap_clear_modify: modified %d changed %d\n", 2566 modified, changed); 2567 Debugger(); 2568 } else return (modified); 2569#endif 2570 return (changed); 2571} 2572 2573bool 2574pmap_clear_reference(struct vm_page *pg) 2575{ 2576 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2577 pv_entry_t pv; 2578 int rv; 2579 int changed = 0; 2580#ifdef DEBUG 2581 int referenced = 0; 2582#endif 2583 2584 mutex_enter(&pmap_lock); 2585#ifdef DEBUG 2586 DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_reference(%p)\n", pg)); 2587 referenced = pmap_is_referenced_locked(pg); 2588#endif 2589 /* Clear all references */ 2590 pv = &md->mdpg_pvh; 2591 if (pv->pv_va & PV_REF) { 2592 changed |= 1; 2593 pv->pv_va &= ~PV_REF; 2594 } 2595#ifdef DEBUG 2596 if (pv->pv_next && !pv->pv_pmap) { 2597 printf("pmap_clear_reference: npv but no pmap for pv %p\n", pv); 2598 Debugger(); 2599 } 2600#endif 2601 if (pv->pv_pmap != NULL) { 2602 for (; pv; pv = pv->pv_next) { 2603 int64_t data; 2604 struct pmap *pmap = pv->pv_pmap; 2605 vaddr_t va = pv->pv_va & PV_VAMASK; 2606 2607 data = pseg_get(pmap, va); 2608 KASSERT(data & TLB_V); 2609 DPRINTF(PDB_CHANGEPROT, 2610 ("clearing ref pm:%p va:%p ctx:%lx data:%llx\n", 2611 pmap, (void *)(u_long)va, 2612 (u_long)pmap_ctx(pmap), 2613 (long long)data)); 2614#ifdef HWREF 2615 if (data & TLB_ACCESS) { 2616 changed |= 1; 2617 data &= ~TLB_ACCESS; 2618 } 2619#else 2620 if (data < 0) 2621 changed |= 1; 2622 data = 0; 2623#endif 2624 rv = pseg_set(pmap, va, data, 0); 2625 if (rv & 1) 2626 panic("pmap_clear_reference: pseg_set needs" 2627 " spare! rv=%d\n", rv); 2628 if (pmap_is_on_mmu(pmap)) { 2629 KASSERT(pmap_ctx(pmap)>=0); 2630 tsb_invalidate(va, pmap); 2631 tlb_flush_pte(va, pmap); 2632 } 2633 if (pv->pv_va & PV_REF) { 2634 changed |= 1; 2635 pv->pv_va &= ~PV_REF; 2636 } 2637 } 2638 } 2639 dcache_flush_page_all(VM_PAGE_TO_PHYS(pg)); 2640 pv_check(); 2641#ifdef DEBUG 2642 if (pmap_is_referenced_locked(pg)) { 2643 pv = &md->mdpg_pvh; 2644 printf("pmap_clear_reference(): %p still referenced " 2645 "(pmap = %p, ctx = %d)\n", pg, pv->pv_pmap, 2646 pv->pv_pmap ? pmap_ctx(pv->pv_pmap) : 0); 2647 Debugger(); 2648 } 2649 DPRINTF(PDB_CHANGEPROT|PDB_REF, 2650 ("pmap_clear_reference: pg %p %s\n", pg, 2651 (changed ? 
"was referenced" : "was not referenced"))); 2652 if (referenced != changed) { 2653 printf("pmap_clear_reference: referenced %d changed %d\n", 2654 referenced, changed); 2655 Debugger(); 2656 } else { 2657 mutex_exit(&pmap_lock); 2658 return (referenced); 2659 } 2660#endif 2661 mutex_exit(&pmap_lock); 2662 return (changed); 2663} 2664 2665bool 2666pmap_is_modified(struct vm_page *pg) 2667{ 2668 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2669 pv_entry_t pv, npv; 2670 bool res = false; 2671 2672 /* Check if any mapping has been modified */ 2673 pv = &md->mdpg_pvh; 2674 if (pv->pv_va & PV_MOD) 2675 res = true; 2676#ifdef HWREF 2677#ifdef DEBUG 2678 if (pv->pv_next && !pv->pv_pmap) { 2679 printf("pmap_is_modified: npv but no pmap for pv %p\n", pv); 2680 Debugger(); 2681 } 2682#endif 2683 if (!res && pv->pv_pmap != NULL) { 2684 mutex_enter(&pmap_lock); 2685 for (npv = pv; !res && npv && npv->pv_pmap; 2686 npv = npv->pv_next) { 2687 int64_t data; 2688 2689 data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK); 2690 KASSERT(data & TLB_V); 2691 if (data & TLB_MODIFY) 2692 res = true; 2693 2694 /* Migrate modify info to head pv */ 2695 if (npv->pv_va & PV_MOD) { 2696 res = true; 2697 npv->pv_va &= ~PV_MOD; 2698 } 2699 } 2700 /* Save modify info */ 2701 if (res) 2702 pv->pv_va |= PV_MOD; 2703#ifdef DEBUG 2704 if (res) 2705 pv->pv_va |= PV_WE; 2706#endif 2707 mutex_exit(&pmap_lock); 2708 } 2709#endif 2710 2711 DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_is_modified(%p) = %d\n", pg, 2712 res)); 2713 pv_check(); 2714 return res; 2715} 2716 2717/* 2718 * Variant of pmap_is_reference() where caller already holds pmap_lock 2719 */ 2720static bool 2721pmap_is_referenced_locked(struct vm_page *pg) 2722{ 2723 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2724 pv_entry_t pv, npv; 2725 bool res = false; 2726 2727 KASSERT(mutex_owned(&pmap_lock)); 2728 2729 /* Check if any mapping has been referenced */ 2730 pv = &md->mdpg_pvh; 2731 if (pv->pv_va & PV_REF) 2732 return true; 2733 2734#ifdef HWREF 2735#ifdef DEBUG 2736 if (pv->pv_next && !pv->pv_pmap) { 2737 printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv); 2738 Debugger(); 2739 } 2740#endif 2741 if (pv->pv_pmap == NULL) 2742 return false; 2743 2744 for (npv = pv; npv; npv = npv->pv_next) { 2745 int64_t data; 2746 2747 data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK); 2748 KASSERT(data & TLB_V); 2749 if (data & TLB_ACCESS) 2750 res = true; 2751 2752 /* Migrate ref info to head pv */ 2753 if (npv->pv_va & PV_REF) { 2754 res = true; 2755 npv->pv_va &= ~PV_REF; 2756 } 2757 } 2758 /* Save ref info */ 2759 if (res) 2760 pv->pv_va |= PV_REF; 2761#endif 2762 2763 DPRINTF(PDB_CHANGEPROT|PDB_REF, 2764 ("pmap_is_referenced(%p) = %d\n", pg, res)); 2765 pv_check(); 2766 return res; 2767} 2768 2769bool 2770pmap_is_referenced(struct vm_page *pg) 2771{ 2772 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2773 pv_entry_t pv; 2774 bool res = false; 2775 2776 /* Check if any mapping has been referenced */ 2777 pv = &md->mdpg_pvh; 2778 if (pv->pv_va & PV_REF) 2779 return true; 2780 2781#ifdef HWREF 2782#ifdef DEBUG 2783 if (pv->pv_next && !pv->pv_pmap) { 2784 printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv); 2785 Debugger(); 2786 } 2787#endif 2788 if (pv->pv_pmap != NULL) { 2789 mutex_enter(&pmap_lock); 2790 res = pmap_is_referenced_locked(pg); 2791 mutex_exit(&pmap_lock); 2792 } 2793#endif 2794 2795 DPRINTF(PDB_CHANGEPROT|PDB_REF, 2796 ("pmap_is_referenced(%p) = %d\n", pg, res)); 2797 pv_check(); 2798 return res; 2799} 2800 2801 2802 2803/* 
2804 * Routine: pmap_unwire 2805 * Function: Clear the wired attribute for a map/virtual-address 2806 * pair. 2807 * In/out conditions: 2808 * The mapping must already exist in the pmap. 2809 */ 2810void 2811pmap_unwire(pmap_t pmap, vaddr_t va) 2812{ 2813 int64_t data; 2814 int rv; 2815 2816 DPRINTF(PDB_MMU_STEAL, ("pmap_unwire(%p, %lx)\n", pmap, va)); 2817 2818#ifdef DEBUG 2819 /* 2820 * Is this part of the permanent 4MB mapping? 2821 */ 2822 if (pmap == pmap_kernel() && va >= ktext && 2823 va < roundup(ekdata, 4*MEG)) { 2824 prom_printf("pmap_unwire: va=%08x in locked TLB\n", va); 2825 prom_abort(); 2826 return; 2827 } 2828#endif 2829 data = pseg_get(pmap, va & PV_VAMASK); 2830 KASSERT(data & TLB_V); 2831 data &= ~TLB_TSB_LOCK; 2832 rv = pseg_set(pmap, va & PV_VAMASK, data, 0); 2833 if (rv & 1) 2834 panic("pmap_unwire: pseg_set needs spare! rv=%d\n", rv); 2835 pv_check(); 2836} 2837 2838/* 2839 * Lower the protection on the specified physical page. 2840 * 2841 * Never enable writing as it will break COW 2842 */ 2843 2844void 2845pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 2846{ 2847 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2848 int64_t clear, set; 2849 int64_t data = 0; 2850 int rv; 2851 pv_entry_t pv, npv, freepv = NULL; 2852 struct pmap *pmap; 2853 vaddr_t va; 2854 bool needflush = FALSE; 2855 2856 DPRINTF(PDB_CHANGEPROT, 2857 ("pmap_page_protect: pg %p prot %x\n", pg, prot)); 2858 2859 mutex_enter(&pmap_lock); 2860 pv = &md->mdpg_pvh; 2861 if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { 2862 /* copy_on_write */ 2863 2864 set = TLB_V; 2865 clear = TLB_REAL_W|TLB_W; 2866 if (VM_PROT_EXECUTE & prot) 2867 set |= TLB_EXEC; 2868 else 2869 clear |= TLB_EXEC; 2870 if (VM_PROT_EXECUTE == prot) 2871 set |= TLB_EXEC_ONLY; 2872 2873#ifdef DEBUG 2874 if (pv->pv_next && !pv->pv_pmap) { 2875 printf("pmap_page_protect: no pmap for pv %p\n", pv); 2876 Debugger(); 2877 } 2878#endif 2879 if (pv->pv_pmap != NULL) { 2880 for (; pv; pv = pv->pv_next) { 2881 pmap = pv->pv_pmap; 2882 va = pv->pv_va & PV_VAMASK; 2883 2884 DPRINTF(PDB_CHANGEPROT | PDB_REF, 2885 ("pmap_page_protect: " 2886 "RO va %p of pg %p...\n", 2887 (void *)(u_long)pv->pv_va, pg)); 2888 data = pseg_get(pmap, va); 2889 KASSERT(data & TLB_V); 2890 2891 /* Save REF/MOD info */ 2892 if (data & TLB_ACCESS) 2893 pv->pv_va |= PV_REF; 2894 if (data & TLB_MODIFY) 2895 pv->pv_va |= PV_MOD; 2896 2897 data &= ~clear; 2898 data |= set; 2899 rv = pseg_set(pmap, va, data, 0); 2900 if (rv & 1) 2901 panic("pmap_page_protect: " 2902 "pseg_set needs spare! 
rv=%d\n", 2903 rv); 2904 if (pmap_is_on_mmu(pmap)) { 2905 KASSERT(pmap_ctx(pmap)>=0); 2906 tsb_invalidate(va, pmap); 2907 tlb_flush_pte(va, pmap); 2908 } 2909 } 2910 } 2911 } else { 2912 /* remove mappings */ 2913 DPRINTF(PDB_REMOVE, 2914 ("pmap_page_protect: demapping pg %p\n", pg)); 2915 2916 /* First remove the entire list of continuation pv's */ 2917 for (npv = pv->pv_next; npv; npv = pv->pv_next) { 2918 pmap = npv->pv_pmap; 2919 va = npv->pv_va & PV_VAMASK; 2920 2921 /* We're removing npv from pv->pv_next */ 2922 DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE, 2923 ("pmap_page_protect: " 2924 "demap va %p of pg %p in pmap %p...\n", 2925 (void *)(u_long)va, pg, pmap)); 2926 2927 /* clear the entry in the page table */ 2928 data = pseg_get(pmap, va); 2929 KASSERT(data & TLB_V); 2930 2931 /* Save ref/mod info */ 2932 if (data & TLB_ACCESS) 2933 pv->pv_va |= PV_REF; 2934 if (data & TLB_MODIFY) 2935 pv->pv_va |= PV_MOD; 2936 /* Clear mapping */ 2937 rv = pseg_set(pmap, va, 0, 0); 2938 if (rv & 1) 2939 panic("pmap_page_protect: pseg_set needs" 2940 " spare! rv=%d\n", rv); 2941 if (pmap_is_on_mmu(pmap)) { 2942 KASSERT(pmap_ctx(pmap)>=0); 2943 tsb_invalidate(va, pmap); 2944 tlb_flush_pte(va, pmap); 2945 } 2946 if (pmap->pm_refs > 0) { 2947 needflush = TRUE; 2948 } 2949 2950 /* free the pv */ 2951 pv->pv_next = npv->pv_next; 2952 npv->pv_next = freepv; 2953 freepv = npv; 2954 } 2955 2956 /* Then remove the primary pv */ 2957#ifdef DEBUG 2958 if (pv->pv_next && !pv->pv_pmap) { 2959 printf("pmap_page_protect: no pmap for pv %p\n", pv); 2960 Debugger(); 2961 } 2962#endif 2963 if (pv->pv_pmap != NULL) { 2964 pmap = pv->pv_pmap; 2965 va = pv->pv_va & PV_VAMASK; 2966 2967 DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE, 2968 ("pmap_page_protect: " 2969 "demap va %p of pg %p from pm %p...\n", 2970 (void *)(u_long)va, pg, pmap)); 2971 2972 data = pseg_get(pmap, va); 2973 KASSERT(data & TLB_V); 2974 /* Save ref/mod info */ 2975 if (data & TLB_ACCESS) 2976 pv->pv_va |= PV_REF; 2977 if (data & TLB_MODIFY) 2978 pv->pv_va |= PV_MOD; 2979 rv = pseg_set(pmap, va, 0, 0); 2980 if (rv & 1) 2981 panic("pmap_page_protect: pseg_set needs" 2982 " spare! rv=%d\n", rv); 2983 if (pmap_is_on_mmu(pmap)) { 2984 KASSERT(pmap_ctx(pmap)>=0); 2985 tsb_invalidate(va, pmap); 2986 tlb_flush_pte(va, pmap); 2987 } 2988 if (pmap->pm_refs > 0) { 2989 needflush = TRUE; 2990 } 2991 npv = pv->pv_next; 2992 /* dump the first pv */ 2993 if (npv) { 2994 /* First save mod/ref bits */ 2995 pv->pv_pmap = npv->pv_pmap; 2996 pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va; 2997 pv->pv_next = npv->pv_next; 2998 npv->pv_next = freepv; 2999 freepv = npv; 3000 } else { 3001 pv->pv_pmap = NULL; 3002 pv->pv_next = NULL; 3003 } 3004 } 3005 if (needflush) 3006 dcache_flush_page_all(VM_PAGE_TO_PHYS(pg)); 3007 } 3008 /* We should really only flush the pages we demapped. */ 3009 pv_check(); 3010 mutex_exit(&pmap_lock); 3011 3012 /* Catch up on deferred frees. */ 3013 for (; freepv != NULL; freepv = npv) { 3014 npv = freepv->pv_next; 3015 pool_cache_put(&pmap_pv_cache, freepv); 3016 } 3017} 3018 3019#ifdef PMAP_COUNT_DEBUG 3020/* 3021 * count pages in pmap -- this can be slow. 3022 */ 3023int 3024pmap_count_res(struct pmap *pm) 3025{ 3026 int64_t data; 3027 paddr_t *pdir, *ptbl; 3028 int i, j, k, n; 3029 3030 /* Don't want one of these pages reused while we're reading it. 
*/ 3031 mutex_enter(&pmap_lock); 3032 n = 0; 3033 for (i = 0; i < STSZ; i++) { 3034 pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], 3035 ASI_PHYS_CACHED); 3036 if (pdir == NULL) { 3037 continue; 3038 } 3039 for (k = 0; k < PDSZ; k++) { 3040 ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], 3041 ASI_PHYS_CACHED); 3042 if (ptbl == NULL) { 3043 continue; 3044 } 3045 for (j = 0; j < PTSZ; j++) { 3046 data = (int64_t)ldxa((vaddr_t)&ptbl[j], 3047 ASI_PHYS_CACHED); 3048 if (data & TLB_V) 3049 n++; 3050 } 3051 } 3052 } 3053 mutex_exit(&pmap_lock); 3054 3055 if (pm->pm_stats.resident_count != n) 3056 printf("pmap_count_resident: pm_stats = %ld, counted: %d\n", 3057 pm->pm_stats.resident_count, n); 3058 3059 return n; 3060} 3061 3062/* 3063 * count wired pages in pmap -- this can be slow. 3064 */ 3065int 3066pmap_count_wired(struct pmap *pm) 3067{ 3068 int64_t data; 3069 paddr_t *pdir, *ptbl; 3070 int i, j, k, n; 3071 3072 /* Don't want one of these pages reused while we're reading it. */ 3073 mutex_enter(&pmap_lock); /* XXX uvmplock */ 3074 n = 0; 3075 for (i = 0; i < STSZ; i++) { 3076 pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], 3077 ASI_PHYS_CACHED); 3078 if (pdir == NULL) { 3079 continue; 3080 } 3081 for (k = 0; k < PDSZ; k++) { 3082 ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], 3083 ASI_PHYS_CACHED); 3084 if (ptbl == NULL) { 3085 continue; 3086 } 3087 for (j = 0; j < PTSZ; j++) { 3088 data = (int64_t)ldxa((vaddr_t)&ptbl[j], 3089 ASI_PHYS_CACHED); 3090 if (data & TLB_TSB_LOCK) 3091 n++; 3092 } 3093 } 3094 } 3095 mutex_exit(&pmap_lock); /* XXX uvmplock */ 3096 3097 if (pm->pm_stats.wired_count != n) 3098 printf("pmap_count_wired: pm_stats = %ld, counted: %d\n", 3099 pm->pm_stats.wired_count, n); 3100 3101 return n; 3102} 3103#endif /* PMAP_COUNT_DEBUG */ 3104 3105void 3106pmap_procwr(struct proc *p, vaddr_t va, size_t len) 3107{ 3108 3109 blast_icache(); 3110} 3111 3112/* 3113 * Allocate a hardware context to the given pmap. 3114 */ 3115static int 3116ctx_alloc(struct pmap *pm) 3117{ 3118 int i, ctx; 3119 3120 KASSERT(pm != pmap_kernel()); 3121 KASSERT(pm == curproc->p_vmspace->vm_map.pmap); 3122 mutex_enter(&curcpu()->ci_ctx_lock); 3123 ctx = curcpu()->ci_pmap_next_ctx++; 3124 3125 /* 3126 * if we have run out of contexts, remove all user entries from 3127 * the TSB, TLB and dcache and start over with context 1 again. 3128 */ 3129 3130 if (ctx == curcpu()->ci_numctx) { 3131 DPRINTF(PDB_CTX_ALLOC|PDB_CTX_FLUSHALL, 3132 ("ctx_alloc: cpu%d run out of contexts %d\n", 3133 cpu_number(), curcpu()->ci_numctx)); 3134 write_user_windows(); 3135 while (!LIST_EMPTY(&curcpu()->ci_pmap_ctxlist)) { 3136#ifdef MULTIPROCESSOR 3137 KASSERT(pmap_ctx(LIST_FIRST(&curcpu()->ci_pmap_ctxlist)) != 0); 3138#endif 3139 ctx_free(LIST_FIRST(&curcpu()->ci_pmap_ctxlist), 3140 curcpu()); 3141 } 3142 for (i = TSBENTS - 1; i >= 0; i--) { 3143 if (TSB_TAG_CTX(curcpu()->ci_tsb_dmmu[i].tag) != 0) { 3144 clrx(&curcpu()->ci_tsb_dmmu[i].data); 3145 } 3146 if (TSB_TAG_CTX(curcpu()->ci_tsb_immu[i].tag) != 0) { 3147 clrx(&curcpu()->ci_tsb_immu[i].data); 3148 } 3149 } 3150 sp_tlb_flush_all(); 3151 ctx = 1; 3152 curcpu()->ci_pmap_next_ctx = 2; 3153 } 3154 curcpu()->ci_ctxbusy[ctx] = pm->pm_physaddr; 3155 LIST_INSERT_HEAD(&curcpu()->ci_pmap_ctxlist, pm, pm_list[cpu_number()]); 3156 pmap_ctx(pm) = ctx; 3157 mutex_exit(&curcpu()->ci_ctx_lock); 3158 DPRINTF(PDB_CTX_ALLOC, ("ctx_alloc: cpu%d allocated ctx %d\n", 3159 cpu_number(), ctx)); 3160 return ctx; 3161} 3162 3163/* 3164 * Give away a context. 
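 *
 * Expects the owning cpu's ci_ctx_lock to be held (see the KASSERT below).
 * ctx_alloc() above uses this when the context space wraps and every
 * context on the cpu has to be recycled; pmap_remove_all() uses it when a
 * pmap gives up its translations.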
3165 */ 3166static void 3167ctx_free(struct pmap *pm, struct cpu_info *ci) 3168{ 3169 int oldctx; 3170 int cpunum; 3171 3172 KASSERT(mutex_owned(&ci->ci_ctx_lock)); 3173 3174#ifdef MULTIPROCESSOR 3175 cpunum = ci->ci_index; 3176#else 3177 /* Give the compiler a hint.. */ 3178 cpunum = 0; 3179#endif 3180 3181 oldctx = pm->pm_ctx[cpunum]; 3182 if (oldctx == 0) 3183 return; 3184 3185#ifdef DIAGNOSTIC 3186 if (pm == pmap_kernel()) 3187 panic("ctx_free: freeing kernel context"); 3188 if (ci->ci_ctxbusy[oldctx] == 0) 3189 printf("ctx_free: freeing free context %d\n", oldctx); 3190 if (ci->ci_ctxbusy[oldctx] != pm->pm_physaddr) { 3191 printf("ctx_free: freeing someone else's context\n " 3192 "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n", 3193 oldctx, (void *)(u_long)ci->ci_ctxbusy[oldctx], pm, 3194 (void *)(u_long)pm->pm_physaddr); 3195 Debugger(); 3196 } 3197#endif 3198 /* We should verify it has not been stolen and reallocated... */ 3199 DPRINTF(PDB_CTX_ALLOC, ("ctx_free: cpu%d freeing ctx %d\n", 3200 cpu_number(), oldctx)); 3201 ci->ci_ctxbusy[oldctx] = 0UL; 3202 pm->pm_ctx[cpunum] = 0; 3203 LIST_REMOVE(pm, pm_list[cpunum]); 3204} 3205 3206/* 3207 * Enter the pmap and virtual address into the 3208 * physical to virtual map table. 3209 * 3210 * We enter here with the pmap locked. 3211 */ 3212 3213void 3214pmap_enter_pv(struct pmap *pmap, vaddr_t va, paddr_t pa, struct vm_page *pg, 3215 pv_entry_t npv) 3216{ 3217 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3218 pv_entry_t pvh; 3219 3220 KASSERT(mutex_owned(&pmap_lock)); 3221 3222 pvh = &md->mdpg_pvh; 3223 DPRINTF(PDB_ENTER, ("pmap_enter: pvh %p: was %lx/%p/%p\n", 3224 pvh, pvh->pv_va, pvh->pv_pmap, pvh->pv_next)); 3225 if (pvh->pv_pmap == NULL) { 3226 3227 /* 3228 * No entries yet, use header as the first entry 3229 */ 3230 DPRINTF(PDB_ENTER, ("pmap_enter: first pv: pmap %p va %lx\n", 3231 pmap, va)); 3232 ENTER_STAT(firstpv); 3233 PV_SETVA(pvh, va); 3234 pvh->pv_pmap = pmap; 3235 pvh->pv_next = NULL; 3236 KASSERT(npv == NULL); 3237 } else { 3238 if (pg->loan_count == 0 && !(pvh->pv_va & PV_ALIAS)) { 3239 3240 /* 3241 * There is at least one other VA mapping this page. 3242 * Check if they are cache index compatible. If not 3243 * remove all mappings, flush the cache and set page 3244 * to be mapped uncached. Caching will be restored 3245 * when pages are mapped compatible again. 3246 */ 3247 if ((pvh->pv_va ^ va) & VA_ALIAS_MASK) { 3248 pvh->pv_va |= PV_ALIAS; 3249 pmap_page_cache(pmap, pa, 0); 3250 ENTER_STAT(ci); 3251 } 3252 } 3253 3254 /* 3255 * There is at least one other VA mapping this page. 3256 * Place this entry after the header. 3257 */ 3258 3259 DPRINTF(PDB_ENTER, ("pmap_enter: new pv: pmap %p va %lx\n", 3260 pmap, va)); 3261 npv->pv_pmap = pmap; 3262 npv->pv_va = va & PV_VAMASK; 3263 npv->pv_next = pvh->pv_next; 3264 pvh->pv_next = npv; 3265 3266 if (!npv->pv_next) { 3267 ENTER_STAT(secondpv); 3268 } 3269 } 3270} 3271 3272/* 3273 * Remove a physical to virtual address translation. 3274 */ 3275 3276pv_entry_t 3277pmap_remove_pv(struct pmap *pmap, vaddr_t va, struct vm_page *pg) 3278{ 3279 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3280 pv_entry_t pvh, npv, pv; 3281 int64_t data = 0; 3282 3283 KASSERT(mutex_owned(&pmap_lock)); 3284 3285 pvh = &md->mdpg_pvh; 3286 3287 DPRINTF(PDB_REMOVE, ("pmap_remove_pv(pm=%p, va=%p, pg=%p)\n", pmap, 3288 (void *)(u_long)va, pg)); 3289 pv_check(); 3290 3291 /* 3292 * Remove page from the PV table. 
3293 * If it is the first entry on the list, it is actually 3294 * in the header and we must copy the following entry up 3295 * to the header. Otherwise we must search the list for 3296 * the entry. In either case we free the now unused entry. 3297 */ 3298 if (pmap == pvh->pv_pmap && PV_MATCH(pvh, va)) { 3299 data = pseg_get(pvh->pv_pmap, pvh->pv_va & PV_VAMASK); 3300 KASSERT(data & TLB_V); 3301 npv = pvh->pv_next; 3302 if (npv) { 3303 /* First save mod/ref bits */ 3304 pvh->pv_va = (pvh->pv_va & PV_MASK) | npv->pv_va; 3305 pvh->pv_next = npv->pv_next; 3306 pvh->pv_pmap = npv->pv_pmap; 3307 } else { 3308 pvh->pv_pmap = NULL; 3309 pvh->pv_next = NULL; 3310 pvh->pv_va &= (PV_REF|PV_MOD); 3311 } 3312 REMOVE_STAT(pvfirst); 3313 } else { 3314 for (pv = pvh, npv = pvh->pv_next; npv; 3315 pv = npv, npv = npv->pv_next) { 3316 REMOVE_STAT(pvsearch); 3317 if (pmap == npv->pv_pmap && PV_MATCH(npv, va)) 3318 break; 3319 } 3320 pv->pv_next = npv->pv_next; 3321 data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK); 3322 KASSERT(data & TLB_V); 3323 } 3324 3325 /* Save ref/mod info */ 3326 if (data & TLB_ACCESS) 3327 pvh->pv_va |= PV_REF; 3328 if (data & TLB_MODIFY) 3329 pvh->pv_va |= PV_MOD; 3330 3331 /* Check to see if the alias went away */ 3332 if (pvh->pv_va & PV_ALIAS) { 3333 pvh->pv_va &= ~PV_ALIAS; 3334 for (pv = pvh; pv; pv = pv->pv_next) { 3335 if ((pv->pv_va ^ pvh->pv_va) & VA_ALIAS_MASK) { 3336 pvh->pv_va |= PV_ALIAS; 3337 break; 3338 } 3339 } 3340 if (!(pvh->pv_va & PV_ALIAS)) 3341 pmap_page_cache(pmap, VM_PAGE_TO_PHYS(pg), 1); 3342 } 3343 pv_check(); 3344 return npv; 3345} 3346 3347/* 3348 * pmap_page_cache: 3349 * 3350 * Change all mappings of a page to cached/uncached. 3351 */ 3352void 3353pmap_page_cache(struct pmap *pm, paddr_t pa, int mode) 3354{ 3355 struct vm_page *pg; 3356 struct vm_page_md *md; 3357 pv_entry_t pv; 3358 vaddr_t va; 3359 int rv; 3360 3361#if 0 3362 /* 3363 * Why is this? 3364 */ 3365 if (CPU_ISSUN4US || CPU_ISSUN4V) 3366 return; 3367#endif 3368 3369 KASSERT(mutex_owned(&pmap_lock)); 3370 3371 DPRINTF(PDB_ENTER, ("pmap_page_uncache(%llx)\n", 3372 (unsigned long long)pa)); 3373 pg = PHYS_TO_VM_PAGE(pa); 3374 md = VM_PAGE_TO_MD(pg); 3375 pv = &md->mdpg_pvh; 3376 while (pv) { 3377 va = pv->pv_va & PV_VAMASK; 3378 if (pv->pv_va & PV_NC) { 3379 int64_t data; 3380 3381 /* Non-cached -- I/O mapping */ 3382 data = pseg_get(pv->pv_pmap, va); 3383 KASSERT(data & TLB_V); 3384 rv = pseg_set(pv->pv_pmap, va, 3385 data & ~(TLB_CV|TLB_CP), 0); 3386 if (rv & 1) 3387 panic("pmap_page_cache: pseg_set needs" 3388 " spare! rv=%d\n", rv); 3389 } else if (mode && (!(pv->pv_va & PV_NVC))) { 3390 int64_t data; 3391 3392 /* Enable caching */ 3393 data = pseg_get(pv->pv_pmap, va); 3394 KASSERT(data & TLB_V); 3395 rv = pseg_set(pv->pv_pmap, va, data | TLB_CV, 0); 3396 if (rv & 1) 3397 panic("pmap_page_cache: pseg_set needs" 3398 " spare! rv=%d\n", rv); 3399 } else { 3400 int64_t data; 3401 3402 /* Disable caching */ 3403 data = pseg_get(pv->pv_pmap, va); 3404 KASSERT(data & TLB_V); 3405 rv = pseg_set(pv->pv_pmap, va, data & ~TLB_CV, 0); 3406 if (rv & 1) 3407 panic("pmap_page_cache: pseg_set needs" 3408 " spare! rv=%d\n", rv); 3409 } 3410 if (pmap_is_on_mmu(pv->pv_pmap)) { 3411 /* Force reload -- cache bits have changed */ 3412 KASSERT(pmap_ctx(pv->pv_pmap)>=0); 3413 tsb_invalidate(va, pv->pv_pmap); 3414 tlb_flush_pte(va, pv->pv_pmap); 3415 } 3416 pv = pv->pv_next; 3417 } 3418} 3419 3420/* 3421 * Some routines to allocate and free PTPs. 
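 *
 * pmap_get_page() returns a zeroed physical page: from uvm_pagealloc()
 * once uvm.page_init_done, or straight from uvm_page_physget() during
 * bootstrap.  pmap_enter() above calls it when pseg_set() reports that a
 * spare page-table page is needed, and pmap_free_page_noflush() when an
 * allocated spare turns out to be unused.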
3422 */ 3423static int 3424pmap_get_page(paddr_t *p) 3425{ 3426 struct vm_page *pg; 3427 paddr_t pa; 3428 3429 if (uvm.page_init_done) { 3430 pg = uvm_pagealloc(NULL, 0, NULL, 3431 UVM_PGA_ZERO | UVM_PGA_USERESERVE); 3432 if (pg == NULL) 3433 return (0); 3434 pa = VM_PAGE_TO_PHYS(pg); 3435 } else { 3436 if (!uvm_page_physget(&pa)) 3437 return (0); 3438 pmap_zero_page(pa); 3439 } 3440 *p = pa; 3441 return (1); 3442} 3443 3444static void 3445pmap_free_page(paddr_t pa, sparc64_cpuset_t cs) 3446{ 3447 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 3448 3449 dcache_flush_page_cpuset(pa, cs); 3450 uvm_pagefree(pg); 3451} 3452 3453static void 3454pmap_free_page_noflush(paddr_t pa) 3455{ 3456 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 3457 3458 uvm_pagefree(pg); 3459} 3460 3461#ifdef DDB 3462 3463void db_dump_pv(db_expr_t, int, db_expr_t, const char *); 3464void 3465db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, const char *modif) 3466{ 3467 struct vm_page *pg; 3468 struct vm_page_md *md; 3469 struct pv_entry *pv; 3470 3471 if (!have_addr) { 3472 db_printf("Need addr for pv\n"); 3473 return; 3474 } 3475 3476 pg = PHYS_TO_VM_PAGE((paddr_t)addr); 3477 if (pg == NULL) { 3478 db_printf("page is not managed\n"); 3479 return; 3480 } 3481 md = VM_PAGE_TO_MD(pg); 3482 for (pv = &md->mdpg_pvh; pv; pv = pv->pv_next) 3483 db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n", 3484 pv, pv->pv_next, pv->pv_pmap, 3485 (unsigned long long)pv->pv_va); 3486} 3487 3488#endif 3489 3490#ifdef DEBUG 3491/* 3492 * Test ref/modify handling. */ 3493void pmap_testout(void); 3494void 3495pmap_testout(void) 3496{ 3497 vaddr_t va; 3498 volatile int *loc; 3499 int val = 0; 3500 paddr_t pa; 3501 struct vm_page *pg; 3502 int ref, mod; 3503 3504 /* Allocate a page */ 3505 va = (vaddr_t)(vmmap - PAGE_SIZE); 3506 KASSERT(va != 0); 3507 loc = (int*)va; 3508 3509 pmap_get_page(&pa); 3510 pg = PHYS_TO_VM_PAGE(pa); 3511 pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL); 3512 pmap_update(pmap_kernel()); 3513 3514 /* Now clear reference and modify */ 3515 ref = pmap_clear_reference(pg); 3516 mod = pmap_clear_modify(pg); 3517 printf("Clearing page va %p pa %lx: ref %d, mod %d\n", 3518 (void *)(u_long)va, (long)pa, 3519 ref, mod); 3520 3521 /* Check it's properly cleared */ 3522 ref = pmap_is_referenced(pg); 3523 mod = pmap_is_modified(pg); 3524 printf("Checking cleared page: ref %d, mod %d\n", 3525 ref, mod); 3526 3527 /* Reference page */ 3528 val = *loc; 3529 3530 ref = pmap_is_referenced(pg); 3531 mod = pmap_is_modified(pg); 3532 printf("Referenced page: ref %d, mod %d val %x\n", 3533 ref, mod, val); 3534 3535 /* Now clear reference and modify */ 3536 ref = pmap_clear_reference(pg); 3537 mod = pmap_clear_modify(pg); 3538 printf("Clearing page va %p pa %lx: ref %d, mod %d\n", 3539 (void *)(u_long)va, (long)pa, 3540 ref, mod); 3541 3542 /* Modify page */ 3543 *loc = 1; 3544 3545 ref = pmap_is_referenced(pg); 3546 mod = pmap_is_modified(pg); 3547 printf("Modified page: ref %d, mod %d\n", 3548 ref, mod); 3549 3550 /* Now clear reference and modify */ 3551 ref = pmap_clear_reference(pg); 3552 mod = pmap_clear_modify(pg); 3553 printf("Clearing page va %p pa %lx: ref %d, mod %d\n", 3554 (void *)(u_long)va, (long)pa, 3555 ref, mod); 3556 3557 /* Check it's properly cleared */ 3558 ref = pmap_is_referenced(pg); 3559 mod = pmap_is_modified(pg); 3560 printf("Checking cleared page: ref %d, mod %d\n", 3561 ref, mod); 3562 3563 /* Modify page */ 3564 *loc = 1; 3565 3566 ref = pmap_is_referenced(pg); 3567 mod = pmap_is_modified(pg); 3568 
printf("Modified page: ref %d, mod %d\n", 3569 ref, mod); 3570 3571 /* Check pmap_protect() */ 3572 pmap_protect(pmap_kernel(), va, va+1, VM_PROT_READ); 3573 pmap_update(pmap_kernel()); 3574 ref = pmap_is_referenced(pg); 3575 mod = pmap_is_modified(pg); 3576 printf("pmap_protect(VM_PROT_READ): ref %d, mod %d\n", 3577 ref, mod); 3578 3579 /* Now clear reference and modify */ 3580 ref = pmap_clear_reference(pg); 3581 mod = pmap_clear_modify(pg); 3582 printf("Clearing page va %p pa %lx: ref %d, mod %d\n", 3583 (void *)(u_long)va, (long)pa, 3584 ref, mod); 3585 3586 /* Modify page */ 3587 pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL); 3588 pmap_update(pmap_kernel()); 3589 *loc = 1; 3590 3591 ref = pmap_is_referenced(pg); 3592 mod = pmap_is_modified(pg); 3593 printf("Modified page: ref %d, mod %d\n", 3594 ref, mod); 3595 3596 /* Check pmap_protect() */ 3597 pmap_protect(pmap_kernel(), va, va+1, VM_PROT_NONE); 3598 pmap_update(pmap_kernel()); 3599 ref = pmap_is_referenced(pg); 3600 mod = pmap_is_modified(pg); 3601 printf("pmap_protect(VM_PROT_READ): ref %d, mod %d\n", 3602 ref, mod); 3603 3604 /* Now clear reference and modify */ 3605 ref = pmap_clear_reference(pg); 3606 mod = pmap_clear_modify(pg); 3607 printf("Clearing page va %p pa %lx: ref %d, mod %d\n", 3608 (void *)(u_long)va, (long)pa, 3609 ref, mod); 3610 3611 /* Modify page */ 3612 pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL); 3613 pmap_update(pmap_kernel()); 3614 *loc = 1; 3615 3616 ref = pmap_is_referenced(pg); 3617 mod = pmap_is_modified(pg); 3618 printf("Modified page: ref %d, mod %d\n", 3619 ref, mod); 3620 3621 /* Check pmap_pag_protect() */ 3622 pmap_page_protect(pg, VM_PROT_READ); 3623 ref = pmap_is_referenced(pg); 3624 mod = pmap_is_modified(pg); 3625 printf("pmap_protect(): ref %d, mod %d\n", 3626 ref, mod); 3627 3628 /* Now clear reference and modify */ 3629 ref = pmap_clear_reference(pg); 3630 mod = pmap_clear_modify(pg); 3631 printf("Clearing page va %p pa %lx: ref %d, mod %d\n", 3632 (void *)(u_long)va, (long)pa, 3633 ref, mod); 3634 3635 3636 /* Modify page */ 3637 pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL); 3638 pmap_update(pmap_kernel()); 3639 *loc = 1; 3640 3641 ref = pmap_is_referenced(pg); 3642 mod = pmap_is_modified(pg); 3643 printf("Modified page: ref %d, mod %d\n", 3644 ref, mod); 3645 3646 /* Check pmap_pag_protect() */ 3647 pmap_page_protect(pg, VM_PROT_NONE); 3648 ref = pmap_is_referenced(pg); 3649 mod = pmap_is_modified(pg); 3650 printf("pmap_protect(): ref %d, mod %d\n", 3651 ref, mod); 3652 3653 /* Now clear reference and modify */ 3654 ref = pmap_clear_reference(pg); 3655 mod = pmap_clear_modify(pg); 3656 printf("Clearing page va %p pa %lx: ref %d, mod %d\n", 3657 (void *)(u_long)va, (long)pa, 3658 ref, mod); 3659 3660 /* Unmap page */ 3661 pmap_remove(pmap_kernel(), va, va+1); 3662 pmap_update(pmap_kernel()); 3663 ref = pmap_is_referenced(pg); 3664 mod = pmap_is_modified(pg); 3665 printf("Unmapped page: ref %d, mod %d\n", ref, mod); 3666 3667 /* Now clear reference and modify */ 3668 ref = pmap_clear_reference(pg); 3669 mod = pmap_clear_modify(pg); 3670 printf("Clearing page va %p pa %lx: ref %d, mod %d\n", 3671 (void *)(u_long)va, (long)pa, ref, mod); 3672 3673 /* Check it's properly cleared */ 3674 ref = pmap_is_referenced(pg); 3675 mod = pmap_is_modified(pg); 3676 printf("Checking cleared page: ref %d, mod %d\n", 3677 ref, mod); 3678 3679 pmap_remove(pmap_kernel(), va, va+1); 3680 pmap_update(pmap_kernel()); 3681 pmap_free_page(pa, cpus_active); 3682} 
3683#endif 3684 3685void 3686pmap_update(struct pmap *pmap) 3687{ 3688 3689 if (pmap->pm_refs > 0) { 3690 return; 3691 } 3692 pmap->pm_refs = 1; 3693 pmap_activate_pmap(pmap); 3694} 3695 3696/* 3697 * pmap_copy_page()/pmap_zero_page() 3698 * 3699 * we make sure that the destination page is flushed from all D$'s 3700 * before we perform the copy/zero. 3701 */ 3702extern int cold; 3703void 3704pmap_copy_page(paddr_t src, paddr_t dst) 3705{ 3706 3707 if (!cold) 3708 dcache_flush_page_all(dst); 3709 pmap_copy_page_phys(src, dst); 3710} 3711 3712void 3713pmap_zero_page(paddr_t pa) 3714{ 3715 3716 if (!cold) 3717 dcache_flush_page_all(pa); 3718 pmap_zero_page_phys(pa); 3719} 3720
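
/*
 * Illustrative sketch only, kept out of the build with #if 0: the usual
 * calling pattern for pmap_extract() above.  The helper name
 * example_kva_to_pa() is hypothetical and not part of this pmap; the
 * point is merely that callers must check the return value, since
 * pmap_extract() returns FALSE for a virtual address with no valid
 * mapping.
 */
#if 0
static paddr_t
example_kva_to_pa(vaddr_t va)
{
	paddr_t pa;

	if (!pmap_extract(pmap_kernel(), va, &pa))
		panic("example_kva_to_pa: va %lx not mapped", (u_long)va);
	return pa;
}
#endif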