pmap.c revision 223170
1/*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * Copyright (c) 1998,2000 Doug Rabson 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * the Systems Programming Group of the University of Utah Computer 13 * Science Department and William Jolitz of UUNET Technologies Inc. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by the University of 26 * California, Berkeley and its contributors. 27 * 4. Neither the name of the University nor the names of its contributors 28 * may be used to endorse or promote products derived from this software 29 * without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41 * SUCH DAMAGE. 42 * 43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 44 * from: i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp 45 * with some ideas from NetBSD's alpha pmap 46 */ 47 48#include <sys/cdefs.h> 49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 223170 2011-06-17 04:26:03Z marcel $"); 50 51#include <sys/param.h> 52#include <sys/kernel.h> 53#include <sys/lock.h> 54#include <sys/mman.h> 55#include <sys/mutex.h> 56#include <sys/proc.h> 57#include <sys/smp.h> 58#include <sys/sysctl.h> 59#include <sys/systm.h> 60 61#include <vm/vm.h> 62#include <vm/vm_page.h> 63#include <vm/vm_map.h> 64#include <vm/vm_object.h> 65#include <vm/vm_pageout.h> 66#include <vm/uma.h> 67 68#include <machine/bootinfo.h> 69#include <machine/md_var.h> 70#include <machine/pal.h> 71 72/* 73 * Manages physical address maps. 74 * 75 * In addition to hardware address maps, this 76 * module is called upon to provide software-use-only 77 * maps which may or may not be stored in the same 78 * form as hardware maps. These pseudo-maps are 79 * used to store intermediate results from copy 80 * operations to and from address spaces. 81 * 82 * Since the information managed by this module is 83 * also stored by the logical address mapping module, 84 * this module may throw away valid virtual-to-physical 85 * mappings at almost any time. However, invalidations 86 * of virtual-to-physical mappings must be done as 87 * requested. 88 * 89 * In order to cope with hardware architectures which 90 * make virtual-to-physical map invalidates expensive, 91 * this module may delay invalidate or reduced protection 92 * operations until such time as they are actually 93 * necessary. This module is given full information as 94 * to which processors are currently using which maps, 95 * and to when physical maps must be made correct. 96 */ 97 98/* 99 * Following the Linux model, region IDs are allocated in groups of 100 * eight so that a single region ID can be used for as many RRs as we 101 * want by encoding the RR number into the low bits of the ID. 102 * 103 * We reserve region ID 0 for the kernel and allocate the remaining 104 * IDs for user pmaps. 105 * 106 * Region 0-3: User virtually mapped 107 * Region 4: PBVM and special mappings 108 * Region 5: Kernel virtual memory 109 * Region 6: Direct-mapped uncacheable 110 * Region 7: Direct-mapped cacheable 111 */ 112 113/* XXX move to a header. */ 114extern uint64_t ia64_gateway_page[]; 115 116#ifndef PMAP_SHPGPERPROC 117#define PMAP_SHPGPERPROC 200 118#endif 119 120#if !defined(DIAGNOSTIC) 121#define PMAP_INLINE __inline 122#else 123#define PMAP_INLINE 124#endif 125 126#define pmap_accessed(lpte) ((lpte)->pte & PTE_ACCESSED) 127#define pmap_dirty(lpte) ((lpte)->pte & PTE_DIRTY) 128#define pmap_exec(lpte) ((lpte)->pte & PTE_AR_RX) 129#define pmap_managed(lpte) ((lpte)->pte & PTE_MANAGED) 130#define pmap_ppn(lpte) ((lpte)->pte & PTE_PPN_MASK) 131#define pmap_present(lpte) ((lpte)->pte & PTE_PRESENT) 132#define pmap_prot(lpte) (((lpte)->pte & PTE_PROT_MASK) >> 56) 133#define pmap_wired(lpte) ((lpte)->pte & PTE_WIRED) 134 135#define pmap_clear_accessed(lpte) (lpte)->pte &= ~PTE_ACCESSED 136#define pmap_clear_dirty(lpte) (lpte)->pte &= ~PTE_DIRTY 137#define pmap_clear_present(lpte) (lpte)->pte &= ~PTE_PRESENT 138#define pmap_clear_wired(lpte) (lpte)->pte &= ~PTE_WIRED 139 140#define pmap_set_wired(lpte) (lpte)->pte |= PTE_WIRED 141 142/* 143 * The VHPT bucket head structure. 144 */ 145struct ia64_bucket { 146 uint64_t chain; 147 struct mtx mutex; 148 u_int length; 149}; 150 151/* 152 * Statically allocated kernel pmap 153 */ 154struct pmap kernel_pmap_store; 155 156vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 157vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 158 159/* 160 * Kernel virtual memory management. 161 */ 162static int nkpt; 163extern struct ia64_lpte ***ia64_kptdir; 164 165#define KPTE_DIR0_INDEX(va) \ 166 (((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1)) 167#define KPTE_DIR1_INDEX(va) \ 168 (((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1)) 169#define KPTE_PTE_INDEX(va) \ 170 (((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1)) 171#define NKPTEPG (PAGE_SIZE / sizeof(struct ia64_lpte)) 172 173vm_offset_t kernel_vm_end; 174 175/* Values for ptc.e. XXX values for SKI. */ 176static uint64_t pmap_ptc_e_base = 0x100000000; 177static uint64_t pmap_ptc_e_count1 = 3; 178static uint64_t pmap_ptc_e_count2 = 2; 179static uint64_t pmap_ptc_e_stride1 = 0x2000; 180static uint64_t pmap_ptc_e_stride2 = 0x100000000; 181 182extern volatile u_long pmap_ptc_g_sem; 183 184/* 185 * Data for the RID allocator 186 */ 187static int pmap_ridcount; 188static int pmap_rididx; 189static int pmap_ridmapsz; 190static int pmap_ridmax; 191static uint64_t *pmap_ridmap; 192struct mtx pmap_ridmutex; 193 194/* 195 * Data for the pv entry allocation mechanism 196 */ 197static uma_zone_t pvzone; 198static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 199 200/* 201 * Data for allocating PTEs for user processes. 202 */ 203static uma_zone_t ptezone; 204 205/* 206 * Virtual Hash Page Table (VHPT) data. 207 */ 208/* SYSCTL_DECL(_machdep); */ 209SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, ""); 210 211struct ia64_bucket *pmap_vhpt_bucket; 212 213int pmap_vhpt_nbuckets; 214SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD, 215 &pmap_vhpt_nbuckets, 0, ""); 216 217int pmap_vhpt_log2size = 0; 218TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size); 219SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD, 220 &pmap_vhpt_log2size, 0, ""); 221 222static int pmap_vhpt_inserts; 223SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD, 224 &pmap_vhpt_inserts, 0, ""); 225 226static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS); 227SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD, 228 NULL, 0, pmap_vhpt_population, "I", ""); 229 230static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va); 231 232static PMAP_INLINE void free_pv_entry(pv_entry_t pv); 233static pv_entry_t get_pv_entry(pmap_t locked_pmap); 234 235static void pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 236 vm_page_t m, vm_prot_t prot); 237static void pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va); 238static void pmap_invalidate_all(void); 239static int pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, 240 vm_offset_t va, pv_entry_t pv, int freepte); 241static int pmap_remove_vhpt(vm_offset_t va); 242static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 243 vm_page_t m); 244 245vm_offset_t 246pmap_steal_memory(vm_size_t size) 247{ 248 vm_size_t bank_size; 249 vm_offset_t pa, va; 250 251 size = round_page(size); 252 253 bank_size = phys_avail[1] - phys_avail[0]; 254 while (size > bank_size) { 255 int i; 256 for (i = 0; phys_avail[i+2]; i+= 2) { 257 phys_avail[i] = phys_avail[i+2]; 258 phys_avail[i+1] = phys_avail[i+3]; 259 } 260 phys_avail[i] = 0; 261 phys_avail[i+1] = 0; 262 if (!phys_avail[0]) 263 panic("pmap_steal_memory: out of memory"); 264 bank_size = phys_avail[1] - phys_avail[0]; 265 } 266 267 pa = phys_avail[0]; 268 phys_avail[0] += size; 269 270 va = IA64_PHYS_TO_RR7(pa); 271 bzero((caddr_t) va, size); 272 return va; 273} 274 275static void 276pmap_initialize_vhpt(vm_offset_t vhpt) 277{ 278 struct ia64_lpte *pte; 279 u_int i; 280 281 pte = (struct ia64_lpte *)vhpt; 282 for (i = 0; i < pmap_vhpt_nbuckets; i++) { 283 pte[i].pte = 0; 284 pte[i].itir = 0; 285 pte[i].tag = 1UL << 63; /* Invalid tag */ 286 pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i); 287 } 288} 289 290#ifdef SMP 291MALLOC_DECLARE(M_SMP); 292 293vm_offset_t 294pmap_alloc_vhpt(void) 295{ 296 vm_offset_t vhpt; 297 vm_size_t size; 298 299 size = 1UL << pmap_vhpt_log2size; 300 vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL); 301 if (vhpt != 0) { 302 vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt)); 303 pmap_initialize_vhpt(vhpt); 304 } 305 return (vhpt); 306} 307#endif 308 309/* 310 * Bootstrap the system enough to run with virtual memory. 311 */ 312void 313pmap_bootstrap() 314{ 315 struct ia64_pal_result res; 316 vm_offset_t base; 317 size_t size; 318 int i, j, count, ridbits; 319 320 /* 321 * Query the PAL Code to find the loop parameters for the 322 * ptc.e instruction. 323 */ 324 res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0); 325 if (res.pal_status != 0) 326 panic("Can't configure ptc.e parameters"); 327 pmap_ptc_e_base = res.pal_result[0]; 328 pmap_ptc_e_count1 = res.pal_result[1] >> 32; 329 pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1); 330 pmap_ptc_e_stride1 = res.pal_result[2] >> 32; 331 pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1); 332 if (bootverbose) 333 printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, " 334 "stride1=0x%lx, stride2=0x%lx\n", 335 pmap_ptc_e_base, 336 pmap_ptc_e_count1, 337 pmap_ptc_e_count2, 338 pmap_ptc_e_stride1, 339 pmap_ptc_e_stride2); 340 341 /* 342 * Setup RIDs. RIDs 0..7 are reserved for the kernel. 343 * 344 * We currently need at least 19 bits in the RID because PID_MAX 345 * can only be encoded in 17 bits and we need RIDs for 4 regions 346 * per process. With PID_MAX equalling 99999 this means that we 347 * need to be able to encode 399996 (=4*PID_MAX). 348 * The Itanium processor only has 18 bits and the architected 349 * minimum is exactly that. So, we cannot use a PID based scheme 350 * in those cases. Enter pmap_ridmap... 351 * We should avoid the map when running on a processor that has 352 * implemented enough bits. This means that we should pass the 353 * process/thread ID to pmap. This we currently don't do, so we 354 * use the map anyway. However, we don't want to allocate a map 355 * that is large enough to cover the range dictated by the number 356 * of bits in the RID, because that may result in a RID map of 357 * 2MB in size for a 24-bit RID. A 64KB map is enough. 358 * The bottomline: we create a 32KB map when the processor only 359 * implements 18 bits (or when we can't figure it out). Otherwise 360 * we create a 64KB map. 361 */ 362 res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0); 363 if (res.pal_status != 0) { 364 if (bootverbose) 365 printf("Can't read VM Summary - assuming 18 Region ID bits\n"); 366 ridbits = 18; /* guaranteed minimum */ 367 } else { 368 ridbits = (res.pal_result[1] >> 8) & 0xff; 369 if (bootverbose) 370 printf("Processor supports %d Region ID bits\n", 371 ridbits); 372 } 373 if (ridbits > 19) 374 ridbits = 19; 375 376 pmap_ridmax = (1 << ridbits); 377 pmap_ridmapsz = pmap_ridmax / 64; 378 pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8); 379 pmap_ridmap[0] |= 0xff; 380 pmap_rididx = 0; 381 pmap_ridcount = 8; 382 mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF); 383 384 /* 385 * Allocate some memory for initial kernel 'page tables'. 386 */ 387 ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE); 388 nkpt = 0; 389 kernel_vm_end = VM_MIN_KERNEL_ADDRESS; 390 391 for (i = 0; phys_avail[i+2]; i+= 2) 392 ; 393 count = i+2; 394 395 /* 396 * Determine a valid (mappable) VHPT size. 397 */ 398 TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size); 399 if (pmap_vhpt_log2size == 0) 400 pmap_vhpt_log2size = 20; 401 else if (pmap_vhpt_log2size < 16) 402 pmap_vhpt_log2size = 16; 403 else if (pmap_vhpt_log2size > 28) 404 pmap_vhpt_log2size = 28; 405 if (pmap_vhpt_log2size & 1) 406 pmap_vhpt_log2size--; 407 408 base = 0; 409 size = 1UL << pmap_vhpt_log2size; 410 for (i = 0; i < count; i += 2) { 411 base = (phys_avail[i] + size - 1) & ~(size - 1); 412 if (base + size <= phys_avail[i+1]) 413 break; 414 } 415 if (!phys_avail[i]) 416 panic("Unable to allocate VHPT"); 417 418 if (base != phys_avail[i]) { 419 /* Split this region. */ 420 for (j = count; j > i; j -= 2) { 421 phys_avail[j] = phys_avail[j-2]; 422 phys_avail[j+1] = phys_avail[j-2+1]; 423 } 424 phys_avail[i+1] = base; 425 phys_avail[i+2] = base + size; 426 } else 427 phys_avail[i] = base + size; 428 429 base = IA64_PHYS_TO_RR7(base); 430 PCPU_SET(md.vhpt, base); 431 if (bootverbose) 432 printf("VHPT: address=%#lx, size=%#lx\n", base, size); 433 434 pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte); 435 pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets * 436 sizeof(struct ia64_bucket)); 437 for (i = 0; i < pmap_vhpt_nbuckets; i++) { 438 /* Stolen memory is zeroed. */ 439 mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL, 440 MTX_NOWITNESS | MTX_SPIN); 441 } 442 443 pmap_initialize_vhpt(base); 444 map_vhpt(base); 445 ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1); 446 ia64_srlz_i(); 447 448 virtual_avail = VM_MIN_KERNEL_ADDRESS; 449 virtual_end = VM_MAX_KERNEL_ADDRESS; 450 451 /* 452 * Initialize the kernel pmap (which is statically allocated). 453 */ 454 PMAP_LOCK_INIT(kernel_pmap); 455 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) 456 kernel_pmap->pm_rid[i] = 0; 457 TAILQ_INIT(&kernel_pmap->pm_pvlist); 458 PCPU_SET(md.current_pmap, kernel_pmap); 459 460 /* Region 5 is mapped via the VHPT. */ 461 ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1); 462 463 /* 464 * Clear out any random TLB entries left over from booting. 465 */ 466 pmap_invalidate_all(); 467 468 map_gateway_page(); 469} 470 471static int 472pmap_vhpt_population(SYSCTL_HANDLER_ARGS) 473{ 474 int count, error, i; 475 476 count = 0; 477 for (i = 0; i < pmap_vhpt_nbuckets; i++) 478 count += pmap_vhpt_bucket[i].length; 479 480 error = SYSCTL_OUT(req, &count, sizeof(count)); 481 return (error); 482} 483 484/* 485 * Initialize a vm_page's machine-dependent fields. 486 */ 487void 488pmap_page_init(vm_page_t m) 489{ 490 491 TAILQ_INIT(&m->md.pv_list); 492 m->md.pv_list_count = 0; 493} 494 495/* 496 * Initialize the pmap module. 497 * Called by vm_init, to initialize any structures that the pmap 498 * system needs to map virtual memory. 499 */ 500void 501pmap_init(void) 502{ 503 int shpgperproc = PMAP_SHPGPERPROC; 504 505 /* 506 * Initialize the address space (zone) for the pv entries. Set a 507 * high water mark so that the system can recover from excessive 508 * numbers of pv entries. 509 */ 510 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 511 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 512 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 513 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; 514 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 515 pv_entry_high_water = 9 * (pv_entry_max / 10); 516 517 ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte), 518 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE); 519} 520 521 522/*************************************************** 523 * Manipulate TLBs for a pmap 524 ***************************************************/ 525 526static void 527pmap_invalidate_page(vm_offset_t va) 528{ 529 struct ia64_lpte *pte; 530 struct pcpu *pc; 531 uint64_t tag, sem; 532 register_t is; 533 u_int vhpt_ofs; 534 535 critical_enter(); 536 vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt); 537 tag = ia64_ttag(va); 538 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 539 pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs); 540 atomic_cmpset_64(&pte->tag, tag, 1UL << 63); 541 } 542 543 /* PTC.G enter exclusive */ 544 is = intr_disable(); 545 546 /* Atomically assert writer after all writers have gone. */ 547 do { 548 /* Wait until there's no more writer. */ 549 do { 550 sem = atomic_load_acq_long(&pmap_ptc_g_sem); 551 tag = sem | (1ul << 63); 552 } while (sem == tag); 553 } while (!atomic_cmpset_rel_long(&pmap_ptc_g_sem, sem, tag)); 554 555 /* Wait until all readers are gone. */ 556 tag = (1ul << 63); 557 do { 558 sem = atomic_load_acq_long(&pmap_ptc_g_sem); 559 } while (sem != tag); 560 561 ia64_ptc_ga(va, PAGE_SHIFT << 2); 562 ia64_mf(); 563 ia64_srlz_i(); 564 565 /* PTC.G leave exclusive */ 566 atomic_store_rel_long(&pmap_ptc_g_sem, 0); 567 568 ia64_invala(); 569 570 intr_restore(is); 571 critical_exit(); 572} 573 574static void 575pmap_invalidate_all_1(void *arg) 576{ 577 uint64_t addr; 578 int i, j; 579 580 critical_enter(); 581 addr = pmap_ptc_e_base; 582 for (i = 0; i < pmap_ptc_e_count1; i++) { 583 for (j = 0; j < pmap_ptc_e_count2; j++) { 584 ia64_ptc_e(addr); 585 addr += pmap_ptc_e_stride2; 586 } 587 addr += pmap_ptc_e_stride1; 588 } 589 critical_exit(); 590} 591 592static void 593pmap_invalidate_all(void) 594{ 595 596#ifdef SMP 597 if (mp_ncpus > 1) { 598 smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL); 599 return; 600 } 601#endif 602 pmap_invalidate_all_1(NULL); 603} 604 605static uint32_t 606pmap_allocate_rid(void) 607{ 608 uint64_t bit, bits; 609 int rid; 610 611 mtx_lock(&pmap_ridmutex); 612 if (pmap_ridcount == pmap_ridmax) 613 panic("pmap_allocate_rid: All Region IDs used"); 614 615 /* Find an index with a free bit. */ 616 while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) { 617 pmap_rididx++; 618 if (pmap_rididx == pmap_ridmapsz) 619 pmap_rididx = 0; 620 } 621 rid = pmap_rididx * 64; 622 623 /* Find a free bit. */ 624 bit = 1UL; 625 while (bits & bit) { 626 rid++; 627 bit <<= 1; 628 } 629 630 pmap_ridmap[pmap_rididx] |= bit; 631 pmap_ridcount++; 632 mtx_unlock(&pmap_ridmutex); 633 634 return rid; 635} 636 637static void 638pmap_free_rid(uint32_t rid) 639{ 640 uint64_t bit; 641 int idx; 642 643 idx = rid / 64; 644 bit = ~(1UL << (rid & 63)); 645 646 mtx_lock(&pmap_ridmutex); 647 pmap_ridmap[idx] &= bit; 648 pmap_ridcount--; 649 mtx_unlock(&pmap_ridmutex); 650} 651 652/*************************************************** 653 * Page table page management routines..... 654 ***************************************************/ 655 656void 657pmap_pinit0(struct pmap *pmap) 658{ 659 /* kernel_pmap is the same as any other pmap. */ 660 pmap_pinit(pmap); 661} 662 663/* 664 * Initialize a preallocated and zeroed pmap structure, 665 * such as one in a vmspace structure. 666 */ 667int 668pmap_pinit(struct pmap *pmap) 669{ 670 int i; 671 672 PMAP_LOCK_INIT(pmap); 673 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) 674 pmap->pm_rid[i] = pmap_allocate_rid(); 675 TAILQ_INIT(&pmap->pm_pvlist); 676 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 677 return (1); 678} 679 680/*************************************************** 681 * Pmap allocation/deallocation routines. 682 ***************************************************/ 683 684/* 685 * Release any resources held by the given physical map. 686 * Called when a pmap initialized by pmap_pinit is being released. 687 * Should only be called if the map contains no valid mappings. 688 */ 689void 690pmap_release(pmap_t pmap) 691{ 692 int i; 693 694 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) 695 if (pmap->pm_rid[i]) 696 pmap_free_rid(pmap->pm_rid[i]); 697 PMAP_LOCK_DESTROY(pmap); 698} 699 700/* 701 * grow the number of kernel page table entries, if needed 702 */ 703void 704pmap_growkernel(vm_offset_t addr) 705{ 706 struct ia64_lpte **dir1; 707 struct ia64_lpte *leaf; 708 vm_page_t nkpg; 709 710 while (kernel_vm_end <= addr) { 711 if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64) 712 panic("%s: out of kernel address space", __func__); 713 714 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)]; 715 if (dir1 == NULL) { 716 nkpg = vm_page_alloc(NULL, nkpt++, 717 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED); 718 if (!nkpg) 719 panic("%s: cannot add dir. page", __func__); 720 721 dir1 = (struct ia64_lpte **) 722 IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg)); 723 bzero(dir1, PAGE_SIZE); 724 ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1; 725 } 726 727 nkpg = vm_page_alloc(NULL, nkpt++, 728 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED); 729 if (!nkpg) 730 panic("%s: cannot add PTE page", __func__); 731 732 leaf = (struct ia64_lpte *) 733 IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg)); 734 bzero(leaf, PAGE_SIZE); 735 dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf; 736 737 kernel_vm_end += PAGE_SIZE * NKPTEPG; 738 } 739} 740 741/*************************************************** 742 * page management routines. 743 ***************************************************/ 744 745/* 746 * free the pv_entry back to the free list 747 */ 748static PMAP_INLINE void 749free_pv_entry(pv_entry_t pv) 750{ 751 pv_entry_count--; 752 uma_zfree(pvzone, pv); 753} 754 755/* 756 * get a new pv_entry, allocating a block from the system 757 * when needed. 758 */ 759static pv_entry_t 760get_pv_entry(pmap_t locked_pmap) 761{ 762 static const struct timeval printinterval = { 60, 0 }; 763 static struct timeval lastprint; 764 struct vpgqueues *vpq; 765 struct ia64_lpte *pte; 766 pmap_t oldpmap, pmap; 767 pv_entry_t allocated_pv, next_pv, pv; 768 vm_offset_t va; 769 vm_page_t m; 770 771 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 772 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 773 allocated_pv = uma_zalloc(pvzone, M_NOWAIT); 774 if (allocated_pv != NULL) { 775 pv_entry_count++; 776 if (pv_entry_count > pv_entry_high_water) 777 pagedaemon_wakeup(); 778 else 779 return (allocated_pv); 780 } 781 782 /* 783 * Reclaim pv entries: At first, destroy mappings to inactive 784 * pages. After that, if a pv entry is still needed, destroy 785 * mappings to active pages. 786 */ 787 if (ratecheck(&lastprint, &printinterval)) 788 printf("Approaching the limit on PV entries, " 789 "increase the vm.pmap.shpgperproc tunable.\n"); 790 vpq = &vm_page_queues[PQ_INACTIVE]; 791retry: 792 TAILQ_FOREACH(m, &vpq->pl, pageq) { 793 if (m->hold_count || m->busy) 794 continue; 795 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { 796 va = pv->pv_va; 797 pmap = pv->pv_pmap; 798 /* Avoid deadlock and lock recursion. */ 799 if (pmap > locked_pmap) 800 PMAP_LOCK(pmap); 801 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) 802 continue; 803 pmap->pm_stats.resident_count--; 804 oldpmap = pmap_switch(pmap); 805 pte = pmap_find_vhpt(va); 806 KASSERT(pte != NULL, ("pte")); 807 pmap_remove_vhpt(va); 808 pmap_invalidate_page(va); 809 pmap_switch(oldpmap); 810 if (pmap_accessed(pte)) 811 vm_page_flag_set(m, PG_REFERENCED); 812 if (pmap_dirty(pte)) 813 vm_page_dirty(m); 814 pmap_free_pte(pte, va); 815 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 816 m->md.pv_list_count--; 817 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 818 if (pmap != locked_pmap) 819 PMAP_UNLOCK(pmap); 820 if (allocated_pv == NULL) 821 allocated_pv = pv; 822 else 823 free_pv_entry(pv); 824 } 825 if (TAILQ_EMPTY(&m->md.pv_list)) 826 vm_page_flag_clear(m, PG_WRITEABLE); 827 } 828 if (allocated_pv == NULL) { 829 if (vpq == &vm_page_queues[PQ_INACTIVE]) { 830 vpq = &vm_page_queues[PQ_ACTIVE]; 831 goto retry; 832 } 833 panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable"); 834 } 835 return (allocated_pv); 836} 837 838/* 839 * Conditionally create a pv entry. 840 */ 841static boolean_t 842pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 843{ 844 pv_entry_t pv; 845 846 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 847 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 848 if (pv_entry_count < pv_entry_high_water && 849 (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) { 850 pv_entry_count++; 851 pv->pv_va = va; 852 pv->pv_pmap = pmap; 853 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 854 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 855 m->md.pv_list_count++; 856 return (TRUE); 857 } else 858 return (FALSE); 859} 860 861/* 862 * Add an ia64_lpte to the VHPT. 863 */ 864static void 865pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va) 866{ 867 struct ia64_bucket *bckt; 868 struct ia64_lpte *vhpte; 869 uint64_t pte_pa; 870 871 /* Can fault, so get it out of the way. */ 872 pte_pa = ia64_tpa((vm_offset_t)pte); 873 874 vhpte = (struct ia64_lpte *)ia64_thash(va); 875 bckt = (struct ia64_bucket *)vhpte->chain; 876 877 mtx_lock_spin(&bckt->mutex); 878 pte->chain = bckt->chain; 879 ia64_mf(); 880 bckt->chain = pte_pa; 881 882 pmap_vhpt_inserts++; 883 bckt->length++; 884 mtx_unlock_spin(&bckt->mutex); 885} 886 887/* 888 * Remove the ia64_lpte matching va from the VHPT. Return zero if it 889 * worked or an appropriate error code otherwise. 890 */ 891static int 892pmap_remove_vhpt(vm_offset_t va) 893{ 894 struct ia64_bucket *bckt; 895 struct ia64_lpte *pte; 896 struct ia64_lpte *lpte; 897 struct ia64_lpte *vhpte; 898 uint64_t chain, tag; 899 900 tag = ia64_ttag(va); 901 vhpte = (struct ia64_lpte *)ia64_thash(va); 902 bckt = (struct ia64_bucket *)vhpte->chain; 903 904 lpte = NULL; 905 mtx_lock_spin(&bckt->mutex); 906 chain = bckt->chain; 907 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain); 908 while (chain != 0 && pte->tag != tag) { 909 lpte = pte; 910 chain = pte->chain; 911 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain); 912 } 913 if (chain == 0) { 914 mtx_unlock_spin(&bckt->mutex); 915 return (ENOENT); 916 } 917 918 /* Snip this pv_entry out of the collision chain. */ 919 if (lpte == NULL) 920 bckt->chain = pte->chain; 921 else 922 lpte->chain = pte->chain; 923 ia64_mf(); 924 925 bckt->length--; 926 mtx_unlock_spin(&bckt->mutex); 927 return (0); 928} 929 930/* 931 * Find the ia64_lpte for the given va, if any. 932 */ 933static struct ia64_lpte * 934pmap_find_vhpt(vm_offset_t va) 935{ 936 struct ia64_bucket *bckt; 937 struct ia64_lpte *pte; 938 uint64_t chain, tag; 939 940 tag = ia64_ttag(va); 941 pte = (struct ia64_lpte *)ia64_thash(va); 942 bckt = (struct ia64_bucket *)pte->chain; 943 944 mtx_lock_spin(&bckt->mutex); 945 chain = bckt->chain; 946 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain); 947 while (chain != 0 && pte->tag != tag) { 948 chain = pte->chain; 949 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain); 950 } 951 mtx_unlock_spin(&bckt->mutex); 952 return ((chain != 0) ? pte : NULL); 953} 954 955/* 956 * Remove an entry from the list of managed mappings. 957 */ 958static int 959pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv) 960{ 961 if (!pv) { 962 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 963 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 964 if (pmap == pv->pv_pmap && va == pv->pv_va) 965 break; 966 } 967 } else { 968 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 969 if (va == pv->pv_va) 970 break; 971 } 972 } 973 } 974 975 if (pv) { 976 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 977 m->md.pv_list_count--; 978 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 979 vm_page_flag_clear(m, PG_WRITEABLE); 980 981 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 982 free_pv_entry(pv); 983 return 0; 984 } else { 985 return ENOENT; 986 } 987} 988 989/* 990 * Create a pv entry for page at pa for 991 * (pmap, va). 992 */ 993static void 994pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 995{ 996 pv_entry_t pv; 997 998 pv = get_pv_entry(pmap); 999 pv->pv_pmap = pmap; 1000 pv->pv_va = va; 1001 1002 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1003 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1004 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1005 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1006 m->md.pv_list_count++; 1007} 1008 1009/* 1010 * Routine: pmap_extract 1011 * Function: 1012 * Extract the physical page address associated 1013 * with the given map/virtual_address pair. 1014 */ 1015vm_paddr_t 1016pmap_extract(pmap_t pmap, vm_offset_t va) 1017{ 1018 struct ia64_lpte *pte; 1019 pmap_t oldpmap; 1020 vm_paddr_t pa; 1021 1022 pa = 0; 1023 PMAP_LOCK(pmap); 1024 oldpmap = pmap_switch(pmap); 1025 pte = pmap_find_vhpt(va); 1026 if (pte != NULL && pmap_present(pte)) 1027 pa = pmap_ppn(pte); 1028 pmap_switch(oldpmap); 1029 PMAP_UNLOCK(pmap); 1030 return (pa); 1031} 1032 1033/* 1034 * Routine: pmap_extract_and_hold 1035 * Function: 1036 * Atomically extract and hold the physical page 1037 * with the given pmap and virtual address pair 1038 * if that mapping permits the given protection. 1039 */ 1040vm_page_t 1041pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1042{ 1043 struct ia64_lpte *pte; 1044 pmap_t oldpmap; 1045 vm_page_t m; 1046 vm_paddr_t pa; 1047 1048 pa = 0; 1049 m = NULL; 1050 PMAP_LOCK(pmap); 1051 oldpmap = pmap_switch(pmap); 1052retry: 1053 pte = pmap_find_vhpt(va); 1054 if (pte != NULL && pmap_present(pte) && 1055 (pmap_prot(pte) & prot) == prot) { 1056 m = PHYS_TO_VM_PAGE(pmap_ppn(pte)); 1057 if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa)) 1058 goto retry; 1059 vm_page_hold(m); 1060 } 1061 PA_UNLOCK_COND(pa); 1062 pmap_switch(oldpmap); 1063 PMAP_UNLOCK(pmap); 1064 return (m); 1065} 1066 1067/*************************************************** 1068 * Low level mapping routines..... 1069 ***************************************************/ 1070 1071/* 1072 * Find the kernel lpte for mapping the given virtual address, which 1073 * must be in the part of region 5 which we can cover with our kernel 1074 * 'page tables'. 1075 */ 1076static struct ia64_lpte * 1077pmap_find_kpte(vm_offset_t va) 1078{ 1079 struct ia64_lpte **dir1; 1080 struct ia64_lpte *leaf; 1081 1082 KASSERT((va >> 61) == 5, 1083 ("kernel mapping 0x%lx not in region 5", va)); 1084 KASSERT(va < kernel_vm_end, 1085 ("kernel mapping 0x%lx out of range", va)); 1086 1087 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)]; 1088 leaf = dir1[KPTE_DIR1_INDEX(va)]; 1089 return (&leaf[KPTE_PTE_INDEX(va)]); 1090} 1091 1092/* 1093 * Find a pte suitable for mapping a user-space address. If one exists 1094 * in the VHPT, that one will be returned, otherwise a new pte is 1095 * allocated. 1096 */ 1097static struct ia64_lpte * 1098pmap_find_pte(vm_offset_t va) 1099{ 1100 struct ia64_lpte *pte; 1101 1102 if (va >= VM_MAXUSER_ADDRESS) 1103 return pmap_find_kpte(va); 1104 1105 pte = pmap_find_vhpt(va); 1106 if (pte == NULL) { 1107 pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO); 1108 pte->tag = 1UL << 63; 1109 } 1110 return (pte); 1111} 1112 1113/* 1114 * Free a pte which is now unused. This simply returns it to the zone 1115 * allocator if it is a user mapping. For kernel mappings, clear the 1116 * valid bit to make it clear that the mapping is not currently used. 1117 */ 1118static void 1119pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va) 1120{ 1121 if (va < VM_MAXUSER_ADDRESS) 1122 uma_zfree(ptezone, pte); 1123 else 1124 pmap_clear_present(pte); 1125} 1126 1127static PMAP_INLINE void 1128pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot) 1129{ 1130 static long prot2ar[4] = { 1131 PTE_AR_R, /* VM_PROT_NONE */ 1132 PTE_AR_RW, /* VM_PROT_WRITE */ 1133 PTE_AR_RX|PTE_ED, /* VM_PROT_EXECUTE */ 1134 PTE_AR_RWX|PTE_ED /* VM_PROT_WRITE|VM_PROT_EXECUTE */ 1135 }; 1136 1137 pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED); 1138 pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56; 1139 pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap) 1140 ? PTE_PL_KERN : PTE_PL_USER; 1141 pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1]; 1142} 1143 1144/* 1145 * Set a pte to contain a valid mapping and enter it in the VHPT. If 1146 * the pte was orginally valid, then its assumed to already be in the 1147 * VHPT. 1148 * This functions does not set the protection bits. It's expected 1149 * that those have been set correctly prior to calling this function. 1150 */ 1151static void 1152pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa, 1153 boolean_t wired, boolean_t managed) 1154{ 1155 1156 pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED; 1157 pte->pte |= PTE_PRESENT | PTE_MA_WB; 1158 pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED); 1159 pte->pte |= (wired) ? PTE_WIRED : 0; 1160 pte->pte |= pa & PTE_PPN_MASK; 1161 1162 pte->itir = PAGE_SHIFT << 2; 1163 1164 pte->tag = ia64_ttag(va); 1165} 1166 1167/* 1168 * Remove the (possibly managed) mapping represented by pte from the 1169 * given pmap. 1170 */ 1171static int 1172pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va, 1173 pv_entry_t pv, int freepte) 1174{ 1175 int error; 1176 vm_page_t m; 1177 1178 /* 1179 * First remove from the VHPT. 1180 */ 1181 error = pmap_remove_vhpt(va); 1182 if (error) 1183 return (error); 1184 1185 pmap_invalidate_page(va); 1186 1187 if (pmap_wired(pte)) 1188 pmap->pm_stats.wired_count -= 1; 1189 1190 pmap->pm_stats.resident_count -= 1; 1191 if (pmap_managed(pte)) { 1192 m = PHYS_TO_VM_PAGE(pmap_ppn(pte)); 1193 if (pmap_dirty(pte)) 1194 vm_page_dirty(m); 1195 if (pmap_accessed(pte)) 1196 vm_page_flag_set(m, PG_REFERENCED); 1197 1198 error = pmap_remove_entry(pmap, m, va, pv); 1199 } 1200 if (freepte) 1201 pmap_free_pte(pte, va); 1202 1203 return (error); 1204} 1205 1206/* 1207 * Extract the physical page address associated with a kernel 1208 * virtual address. 1209 */ 1210vm_paddr_t 1211pmap_kextract(vm_offset_t va) 1212{ 1213 struct ia64_lpte *pte; 1214 uint64_t *pbvm_pgtbl; 1215 u_int idx; 1216 1217 KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA")); 1218 1219 /* Regions 6 and 7 are direct mapped. */ 1220 if (va >= IA64_RR_BASE(6)) 1221 return (IA64_RR_MASK(va)); 1222 1223 /* Bail out if the virtual address is beyond our limits. */ 1224 if (va >= kernel_vm_end) 1225 return (0); 1226 1227 if (va >= VM_MIN_KERNEL_ADDRESS) { 1228 pte = pmap_find_kpte(va); 1229 return (pmap_present(pte) ? pmap_ppn(pte)|(va&PAGE_MASK) : 0); 1230 } 1231 1232 /* PBVM page table. */ 1233 if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz); 1234 return (0); 1235 if (va >= IA64_PBVM_PGTBL) 1236 return (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl; 1237 1238 /* PBVM. */ 1239 if (va >= IA64_PBVM_BASE) { 1240 pbvm_pgtbl = (void *)IA64_PBVM_PGTBL; 1241 idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT; 1242 if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3)) 1243 return (0); 1244 if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0) 1245 return (0); 1246 return ((pbvm_pgtbl[idx] & PTE_PPN_MASK) + 1247 (va & IA64_PBVM_PAGE_MASK)); 1248 } 1249 1250 printf("XXX: %s: va=%#lx\n", __func__, va); 1251 return (0); 1252} 1253 1254/* 1255 * Add a list of wired pages to the kva this routine is only used for 1256 * temporary kernel mappings that do not need to have page modification 1257 * or references recorded. Note that old mappings are simply written 1258 * over. The page is effectively wired, but it's customary to not have 1259 * the PTE reflect that, nor update statistics. 1260 */ 1261void 1262pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 1263{ 1264 struct ia64_lpte *pte; 1265 int i; 1266 1267 for (i = 0; i < count; i++) { 1268 pte = pmap_find_kpte(va); 1269 if (pmap_present(pte)) 1270 pmap_invalidate_page(va); 1271 else 1272 pmap_enter_vhpt(pte, va); 1273 pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL); 1274 pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE); 1275 va += PAGE_SIZE; 1276 } 1277} 1278 1279/* 1280 * this routine jerks page mappings from the 1281 * kernel -- it is meant only for temporary mappings. 1282 */ 1283void 1284pmap_qremove(vm_offset_t va, int count) 1285{ 1286 struct ia64_lpte *pte; 1287 int i; 1288 1289 for (i = 0; i < count; i++) { 1290 pte = pmap_find_kpte(va); 1291 if (pmap_present(pte)) { 1292 pmap_remove_vhpt(va); 1293 pmap_invalidate_page(va); 1294 pmap_clear_present(pte); 1295 } 1296 va += PAGE_SIZE; 1297 } 1298} 1299 1300/* 1301 * Add a wired page to the kva. As for pmap_qenter(), it's customary 1302 * to not have the PTE reflect that, nor update statistics. 1303 */ 1304void 1305pmap_kenter(vm_offset_t va, vm_offset_t pa) 1306{ 1307 struct ia64_lpte *pte; 1308 1309 pte = pmap_find_kpte(va); 1310 if (pmap_present(pte)) 1311 pmap_invalidate_page(va); 1312 else 1313 pmap_enter_vhpt(pte, va); 1314 pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL); 1315 pmap_set_pte(pte, va, pa, FALSE, FALSE); 1316} 1317 1318/* 1319 * Remove a page from the kva 1320 */ 1321void 1322pmap_kremove(vm_offset_t va) 1323{ 1324 struct ia64_lpte *pte; 1325 1326 pte = pmap_find_kpte(va); 1327 if (pmap_present(pte)) { 1328 pmap_remove_vhpt(va); 1329 pmap_invalidate_page(va); 1330 pmap_clear_present(pte); 1331 } 1332} 1333 1334/* 1335 * Used to map a range of physical addresses into kernel 1336 * virtual address space. 1337 * 1338 * The value passed in '*virt' is a suggested virtual address for 1339 * the mapping. Architectures which can support a direct-mapped 1340 * physical to virtual region can return the appropriate address 1341 * within that region, leaving '*virt' unchanged. Other 1342 * architectures should map the pages starting at '*virt' and 1343 * update '*virt' with the first usable address after the mapped 1344 * region. 1345 */ 1346vm_offset_t 1347pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) 1348{ 1349 return IA64_PHYS_TO_RR7(start); 1350} 1351 1352/* 1353 * Remove the given range of addresses from the specified map. 1354 * 1355 * It is assumed that the start and end are properly 1356 * rounded to the page size. 1357 */ 1358void 1359pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1360{ 1361 pmap_t oldpmap; 1362 vm_offset_t va; 1363 pv_entry_t npv, pv; 1364 struct ia64_lpte *pte; 1365 1366 if (pmap->pm_stats.resident_count == 0) 1367 return; 1368 1369 vm_page_lock_queues(); 1370 PMAP_LOCK(pmap); 1371 oldpmap = pmap_switch(pmap); 1372 1373 /* 1374 * special handling of removing one page. a very 1375 * common operation and easy to short circuit some 1376 * code. 1377 */ 1378 if (sva + PAGE_SIZE == eva) { 1379 pte = pmap_find_vhpt(sva); 1380 if (pte != NULL) 1381 pmap_remove_pte(pmap, pte, sva, 0, 1); 1382 goto out; 1383 } 1384 1385 if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) { 1386 TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) { 1387 va = pv->pv_va; 1388 if (va >= sva && va < eva) { 1389 pte = pmap_find_vhpt(va); 1390 KASSERT(pte != NULL, ("pte")); 1391 pmap_remove_pte(pmap, pte, va, pv, 1); 1392 } 1393 } 1394 } else { 1395 for (va = sva; va < eva; va += PAGE_SIZE) { 1396 pte = pmap_find_vhpt(va); 1397 if (pte != NULL) 1398 pmap_remove_pte(pmap, pte, va, 0, 1); 1399 } 1400 } 1401 1402out: 1403 vm_page_unlock_queues(); 1404 pmap_switch(oldpmap); 1405 PMAP_UNLOCK(pmap); 1406} 1407 1408/* 1409 * Routine: pmap_remove_all 1410 * Function: 1411 * Removes this physical page from 1412 * all physical maps in which it resides. 1413 * Reflects back modify bits to the pager. 1414 * 1415 * Notes: 1416 * Original versions of this routine were very 1417 * inefficient because they iteratively called 1418 * pmap_remove (slow...) 1419 */ 1420 1421void 1422pmap_remove_all(vm_page_t m) 1423{ 1424 pmap_t oldpmap; 1425 pv_entry_t pv; 1426 1427 KASSERT((m->flags & PG_FICTITIOUS) == 0, 1428 ("pmap_remove_all: page %p is fictitious", m)); 1429 vm_page_lock_queues(); 1430 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1431 struct ia64_lpte *pte; 1432 pmap_t pmap = pv->pv_pmap; 1433 vm_offset_t va = pv->pv_va; 1434 1435 PMAP_LOCK(pmap); 1436 oldpmap = pmap_switch(pmap); 1437 pte = pmap_find_vhpt(va); 1438 KASSERT(pte != NULL, ("pte")); 1439 if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m)) 1440 panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m)); 1441 pmap_remove_pte(pmap, pte, va, pv, 1); 1442 pmap_switch(oldpmap); 1443 PMAP_UNLOCK(pmap); 1444 } 1445 vm_page_flag_clear(m, PG_WRITEABLE); 1446 vm_page_unlock_queues(); 1447} 1448 1449/* 1450 * Set the physical protection on the 1451 * specified range of this map as requested. 1452 */ 1453void 1454pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1455{ 1456 pmap_t oldpmap; 1457 struct ia64_lpte *pte; 1458 1459 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1460 pmap_remove(pmap, sva, eva); 1461 return; 1462 } 1463 1464 if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == 1465 (VM_PROT_WRITE|VM_PROT_EXECUTE)) 1466 return; 1467 1468 if ((sva & PAGE_MASK) || (eva & PAGE_MASK)) 1469 panic("pmap_protect: unaligned addresses"); 1470 1471 vm_page_lock_queues(); 1472 PMAP_LOCK(pmap); 1473 oldpmap = pmap_switch(pmap); 1474 for ( ; sva < eva; sva += PAGE_SIZE) { 1475 /* If page is invalid, skip this page */ 1476 pte = pmap_find_vhpt(sva); 1477 if (pte == NULL) 1478 continue; 1479 1480 /* If there's no change, skip it too */ 1481 if (pmap_prot(pte) == prot) 1482 continue; 1483 1484 if ((prot & VM_PROT_WRITE) == 0 && 1485 pmap_managed(pte) && pmap_dirty(pte)) { 1486 vm_paddr_t pa = pmap_ppn(pte); 1487 vm_page_t m = PHYS_TO_VM_PAGE(pa); 1488 1489 vm_page_dirty(m); 1490 pmap_clear_dirty(pte); 1491 } 1492 1493 if (prot & VM_PROT_EXECUTE) 1494 ia64_sync_icache(sva, PAGE_SIZE); 1495 1496 pmap_pte_prot(pmap, pte, prot); 1497 pmap_invalidate_page(sva); 1498 } 1499 vm_page_unlock_queues(); 1500 pmap_switch(oldpmap); 1501 PMAP_UNLOCK(pmap); 1502} 1503 1504/* 1505 * Insert the given physical page (p) at 1506 * the specified virtual address (v) in the 1507 * target physical map with the protection requested. 1508 * 1509 * If specified, the page will be wired down, meaning 1510 * that the related pte can not be reclaimed. 1511 * 1512 * NB: This is the only routine which MAY NOT lazy-evaluate 1513 * or lose information. That is, this routine must actually 1514 * insert this page into the given map NOW. 1515 */ 1516void 1517pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 1518 vm_prot_t prot, boolean_t wired) 1519{ 1520 pmap_t oldpmap; 1521 vm_offset_t pa; 1522 vm_offset_t opa; 1523 struct ia64_lpte origpte; 1524 struct ia64_lpte *pte; 1525 boolean_t icache_inval, managed; 1526 1527 vm_page_lock_queues(); 1528 PMAP_LOCK(pmap); 1529 oldpmap = pmap_switch(pmap); 1530 1531 va &= ~PAGE_MASK; 1532 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 1533 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || 1534 (m->oflags & VPO_BUSY) != 0, 1535 ("pmap_enter: page %p is not busy", m)); 1536 1537 /* 1538 * Find (or create) a pte for the given mapping. 1539 */ 1540 while ((pte = pmap_find_pte(va)) == NULL) { 1541 pmap_switch(oldpmap); 1542 PMAP_UNLOCK(pmap); 1543 vm_page_unlock_queues(); 1544 VM_WAIT; 1545 vm_page_lock_queues(); 1546 PMAP_LOCK(pmap); 1547 oldpmap = pmap_switch(pmap); 1548 } 1549 origpte = *pte; 1550 if (!pmap_present(pte)) { 1551 opa = ~0UL; 1552 pmap_enter_vhpt(pte, va); 1553 } else 1554 opa = pmap_ppn(pte); 1555 managed = FALSE; 1556 pa = VM_PAGE_TO_PHYS(m); 1557 1558 icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE; 1559 1560 /* 1561 * Mapping has not changed, must be protection or wiring change. 1562 */ 1563 if (opa == pa) { 1564 /* 1565 * Wiring change, just update stats. We don't worry about 1566 * wiring PT pages as they remain resident as long as there 1567 * are valid mappings in them. Hence, if a user page is wired, 1568 * the PT page will be also. 1569 */ 1570 if (wired && !pmap_wired(&origpte)) 1571 pmap->pm_stats.wired_count++; 1572 else if (!wired && pmap_wired(&origpte)) 1573 pmap->pm_stats.wired_count--; 1574 1575 managed = (pmap_managed(&origpte)) ? TRUE : FALSE; 1576 1577 /* 1578 * We might be turning off write access to the page, 1579 * so we go ahead and sense modify status. Otherwise, 1580 * we can avoid I-cache invalidation if the page 1581 * already allowed execution. 1582 */ 1583 if (managed && pmap_dirty(&origpte)) 1584 vm_page_dirty(m); 1585 else if (pmap_exec(&origpte)) 1586 icache_inval = FALSE; 1587 1588 pmap_invalidate_page(va); 1589 goto validate; 1590 } 1591 1592 /* 1593 * Mapping has changed, invalidate old range and fall 1594 * through to handle validating new mapping. 1595 */ 1596 if (opa != ~0UL) { 1597 pmap_remove_pte(pmap, pte, va, 0, 0); 1598 pmap_enter_vhpt(pte, va); 1599 } 1600 1601 /* 1602 * Enter on the PV list if part of our managed memory. 1603 */ 1604 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 1605 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 1606 ("pmap_enter: managed mapping within the clean submap")); 1607 pmap_insert_entry(pmap, va, m); 1608 managed = TRUE; 1609 } 1610 1611 /* 1612 * Increment counters 1613 */ 1614 pmap->pm_stats.resident_count++; 1615 if (wired) 1616 pmap->pm_stats.wired_count++; 1617 1618validate: 1619 1620 /* 1621 * Now validate mapping with desired protection/wiring. This 1622 * adds the pte to the VHPT if necessary. 1623 */ 1624 pmap_pte_prot(pmap, pte, prot); 1625 pmap_set_pte(pte, va, pa, wired, managed); 1626 1627 /* Invalidate the I-cache when needed. */ 1628 if (icache_inval) 1629 ia64_sync_icache(va, PAGE_SIZE); 1630 1631 if ((prot & VM_PROT_WRITE) != 0 && managed) 1632 vm_page_flag_set(m, PG_WRITEABLE); 1633 vm_page_unlock_queues(); 1634 pmap_switch(oldpmap); 1635 PMAP_UNLOCK(pmap); 1636} 1637 1638/* 1639 * Maps a sequence of resident pages belonging to the same object. 1640 * The sequence begins with the given page m_start. This page is 1641 * mapped at the given virtual address start. Each subsequent page is 1642 * mapped at a virtual address that is offset from start by the same 1643 * amount as the page is offset from m_start within the object. The 1644 * last page in the sequence is the page with the largest offset from 1645 * m_start that can be mapped at a virtual address less than the given 1646 * virtual address end. Not every virtual page between start and end 1647 * is mapped; only those for which a resident page exists with the 1648 * corresponding offset from m_start are mapped. 1649 */ 1650void 1651pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 1652 vm_page_t m_start, vm_prot_t prot) 1653{ 1654 pmap_t oldpmap; 1655 vm_page_t m; 1656 vm_pindex_t diff, psize; 1657 1658 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 1659 psize = atop(end - start); 1660 m = m_start; 1661 vm_page_lock_queues(); 1662 PMAP_LOCK(pmap); 1663 oldpmap = pmap_switch(pmap); 1664 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 1665 pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot); 1666 m = TAILQ_NEXT(m, listq); 1667 } 1668 vm_page_unlock_queues(); 1669 pmap_switch(oldpmap); 1670 PMAP_UNLOCK(pmap); 1671} 1672 1673/* 1674 * this code makes some *MAJOR* assumptions: 1675 * 1. Current pmap & pmap exists. 1676 * 2. Not wired. 1677 * 3. Read access. 1678 * 4. No page table pages. 1679 * but is *MUCH* faster than pmap_enter... 1680 */ 1681 1682void 1683pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 1684{ 1685 pmap_t oldpmap; 1686 1687 vm_page_lock_queues(); 1688 PMAP_LOCK(pmap); 1689 oldpmap = pmap_switch(pmap); 1690 pmap_enter_quick_locked(pmap, va, m, prot); 1691 vm_page_unlock_queues(); 1692 pmap_switch(oldpmap); 1693 PMAP_UNLOCK(pmap); 1694} 1695 1696static void 1697pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 1698 vm_prot_t prot) 1699{ 1700 struct ia64_lpte *pte; 1701 boolean_t managed; 1702 1703 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 1704 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 1705 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 1706 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1707 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1708 1709 if ((pte = pmap_find_pte(va)) == NULL) 1710 return; 1711 1712 if (!pmap_present(pte)) { 1713 /* Enter on the PV list if the page is managed. */ 1714 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 1715 if (!pmap_try_insert_pv_entry(pmap, va, m)) { 1716 pmap_free_pte(pte, va); 1717 return; 1718 } 1719 managed = TRUE; 1720 } else 1721 managed = FALSE; 1722 1723 /* Increment counters. */ 1724 pmap->pm_stats.resident_count++; 1725 1726 /* Initialise with R/O protection and enter into VHPT. */ 1727 pmap_enter_vhpt(pte, va); 1728 pmap_pte_prot(pmap, pte, 1729 prot & (VM_PROT_READ | VM_PROT_EXECUTE)); 1730 pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed); 1731 1732 if (prot & VM_PROT_EXECUTE) 1733 ia64_sync_icache(va, PAGE_SIZE); 1734 } 1735} 1736 1737/* 1738 * pmap_object_init_pt preloads the ptes for a given object 1739 * into the specified pmap. This eliminates the blast of soft 1740 * faults on process startup and immediately after an mmap. 1741 */ 1742void 1743pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 1744 vm_object_t object, vm_pindex_t pindex, 1745 vm_size_t size) 1746{ 1747 1748 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 1749 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 1750 ("pmap_object_init_pt: non-device object")); 1751} 1752 1753/* 1754 * Routine: pmap_change_wiring 1755 * Function: Change the wiring attribute for a map/virtual-address 1756 * pair. 1757 * In/out conditions: 1758 * The mapping must already exist in the pmap. 1759 */ 1760void 1761pmap_change_wiring(pmap, va, wired) 1762 register pmap_t pmap; 1763 vm_offset_t va; 1764 boolean_t wired; 1765{ 1766 pmap_t oldpmap; 1767 struct ia64_lpte *pte; 1768 1769 PMAP_LOCK(pmap); 1770 oldpmap = pmap_switch(pmap); 1771 1772 pte = pmap_find_vhpt(va); 1773 KASSERT(pte != NULL, ("pte")); 1774 if (wired && !pmap_wired(pte)) { 1775 pmap->pm_stats.wired_count++; 1776 pmap_set_wired(pte); 1777 } else if (!wired && pmap_wired(pte)) { 1778 pmap->pm_stats.wired_count--; 1779 pmap_clear_wired(pte); 1780 } 1781 1782 pmap_switch(oldpmap); 1783 PMAP_UNLOCK(pmap); 1784} 1785 1786 1787 1788/* 1789 * Copy the range specified by src_addr/len 1790 * from the source map to the range dst_addr/len 1791 * in the destination map. 1792 * 1793 * This routine is only advisory and need not do anything. 1794 */ 1795 1796void 1797pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 1798 vm_offset_t src_addr) 1799{ 1800} 1801 1802 1803/* 1804 * pmap_zero_page zeros the specified hardware page by 1805 * mapping it into virtual memory and using bzero to clear 1806 * its contents. 1807 */ 1808 1809void 1810pmap_zero_page(vm_page_t m) 1811{ 1812 vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m)); 1813 bzero((caddr_t) va, PAGE_SIZE); 1814} 1815 1816 1817/* 1818 * pmap_zero_page_area zeros the specified hardware page by 1819 * mapping it into virtual memory and using bzero to clear 1820 * its contents. 1821 * 1822 * off and size must reside within a single page. 1823 */ 1824 1825void 1826pmap_zero_page_area(vm_page_t m, int off, int size) 1827{ 1828 vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m)); 1829 bzero((char *)(caddr_t)va + off, size); 1830} 1831 1832 1833/* 1834 * pmap_zero_page_idle zeros the specified hardware page by 1835 * mapping it into virtual memory and using bzero to clear 1836 * its contents. This is for the vm_idlezero process. 1837 */ 1838 1839void 1840pmap_zero_page_idle(vm_page_t m) 1841{ 1842 vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m)); 1843 bzero((caddr_t) va, PAGE_SIZE); 1844} 1845 1846 1847/* 1848 * pmap_copy_page copies the specified (machine independent) 1849 * page by mapping the page into virtual memory and using 1850 * bcopy to copy the page, one machine dependent page at a 1851 * time. 1852 */ 1853void 1854pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 1855{ 1856 vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc)); 1857 vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst)); 1858 bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE); 1859} 1860 1861/* 1862 * Returns true if the pmap's pv is one of the first 1863 * 16 pvs linked to from this page. This count may 1864 * be changed upwards or downwards in the future; it 1865 * is only necessary that true be returned for a small 1866 * subset of pmaps for proper page aging. 1867 */ 1868boolean_t 1869pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 1870{ 1871 pv_entry_t pv; 1872 int loops = 0; 1873 boolean_t rv; 1874 1875 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1876 ("pmap_page_exists_quick: page %p is not managed", m)); 1877 rv = FALSE; 1878 vm_page_lock_queues(); 1879 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1880 if (pv->pv_pmap == pmap) { 1881 rv = TRUE; 1882 break; 1883 } 1884 loops++; 1885 if (loops >= 16) 1886 break; 1887 } 1888 vm_page_unlock_queues(); 1889 return (rv); 1890} 1891 1892/* 1893 * pmap_page_wired_mappings: 1894 * 1895 * Return the number of managed mappings to the given physical page 1896 * that are wired. 1897 */ 1898int 1899pmap_page_wired_mappings(vm_page_t m) 1900{ 1901 struct ia64_lpte *pte; 1902 pmap_t oldpmap, pmap; 1903 pv_entry_t pv; 1904 int count; 1905 1906 count = 0; 1907 if ((m->flags & PG_FICTITIOUS) != 0) 1908 return (count); 1909 vm_page_lock_queues(); 1910 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1911 pmap = pv->pv_pmap; 1912 PMAP_LOCK(pmap); 1913 oldpmap = pmap_switch(pmap); 1914 pte = pmap_find_vhpt(pv->pv_va); 1915 KASSERT(pte != NULL, ("pte")); 1916 if (pmap_wired(pte)) 1917 count++; 1918 pmap_switch(oldpmap); 1919 PMAP_UNLOCK(pmap); 1920 } 1921 vm_page_unlock_queues(); 1922 return (count); 1923} 1924 1925/* 1926 * Remove all pages from specified address space 1927 * this aids process exit speeds. Also, this code 1928 * is special cased for current process only, but 1929 * can have the more generic (and slightly slower) 1930 * mode enabled. This is much faster than pmap_remove 1931 * in the case of running down an entire address space. 1932 */ 1933void 1934pmap_remove_pages(pmap_t pmap) 1935{ 1936 pmap_t oldpmap; 1937 pv_entry_t pv, npv; 1938 1939 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 1940 printf("warning: %s called with non-current pmap\n", 1941 __func__); 1942 return; 1943 } 1944 1945 vm_page_lock_queues(); 1946 PMAP_LOCK(pmap); 1947 oldpmap = pmap_switch(pmap); 1948 1949 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 1950 struct ia64_lpte *pte; 1951 1952 npv = TAILQ_NEXT(pv, pv_plist); 1953 1954 pte = pmap_find_vhpt(pv->pv_va); 1955 KASSERT(pte != NULL, ("pte")); 1956 if (!pmap_wired(pte)) 1957 pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1); 1958 } 1959 1960 pmap_switch(oldpmap); 1961 PMAP_UNLOCK(pmap); 1962 vm_page_unlock_queues(); 1963} 1964 1965/* 1966 * pmap_ts_referenced: 1967 * 1968 * Return a count of reference bits for a page, clearing those bits. 1969 * It is not necessary for every reference bit to be cleared, but it 1970 * is necessary that 0 only be returned when there are truly no 1971 * reference bits set. 1972 * 1973 * XXX: The exact number of bits to check and clear is a matter that 1974 * should be tested and standardized at some point in the future for 1975 * optimal aging of shared pages. 1976 */ 1977int 1978pmap_ts_referenced(vm_page_t m) 1979{ 1980 struct ia64_lpte *pte; 1981 pmap_t oldpmap; 1982 pv_entry_t pv; 1983 int count = 0; 1984 1985 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 1986 ("pmap_ts_referenced: page %p is not managed", m)); 1987 vm_page_lock_queues(); 1988 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1989 PMAP_LOCK(pv->pv_pmap); 1990 oldpmap = pmap_switch(pv->pv_pmap); 1991 pte = pmap_find_vhpt(pv->pv_va); 1992 KASSERT(pte != NULL, ("pte")); 1993 if (pmap_accessed(pte)) { 1994 count++; 1995 pmap_clear_accessed(pte); 1996 pmap_invalidate_page(pv->pv_va); 1997 } 1998 pmap_switch(oldpmap); 1999 PMAP_UNLOCK(pv->pv_pmap); 2000 } 2001 vm_page_unlock_queues(); 2002 return (count); 2003} 2004 2005/* 2006 * pmap_is_modified: 2007 * 2008 * Return whether or not the specified physical page was modified 2009 * in any physical maps. 2010 */ 2011boolean_t 2012pmap_is_modified(vm_page_t m) 2013{ 2014 struct ia64_lpte *pte; 2015 pmap_t oldpmap; 2016 pv_entry_t pv; 2017 boolean_t rv; 2018 2019 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2020 ("pmap_is_modified: page %p is not managed", m)); 2021 rv = FALSE; 2022 2023 /* 2024 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be 2025 * concurrently set while the object is locked. Thus, if PG_WRITEABLE 2026 * is clear, no PTEs can be dirty. 2027 */ 2028 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2029 if ((m->oflags & VPO_BUSY) == 0 && 2030 (m->flags & PG_WRITEABLE) == 0) 2031 return (rv); 2032 vm_page_lock_queues(); 2033 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2034 PMAP_LOCK(pv->pv_pmap); 2035 oldpmap = pmap_switch(pv->pv_pmap); 2036 pte = pmap_find_vhpt(pv->pv_va); 2037 pmap_switch(oldpmap); 2038 KASSERT(pte != NULL, ("pte")); 2039 rv = pmap_dirty(pte) ? TRUE : FALSE; 2040 PMAP_UNLOCK(pv->pv_pmap); 2041 if (rv) 2042 break; 2043 } 2044 vm_page_unlock_queues(); 2045 return (rv); 2046} 2047 2048/* 2049 * pmap_is_prefaultable: 2050 * 2051 * Return whether or not the specified virtual address is elgible 2052 * for prefault. 2053 */ 2054boolean_t 2055pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2056{ 2057 struct ia64_lpte *pte; 2058 2059 pte = pmap_find_vhpt(addr); 2060 if (pte != NULL && pmap_present(pte)) 2061 return (FALSE); 2062 return (TRUE); 2063} 2064 2065/* 2066 * pmap_is_referenced: 2067 * 2068 * Return whether or not the specified physical page was referenced 2069 * in any physical maps. 2070 */ 2071boolean_t 2072pmap_is_referenced(vm_page_t m) 2073{ 2074 struct ia64_lpte *pte; 2075 pmap_t oldpmap; 2076 pv_entry_t pv; 2077 boolean_t rv; 2078 2079 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2080 ("pmap_is_referenced: page %p is not managed", m)); 2081 rv = FALSE; 2082 vm_page_lock_queues(); 2083 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2084 PMAP_LOCK(pv->pv_pmap); 2085 oldpmap = pmap_switch(pv->pv_pmap); 2086 pte = pmap_find_vhpt(pv->pv_va); 2087 pmap_switch(oldpmap); 2088 KASSERT(pte != NULL, ("pte")); 2089 rv = pmap_accessed(pte) ? TRUE : FALSE; 2090 PMAP_UNLOCK(pv->pv_pmap); 2091 if (rv) 2092 break; 2093 } 2094 vm_page_unlock_queues(); 2095 return (rv); 2096} 2097 2098/* 2099 * Clear the modify bits on the specified physical page. 2100 */ 2101void 2102pmap_clear_modify(vm_page_t m) 2103{ 2104 struct ia64_lpte *pte; 2105 pmap_t oldpmap; 2106 pv_entry_t pv; 2107 2108 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2109 ("pmap_clear_modify: page %p is not managed", m)); 2110 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2111 KASSERT((m->oflags & VPO_BUSY) == 0, 2112 ("pmap_clear_modify: page %p is busy", m)); 2113 2114 /* 2115 * If the page is not PG_WRITEABLE, then no PTEs can be modified. 2116 * If the object containing the page is locked and the page is not 2117 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. 2118 */ 2119 if ((m->flags & PG_WRITEABLE) == 0) 2120 return; 2121 vm_page_lock_queues(); 2122 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2123 PMAP_LOCK(pv->pv_pmap); 2124 oldpmap = pmap_switch(pv->pv_pmap); 2125 pte = pmap_find_vhpt(pv->pv_va); 2126 KASSERT(pte != NULL, ("pte")); 2127 if (pmap_dirty(pte)) { 2128 pmap_clear_dirty(pte); 2129 pmap_invalidate_page(pv->pv_va); 2130 } 2131 pmap_switch(oldpmap); 2132 PMAP_UNLOCK(pv->pv_pmap); 2133 } 2134 vm_page_unlock_queues(); 2135} 2136 2137/* 2138 * pmap_clear_reference: 2139 * 2140 * Clear the reference bit on the specified physical page. 2141 */ 2142void 2143pmap_clear_reference(vm_page_t m) 2144{ 2145 struct ia64_lpte *pte; 2146 pmap_t oldpmap; 2147 pv_entry_t pv; 2148 2149 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2150 ("pmap_clear_reference: page %p is not managed", m)); 2151 vm_page_lock_queues(); 2152 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2153 PMAP_LOCK(pv->pv_pmap); 2154 oldpmap = pmap_switch(pv->pv_pmap); 2155 pte = pmap_find_vhpt(pv->pv_va); 2156 KASSERT(pte != NULL, ("pte")); 2157 if (pmap_accessed(pte)) { 2158 pmap_clear_accessed(pte); 2159 pmap_invalidate_page(pv->pv_va); 2160 } 2161 pmap_switch(oldpmap); 2162 PMAP_UNLOCK(pv->pv_pmap); 2163 } 2164 vm_page_unlock_queues(); 2165} 2166 2167/* 2168 * Clear the write and modified bits in each of the given page's mappings. 2169 */ 2170void 2171pmap_remove_write(vm_page_t m) 2172{ 2173 struct ia64_lpte *pte; 2174 pmap_t oldpmap, pmap; 2175 pv_entry_t pv; 2176 vm_prot_t prot; 2177 2178 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2179 ("pmap_remove_write: page %p is not managed", m)); 2180 2181 /* 2182 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by 2183 * another thread while the object is locked. Thus, if PG_WRITEABLE 2184 * is clear, no page table entries need updating. 2185 */ 2186 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2187 if ((m->oflags & VPO_BUSY) == 0 && 2188 (m->flags & PG_WRITEABLE) == 0) 2189 return; 2190 vm_page_lock_queues(); 2191 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2192 pmap = pv->pv_pmap; 2193 PMAP_LOCK(pmap); 2194 oldpmap = pmap_switch(pmap); 2195 pte = pmap_find_vhpt(pv->pv_va); 2196 KASSERT(pte != NULL, ("pte")); 2197 prot = pmap_prot(pte); 2198 if ((prot & VM_PROT_WRITE) != 0) { 2199 if (pmap_dirty(pte)) { 2200 vm_page_dirty(m); 2201 pmap_clear_dirty(pte); 2202 } 2203 prot &= ~VM_PROT_WRITE; 2204 pmap_pte_prot(pmap, pte, prot); 2205 pmap_invalidate_page(pv->pv_va); 2206 } 2207 pmap_switch(oldpmap); 2208 PMAP_UNLOCK(pmap); 2209 } 2210 vm_page_flag_clear(m, PG_WRITEABLE); 2211 vm_page_unlock_queues(); 2212} 2213 2214/* 2215 * Map a set of physical memory pages into the kernel virtual 2216 * address space. Return a pointer to where it is mapped. This 2217 * routine is intended to be used for mapping device memory, 2218 * NOT real memory. 2219 */ 2220void * 2221pmap_mapdev(vm_paddr_t pa, vm_size_t size) 2222{ 2223 vm_offset_t va; 2224 2225 va = pa | IA64_RR_BASE(6); 2226 return ((void *)va); 2227} 2228 2229/* 2230 * 'Unmap' a range mapped by pmap_mapdev(). 2231 */ 2232void 2233pmap_unmapdev(vm_offset_t va, vm_size_t size) 2234{ 2235} 2236 2237/* 2238 * perform the pmap work for mincore 2239 */ 2240int 2241pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 2242{ 2243 pmap_t oldpmap; 2244 struct ia64_lpte *pte, tpte; 2245 vm_paddr_t pa; 2246 int val; 2247 2248 PMAP_LOCK(pmap); 2249retry: 2250 oldpmap = pmap_switch(pmap); 2251 pte = pmap_find_vhpt(addr); 2252 if (pte != NULL) { 2253 tpte = *pte; 2254 pte = &tpte; 2255 } 2256 pmap_switch(oldpmap); 2257 if (pte == NULL || !pmap_present(pte)) { 2258 val = 0; 2259 goto out; 2260 } 2261 val = MINCORE_INCORE; 2262 if (pmap_dirty(pte)) 2263 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 2264 if (pmap_accessed(pte)) 2265 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 2266 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 2267 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && 2268 pmap_managed(pte)) { 2269 pa = pmap_ppn(pte); 2270 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ 2271 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 2272 goto retry; 2273 } else 2274out: 2275 PA_UNLOCK_COND(*locked_pa); 2276 PMAP_UNLOCK(pmap); 2277 return (val); 2278} 2279 2280void 2281pmap_activate(struct thread *td) 2282{ 2283 pmap_switch(vmspace_pmap(td->td_proc->p_vmspace)); 2284} 2285 2286pmap_t 2287pmap_switch(pmap_t pm) 2288{ 2289 pmap_t prevpm; 2290 int i; 2291 2292 critical_enter(); 2293 prevpm = PCPU_GET(md.current_pmap); 2294 if (prevpm == pm) 2295 goto out; 2296 if (pm == NULL) { 2297 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) { 2298 ia64_set_rr(IA64_RR_BASE(i), 2299 (i << 8)|(PAGE_SHIFT << 2)|1); 2300 } 2301 } else { 2302 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) { 2303 ia64_set_rr(IA64_RR_BASE(i), 2304 (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1); 2305 } 2306 } 2307 PCPU_SET(md.current_pmap, pm); 2308 ia64_srlz_d(); 2309 2310out: 2311 critical_exit(); 2312 return (prevpm); 2313} 2314 2315void 2316pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 2317{ 2318 pmap_t oldpm; 2319 struct ia64_lpte *pte; 2320 vm_offset_t lim; 2321 vm_size_t len; 2322 2323 sz += va & 31; 2324 va &= ~31; 2325 sz = (sz + 31) & ~31; 2326 2327 PMAP_LOCK(pm); 2328 oldpm = pmap_switch(pm); 2329 while (sz > 0) { 2330 lim = round_page(va); 2331 len = MIN(lim - va, sz); 2332 pte = pmap_find_vhpt(va); 2333 if (pte != NULL && pmap_present(pte)) 2334 ia64_sync_icache(va, len); 2335 va += len; 2336 sz -= len; 2337 } 2338 pmap_switch(oldpm); 2339 PMAP_UNLOCK(pm); 2340} 2341 2342/* 2343 * Increase the starting virtual address of the given mapping if a 2344 * different alignment might result in more superpage mappings. 2345 */ 2346void 2347pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 2348 vm_offset_t *addr, vm_size_t size) 2349{ 2350} 2351 2352#include "opt_ddb.h" 2353 2354#ifdef DDB 2355 2356#include <ddb/ddb.h> 2357 2358static const char* psnames[] = { 2359 "1B", "2B", "4B", "8B", 2360 "16B", "32B", "64B", "128B", 2361 "256B", "512B", "1K", "2K", 2362 "4K", "8K", "16K", "32K", 2363 "64K", "128K", "256K", "512K", 2364 "1M", "2M", "4M", "8M", 2365 "16M", "32M", "64M", "128M", 2366 "256M", "512M", "1G", "2G" 2367}; 2368 2369static void 2370print_trs(int type) 2371{ 2372 struct ia64_pal_result res; 2373 int i, maxtr; 2374 struct { 2375 pt_entry_t pte; 2376 uint64_t itir; 2377 uint64_t ifa; 2378 struct ia64_rr rr; 2379 } buf; 2380 static const char *manames[] = { 2381 "WB", "bad", "bad", "bad", 2382 "UC", "UCE", "WC", "NaT", 2383 }; 2384 2385 res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0); 2386 if (res.pal_status != 0) { 2387 db_printf("Can't get VM summary\n"); 2388 return; 2389 } 2390 2391 if (type == 0) 2392 maxtr = (res.pal_result[0] >> 40) & 0xff; 2393 else 2394 maxtr = (res.pal_result[0] >> 32) & 0xff; 2395 2396 db_printf("V RID Virtual Page Physical Page PgSz ED AR PL D A MA P KEY\n"); 2397 for (i = 0; i <= maxtr; i++) { 2398 bzero(&buf, sizeof(buf)); 2399 res = ia64_pal_physical(PAL_VM_TR_READ, i, type, 2400 ia64_tpa((uint64_t)&buf)); 2401 if (!(res.pal_result[0] & 1)) 2402 buf.pte &= ~PTE_AR_MASK; 2403 if (!(res.pal_result[0] & 2)) 2404 buf.pte &= ~PTE_PL_MASK; 2405 if (!(res.pal_result[0] & 4)) 2406 pmap_clear_dirty(&buf); 2407 if (!(res.pal_result[0] & 8)) 2408 buf.pte &= ~PTE_MA_MASK; 2409 db_printf("%d %06x %013lx %013lx %4s %d %d %d %d %d %-3s " 2410 "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid, 2411 buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12, 2412 psnames[(buf.itir & ITIR_PS_MASK) >> 2], 2413 (buf.pte & PTE_ED) ? 1 : 0, 2414 (int)(buf.pte & PTE_AR_MASK) >> 9, 2415 (int)(buf.pte & PTE_PL_MASK) >> 7, 2416 (pmap_dirty(&buf)) ? 1 : 0, 2417 (pmap_accessed(&buf)) ? 1 : 0, 2418 manames[(buf.pte & PTE_MA_MASK) >> 2], 2419 (pmap_present(&buf)) ? 1 : 0, 2420 (int)((buf.itir & ITIR_KEY_MASK) >> 8)); 2421 } 2422} 2423 2424DB_COMMAND(itr, db_itr) 2425{ 2426 print_trs(0); 2427} 2428 2429DB_COMMAND(dtr, db_dtr) 2430{ 2431 print_trs(1); 2432} 2433 2434DB_COMMAND(rr, db_rr) 2435{ 2436 int i; 2437 uint64_t t; 2438 struct ia64_rr rr; 2439 2440 printf("RR RID PgSz VE\n"); 2441 for (i = 0; i < 8; i++) { 2442 __asm __volatile ("mov %0=rr[%1]" 2443 : "=r"(t) 2444 : "r"(IA64_RR_BASE(i))); 2445 *(uint64_t *) &rr = t; 2446 printf("%d %06x %4s %d\n", 2447 i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve); 2448 } 2449} 2450 2451DB_COMMAND(thash, db_thash) 2452{ 2453 if (!have_addr) 2454 return; 2455 2456 db_printf("%p\n", (void *) ia64_thash(addr)); 2457} 2458 2459DB_COMMAND(ttag, db_ttag) 2460{ 2461 if (!have_addr) 2462 return; 2463 2464 db_printf("0x%lx\n", ia64_ttag(addr)); 2465} 2466 2467DB_COMMAND(kpte, db_kpte) 2468{ 2469 struct ia64_lpte *pte; 2470 2471 if (!have_addr) { 2472 db_printf("usage: kpte <kva>\n"); 2473 return; 2474 } 2475 if (addr < VM_MIN_KERNEL_ADDRESS) { 2476 db_printf("kpte: error: invalid <kva>\n"); 2477 return; 2478 } 2479 pte = pmap_find_kpte(addr); 2480 db_printf("kpte at %p:\n", pte); 2481 db_printf(" pte =%016lx\n", pte->pte); 2482 db_printf(" itir =%016lx\n", pte->itir); 2483 db_printf(" tag =%016lx\n", pte->tag); 2484 db_printf(" chain=%016lx\n", pte->chain); 2485} 2486 2487#endif 2488