/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 1998,2000 Doug Rabson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
 *		with some ideas from NetBSD's alpha pmap
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pageout.h>
#include <vm/uma.h>

#include <machine/bootinfo.h>
#include <machine/efi.h>
#include <machine/md_var.h>
#include <machine/pal.h>

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

/*
 * Following the Linux model, region IDs are allocated in groups of
 * eight so that a single region ID can be used for as many RRs as we
 * want by encoding the RR number into the low bits of the ID.
 *
 * We reserve region ID 0 for the kernel and allocate the remaining
 * IDs for user pmaps.
 *
 * Region 0-3:	User virtually mapped
 * Region 4:	PBVM and special mappings
 * Region 5:	Kernel virtual memory
 * Region 6:	Direct-mapped uncacheable
 * Region 7:	Direct-mapped cacheable
 */

/* XXX move to a header. */
extern uint64_t ia64_gateway_page[];

#ifndef PMAP_SHPGPERPROC
#define	PMAP_SHPGPERPROC 200
#endif

#if !defined(DIAGNOSTIC)
#define	PMAP_INLINE __inline
#else
#define	PMAP_INLINE
#endif

#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)

#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED

#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED

/*
 * The VHPT bucket head structure.
 */
struct ia64_bucket {
	uint64_t	chain;
	struct mtx	mutex;
	u_int		length;
};

/*
 * Statically allocated kernel pmap
 */
struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

/*
 * Kernel virtual memory management.
 */
static int nkpt;
extern struct ia64_lpte ***ia64_kptdir;

#define	KPTE_DIR0_INDEX(va) \
	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
#define	KPTE_DIR1_INDEX(va) \
	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
#define	KPTE_PTE_INDEX(va) \
	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
#define	NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))

vm_offset_t kernel_vm_end;
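
/*
 * A worked illustration of the KPTE_* macros above (a sketch only,
 * assuming 8 KB pages, i.e. PAGE_SHIFT == 13, and the 32-byte
 * struct ia64_lpte implied by the PAGE_SHIFT-5 term): a region 5
 * address is decomposed as
 *
 *	KPTE_PTE_INDEX:  bits 13..20 - lpte within a leaf page
 *	KPTE_DIR1_INDEX: bits 21..30 - leaf pointer within a dir1 page
 *	KPTE_DIR0_INDEX: bits 31..40 - dir1 pointer within ia64_kptdir
 *
 * so a leaf page holds NKPTEPG (PAGE_SIZE / 32 == 256) entries and a
 * directory page holds PAGE_SIZE / 8 == 1024 pointers.  pmap_find_kpte()
 * and pmap_growkernel() below walk and extend this structure.
 */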
/* Values for ptc.e. XXX values for SKI. */
static uint64_t pmap_ptc_e_base = 0x100000000;
static uint64_t pmap_ptc_e_count1 = 3;
static uint64_t pmap_ptc_e_count2 = 2;
static uint64_t pmap_ptc_e_stride1 = 0x2000;
static uint64_t pmap_ptc_e_stride2 = 0x100000000;

struct mtx pmap_ptc_mutex;

/*
 * Data for the RID allocator
 */
static int pmap_ridcount;
static int pmap_rididx;
static int pmap_ridmapsz;
static int pmap_ridmax;
static uint64_t *pmap_ridmap;
struct mtx pmap_ridmutex;

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;

/*
 * Data for allocating PTEs for user processes.
 */
static uma_zone_t ptezone;

/*
 * Virtual Hash Page Table (VHPT) data.
 */
/* SYSCTL_DECL(_machdep); */
static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");

struct ia64_bucket *pmap_vhpt_bucket;

int pmap_vhpt_nbuckets;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
    &pmap_vhpt_nbuckets, 0, "");

int pmap_vhpt_log2size = 0;
TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
    &pmap_vhpt_log2size, 0, "");

static int pmap_vhpt_inserts;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
    &pmap_vhpt_inserts, 0, "");

static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
    NULL, 0, pmap_vhpt_population, "I", "");

static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);

static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);

static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
		    vm_page_t m, vm_prot_t prot);
static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
static void	pmap_invalidate_all(void);
static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
		    vm_offset_t va, pv_entry_t pv, int freepte);
static int	pmap_remove_vhpt(vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
		    vm_page_t m);

static void
pmap_initialize_vhpt(vm_offset_t vhpt)
{
	struct ia64_lpte *pte;
	u_int i;

	pte = (struct ia64_lpte *)vhpt;
	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
		pte[i].pte = 0;
		pte[i].itir = 0;
		pte[i].tag = 1UL << 63;	/* Invalid tag */
		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
	}
}

#ifdef SMP
MALLOC_DECLARE(M_SMP);

vm_offset_t
pmap_alloc_vhpt(void)
{
	vm_offset_t vhpt;
	vm_size_t size;

	size = 1UL << pmap_vhpt_log2size;
	vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL);
	if (vhpt != 0) {
		vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt));
		pmap_initialize_vhpt(vhpt);
	}
	return (vhpt);
}
#endif

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap()
{
	struct ia64_pal_result res;
	vm_offset_t base;
	size_t size;
	int i, ridbits;

	/*
	 * Query the PAL Code to find the loop parameters for the
	 * ptc.e instruction.
295 */ 296 res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0); 297 if (res.pal_status != 0) 298 panic("Can't configure ptc.e parameters"); 299 pmap_ptc_e_base = res.pal_result[0]; 300 pmap_ptc_e_count1 = res.pal_result[1] >> 32; 301 pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1); 302 pmap_ptc_e_stride1 = res.pal_result[2] >> 32; 303 pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1); 304 if (bootverbose) 305 printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, " 306 "stride1=0x%lx, stride2=0x%lx\n", 307 pmap_ptc_e_base, 308 pmap_ptc_e_count1, 309 pmap_ptc_e_count2, 310 pmap_ptc_e_stride1, 311 pmap_ptc_e_stride2); 312 313 mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN); 314 315 /* 316 * Setup RIDs. RIDs 0..7 are reserved for the kernel. 317 * 318 * We currently need at least 19 bits in the RID because PID_MAX 319 * can only be encoded in 17 bits and we need RIDs for 4 regions 320 * per process. With PID_MAX equalling 99999 this means that we 321 * need to be able to encode 399996 (=4*PID_MAX). 322 * The Itanium processor only has 18 bits and the architected 323 * minimum is exactly that. So, we cannot use a PID based scheme 324 * in those cases. Enter pmap_ridmap... 325 * We should avoid the map when running on a processor that has 326 * implemented enough bits. This means that we should pass the 327 * process/thread ID to pmap. This we currently don't do, so we 328 * use the map anyway. However, we don't want to allocate a map 329 * that is large enough to cover the range dictated by the number 330 * of bits in the RID, because that may result in a RID map of 331 * 2MB in size for a 24-bit RID. A 64KB map is enough. 332 * The bottomline: we create a 32KB map when the processor only 333 * implements 18 bits (or when we can't figure it out). Otherwise 334 * we create a 64KB map. 335 */ 336 res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0); 337 if (res.pal_status != 0) { 338 if (bootverbose) 339 printf("Can't read VM Summary - assuming 18 Region ID bits\n"); 340 ridbits = 18; /* guaranteed minimum */ 341 } else { 342 ridbits = (res.pal_result[1] >> 8) & 0xff; 343 if (bootverbose) 344 printf("Processor supports %d Region ID bits\n", 345 ridbits); 346 } 347 if (ridbits > 19) 348 ridbits = 19; 349 350 pmap_ridmax = (1 << ridbits); 351 pmap_ridmapsz = pmap_ridmax / 64; 352 pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE); 353 pmap_ridmap[0] |= 0xff; 354 pmap_rididx = 0; 355 pmap_ridcount = 8; 356 mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF); 357 358 /* 359 * Allocate some memory for initial kernel 'page tables'. 360 */ 361 ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE); 362 nkpt = 0; 363 kernel_vm_end = VM_MIN_KERNEL_ADDRESS; 364 365 /* 366 * Determine a valid (mappable) VHPT size. 
367 */ 368 TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size); 369 if (pmap_vhpt_log2size == 0) 370 pmap_vhpt_log2size = 20; 371 else if (pmap_vhpt_log2size < 16) 372 pmap_vhpt_log2size = 16; 373 else if (pmap_vhpt_log2size > 28) 374 pmap_vhpt_log2size = 28; 375 if (pmap_vhpt_log2size & 1) 376 pmap_vhpt_log2size--; 377 378 size = 1UL << pmap_vhpt_log2size; 379 base = (uintptr_t)ia64_physmem_alloc(size, size); 380 if (base == 0) 381 panic("Unable to allocate VHPT"); 382 383 PCPU_SET(md.vhpt, base); 384 if (bootverbose) 385 printf("VHPT: address=%#lx, size=%#lx\n", base, size); 386 387 pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte); 388 pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets * 389 sizeof(struct ia64_bucket), PAGE_SIZE); 390 for (i = 0; i < pmap_vhpt_nbuckets; i++) { 391 /* Stolen memory is zeroed. */ 392 mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL, 393 MTX_NOWITNESS | MTX_SPIN); 394 } 395 396 pmap_initialize_vhpt(base); 397 map_vhpt(base); 398 ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1); 399 ia64_srlz_i(); 400 401 virtual_avail = VM_MIN_KERNEL_ADDRESS; 402 virtual_end = VM_MAX_KERNEL_ADDRESS; 403 404 /* 405 * Initialize the kernel pmap (which is statically allocated). 406 */ 407 PMAP_LOCK_INIT(kernel_pmap); 408 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) 409 kernel_pmap->pm_rid[i] = 0; 410 TAILQ_INIT(&kernel_pmap->pm_pvlist); 411 PCPU_SET(md.current_pmap, kernel_pmap); 412 413 /* Region 5 is mapped via the VHPT. */ 414 ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1); 415 416 /* 417 * Clear out any random TLB entries left over from booting. 418 */ 419 pmap_invalidate_all(); 420 421 map_gateway_page(); 422} 423 424static int 425pmap_vhpt_population(SYSCTL_HANDLER_ARGS) 426{ 427 int count, error, i; 428 429 count = 0; 430 for (i = 0; i < pmap_vhpt_nbuckets; i++) 431 count += pmap_vhpt_bucket[i].length; 432 433 error = SYSCTL_OUT(req, &count, sizeof(count)); 434 return (error); 435} 436 437vm_offset_t 438pmap_page_to_va(vm_page_t m) 439{ 440 vm_paddr_t pa; 441 vm_offset_t va; 442 443 pa = VM_PAGE_TO_PHYS(m); 444 va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) : 445 IA64_PHYS_TO_RR7(pa); 446 return (va); 447} 448 449/* 450 * Initialize a vm_page's machine-dependent fields. 451 */ 452void 453pmap_page_init(vm_page_t m) 454{ 455 456 TAILQ_INIT(&m->md.pv_list); 457 m->md.pv_list_count = 0; 458 m->md.memattr = VM_MEMATTR_DEFAULT; 459} 460 461/* 462 * Initialize the pmap module. 463 * Called by vm_init, to initialize any structures that the pmap 464 * system needs to map virtual memory. 465 */ 466void 467pmap_init(void) 468{ 469 int shpgperproc = PMAP_SHPGPERPROC; 470 471 /* 472 * Initialize the address space (zone) for the pv entries. Set a 473 * high water mark so that the system can recover from excessive 474 * numbers of pv entries. 
475 */ 476 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 477 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 478 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 479 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; 480 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 481 pv_entry_high_water = 9 * (pv_entry_max / 10); 482 483 ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte), 484 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE); 485} 486 487 488/*************************************************** 489 * Manipulate TLBs for a pmap 490 ***************************************************/ 491 492static void 493pmap_invalidate_page(vm_offset_t va) 494{ 495 struct ia64_lpte *pte; 496 struct pcpu *pc; 497 uint64_t tag; 498 u_int vhpt_ofs; 499 500 critical_enter(); 501 502 vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt); 503 tag = ia64_ttag(va); 504 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 505 pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs); 506 atomic_cmpset_64(&pte->tag, tag, 1UL << 63); 507 } 508 509 mtx_lock_spin(&pmap_ptc_mutex); 510 511 ia64_ptc_ga(va, PAGE_SHIFT << 2); 512 ia64_mf(); 513 ia64_srlz_i(); 514 515 mtx_unlock_spin(&pmap_ptc_mutex); 516 517 ia64_invala(); 518 519 critical_exit(); 520} 521 522static void 523pmap_invalidate_all_1(void *arg) 524{ 525 uint64_t addr; 526 int i, j; 527 528 critical_enter(); 529 addr = pmap_ptc_e_base; 530 for (i = 0; i < pmap_ptc_e_count1; i++) { 531 for (j = 0; j < pmap_ptc_e_count2; j++) { 532 ia64_ptc_e(addr); 533 addr += pmap_ptc_e_stride2; 534 } 535 addr += pmap_ptc_e_stride1; 536 } 537 critical_exit(); 538} 539 540static void 541pmap_invalidate_all(void) 542{ 543 544#ifdef SMP 545 if (mp_ncpus > 1) { 546 smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL); 547 return; 548 } 549#endif 550 pmap_invalidate_all_1(NULL); 551} 552 553static uint32_t 554pmap_allocate_rid(void) 555{ 556 uint64_t bit, bits; 557 int rid; 558 559 mtx_lock(&pmap_ridmutex); 560 if (pmap_ridcount == pmap_ridmax) 561 panic("pmap_allocate_rid: All Region IDs used"); 562 563 /* Find an index with a free bit. */ 564 while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) { 565 pmap_rididx++; 566 if (pmap_rididx == pmap_ridmapsz) 567 pmap_rididx = 0; 568 } 569 rid = pmap_rididx * 64; 570 571 /* Find a free bit. */ 572 bit = 1UL; 573 while (bits & bit) { 574 rid++; 575 bit <<= 1; 576 } 577 578 pmap_ridmap[pmap_rididx] |= bit; 579 pmap_ridcount++; 580 mtx_unlock(&pmap_ridmutex); 581 582 return rid; 583} 584 585static void 586pmap_free_rid(uint32_t rid) 587{ 588 uint64_t bit; 589 int idx; 590 591 idx = rid / 64; 592 bit = ~(1UL << (rid & 63)); 593 594 mtx_lock(&pmap_ridmutex); 595 pmap_ridmap[idx] &= bit; 596 pmap_ridcount--; 597 mtx_unlock(&pmap_ridmutex); 598} 599 600/*************************************************** 601 * Page table page management routines..... 602 ***************************************************/ 603 604void 605pmap_pinit0(struct pmap *pmap) 606{ 607 /* kernel_pmap is the same as any other pmap. */ 608 pmap_pinit(pmap); 609} 610 611/* 612 * Initialize a preallocated and zeroed pmap structure, 613 * such as one in a vmspace structure. 
614 */ 615int 616pmap_pinit(struct pmap *pmap) 617{ 618 int i; 619 620 PMAP_LOCK_INIT(pmap); 621 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) 622 pmap->pm_rid[i] = pmap_allocate_rid(); 623 TAILQ_INIT(&pmap->pm_pvlist); 624 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 625 return (1); 626} 627 628/*************************************************** 629 * Pmap allocation/deallocation routines. 630 ***************************************************/ 631 632/* 633 * Release any resources held by the given physical map. 634 * Called when a pmap initialized by pmap_pinit is being released. 635 * Should only be called if the map contains no valid mappings. 636 */ 637void 638pmap_release(pmap_t pmap) 639{ 640 int i; 641 642 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) 643 if (pmap->pm_rid[i]) 644 pmap_free_rid(pmap->pm_rid[i]); 645 PMAP_LOCK_DESTROY(pmap); 646} 647 648/* 649 * grow the number of kernel page table entries, if needed 650 */ 651void 652pmap_growkernel(vm_offset_t addr) 653{ 654 struct ia64_lpte **dir1; 655 struct ia64_lpte *leaf; 656 vm_page_t nkpg; 657 658 while (kernel_vm_end <= addr) { 659 if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64) 660 panic("%s: out of kernel address space", __func__); 661 662 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)]; 663 if (dir1 == NULL) { 664 nkpg = vm_page_alloc(NULL, nkpt++, 665 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED); 666 if (!nkpg) 667 panic("%s: cannot add dir. page", __func__); 668 669 dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg); 670 bzero(dir1, PAGE_SIZE); 671 ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1; 672 } 673 674 nkpg = vm_page_alloc(NULL, nkpt++, 675 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED); 676 if (!nkpg) 677 panic("%s: cannot add PTE page", __func__); 678 679 leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg); 680 bzero(leaf, PAGE_SIZE); 681 dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf; 682 683 kernel_vm_end += PAGE_SIZE * NKPTEPG; 684 } 685} 686 687/*************************************************** 688 * page management routines. 689 ***************************************************/ 690 691/* 692 * free the pv_entry back to the free list 693 */ 694static PMAP_INLINE void 695free_pv_entry(pv_entry_t pv) 696{ 697 pv_entry_count--; 698 uma_zfree(pvzone, pv); 699} 700 701/* 702 * get a new pv_entry, allocating a block from the system 703 * when needed. 704 */ 705static pv_entry_t 706get_pv_entry(pmap_t locked_pmap) 707{ 708 static const struct timeval printinterval = { 60, 0 }; 709 static struct timeval lastprint; 710 struct vpgqueues *vpq; 711 struct ia64_lpte *pte; 712 pmap_t oldpmap, pmap; 713 pv_entry_t allocated_pv, next_pv, pv; 714 vm_offset_t va; 715 vm_page_t m; 716 717 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 718 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 719 allocated_pv = uma_zalloc(pvzone, M_NOWAIT); 720 if (allocated_pv != NULL) { 721 pv_entry_count++; 722 if (pv_entry_count > pv_entry_high_water) 723 pagedaemon_wakeup(); 724 else 725 return (allocated_pv); 726 } 727 728 /* 729 * Reclaim pv entries: At first, destroy mappings to inactive 730 * pages. After that, if a pv entry is still needed, destroy 731 * mappings to active pages. 
732 */ 733 if (ratecheck(&lastprint, &printinterval)) 734 printf("Approaching the limit on PV entries, " 735 "increase the vm.pmap.shpgperproc tunable.\n"); 736 vpq = &vm_page_queues[PQ_INACTIVE]; 737retry: 738 TAILQ_FOREACH(m, &vpq->pl, pageq) { 739 if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy) 740 continue; 741 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { 742 va = pv->pv_va; 743 pmap = pv->pv_pmap; 744 /* Avoid deadlock and lock recursion. */ 745 if (pmap > locked_pmap) 746 PMAP_LOCK(pmap); 747 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) 748 continue; 749 pmap->pm_stats.resident_count--; 750 oldpmap = pmap_switch(pmap); 751 pte = pmap_find_vhpt(va); 752 KASSERT(pte != NULL, ("pte")); 753 pmap_remove_vhpt(va); 754 pmap_invalidate_page(va); 755 pmap_switch(oldpmap); 756 if (pmap_accessed(pte)) 757 vm_page_aflag_set(m, PGA_REFERENCED); 758 if (pmap_dirty(pte)) 759 vm_page_dirty(m); 760 pmap_free_pte(pte, va); 761 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 762 m->md.pv_list_count--; 763 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 764 if (pmap != locked_pmap) 765 PMAP_UNLOCK(pmap); 766 if (allocated_pv == NULL) 767 allocated_pv = pv; 768 else 769 free_pv_entry(pv); 770 } 771 if (TAILQ_EMPTY(&m->md.pv_list)) 772 vm_page_aflag_clear(m, PGA_WRITEABLE); 773 } 774 if (allocated_pv == NULL) { 775 if (vpq == &vm_page_queues[PQ_INACTIVE]) { 776 vpq = &vm_page_queues[PQ_ACTIVE]; 777 goto retry; 778 } 779 panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable"); 780 } 781 return (allocated_pv); 782} 783 784/* 785 * Conditionally create a pv entry. 786 */ 787static boolean_t 788pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 789{ 790 pv_entry_t pv; 791 792 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 793 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 794 if (pv_entry_count < pv_entry_high_water && 795 (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) { 796 pv_entry_count++; 797 pv->pv_va = va; 798 pv->pv_pmap = pmap; 799 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 800 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 801 m->md.pv_list_count++; 802 return (TRUE); 803 } else 804 return (FALSE); 805} 806 807/* 808 * Add an ia64_lpte to the VHPT. 809 */ 810static void 811pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va) 812{ 813 struct ia64_bucket *bckt; 814 struct ia64_lpte *vhpte; 815 uint64_t pte_pa; 816 817 /* Can fault, so get it out of the way. */ 818 pte_pa = ia64_tpa((vm_offset_t)pte); 819 820 vhpte = (struct ia64_lpte *)ia64_thash(va); 821 bckt = (struct ia64_bucket *)vhpte->chain; 822 823 mtx_lock_spin(&bckt->mutex); 824 pte->chain = bckt->chain; 825 ia64_mf(); 826 bckt->chain = pte_pa; 827 828 pmap_vhpt_inserts++; 829 bckt->length++; 830 mtx_unlock_spin(&bckt->mutex); 831} 832 833/* 834 * Remove the ia64_lpte matching va from the VHPT. Return zero if it 835 * worked or an appropriate error code otherwise. 
836 */ 837static int 838pmap_remove_vhpt(vm_offset_t va) 839{ 840 struct ia64_bucket *bckt; 841 struct ia64_lpte *pte; 842 struct ia64_lpte *lpte; 843 struct ia64_lpte *vhpte; 844 uint64_t chain, tag; 845 846 tag = ia64_ttag(va); 847 vhpte = (struct ia64_lpte *)ia64_thash(va); 848 bckt = (struct ia64_bucket *)vhpte->chain; 849 850 lpte = NULL; 851 mtx_lock_spin(&bckt->mutex); 852 chain = bckt->chain; 853 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain); 854 while (chain != 0 && pte->tag != tag) { 855 lpte = pte; 856 chain = pte->chain; 857 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain); 858 } 859 if (chain == 0) { 860 mtx_unlock_spin(&bckt->mutex); 861 return (ENOENT); 862 } 863 864 /* Snip this pv_entry out of the collision chain. */ 865 if (lpte == NULL) 866 bckt->chain = pte->chain; 867 else 868 lpte->chain = pte->chain; 869 ia64_mf(); 870 871 bckt->length--; 872 mtx_unlock_spin(&bckt->mutex); 873 return (0); 874} 875 876/* 877 * Find the ia64_lpte for the given va, if any. 878 */ 879static struct ia64_lpte * 880pmap_find_vhpt(vm_offset_t va) 881{ 882 struct ia64_bucket *bckt; 883 struct ia64_lpte *pte; 884 uint64_t chain, tag; 885 886 tag = ia64_ttag(va); 887 pte = (struct ia64_lpte *)ia64_thash(va); 888 bckt = (struct ia64_bucket *)pte->chain; 889 890 mtx_lock_spin(&bckt->mutex); 891 chain = bckt->chain; 892 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain); 893 while (chain != 0 && pte->tag != tag) { 894 chain = pte->chain; 895 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain); 896 } 897 mtx_unlock_spin(&bckt->mutex); 898 return ((chain != 0) ? pte : NULL); 899} 900 901/* 902 * Remove an entry from the list of managed mappings. 903 */ 904static int 905pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv) 906{ 907 if (!pv) { 908 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 909 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 910 if (pmap == pv->pv_pmap && va == pv->pv_va) 911 break; 912 } 913 } else { 914 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 915 if (va == pv->pv_va) 916 break; 917 } 918 } 919 } 920 921 if (pv) { 922 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 923 m->md.pv_list_count--; 924 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 925 vm_page_aflag_clear(m, PGA_WRITEABLE); 926 927 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 928 free_pv_entry(pv); 929 return 0; 930 } else { 931 return ENOENT; 932 } 933} 934 935/* 936 * Create a pv entry for page at pa for 937 * (pmap, va). 938 */ 939static void 940pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 941{ 942 pv_entry_t pv; 943 944 pv = get_pv_entry(pmap); 945 pv->pv_pmap = pmap; 946 pv->pv_va = va; 947 948 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 949 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 950 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 951 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 952 m->md.pv_list_count++; 953} 954 955/* 956 * Routine: pmap_extract 957 * Function: 958 * Extract the physical page address associated 959 * with the given map/virtual_address pair. 
960 */ 961vm_paddr_t 962pmap_extract(pmap_t pmap, vm_offset_t va) 963{ 964 struct ia64_lpte *pte; 965 pmap_t oldpmap; 966 vm_paddr_t pa; 967 968 pa = 0; 969 PMAP_LOCK(pmap); 970 oldpmap = pmap_switch(pmap); 971 pte = pmap_find_vhpt(va); 972 if (pte != NULL && pmap_present(pte)) 973 pa = pmap_ppn(pte); 974 pmap_switch(oldpmap); 975 PMAP_UNLOCK(pmap); 976 return (pa); 977} 978 979/* 980 * Routine: pmap_extract_and_hold 981 * Function: 982 * Atomically extract and hold the physical page 983 * with the given pmap and virtual address pair 984 * if that mapping permits the given protection. 985 */ 986vm_page_t 987pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 988{ 989 struct ia64_lpte *pte; 990 pmap_t oldpmap; 991 vm_page_t m; 992 vm_paddr_t pa; 993 994 pa = 0; 995 m = NULL; 996 PMAP_LOCK(pmap); 997 oldpmap = pmap_switch(pmap); 998retry: 999 pte = pmap_find_vhpt(va); 1000 if (pte != NULL && pmap_present(pte) && 1001 (pmap_prot(pte) & prot) == prot) { 1002 m = PHYS_TO_VM_PAGE(pmap_ppn(pte)); 1003 if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa)) 1004 goto retry; 1005 vm_page_hold(m); 1006 } 1007 PA_UNLOCK_COND(pa); 1008 pmap_switch(oldpmap); 1009 PMAP_UNLOCK(pmap); 1010 return (m); 1011} 1012 1013/*************************************************** 1014 * Low level mapping routines..... 1015 ***************************************************/ 1016 1017/* 1018 * Find the kernel lpte for mapping the given virtual address, which 1019 * must be in the part of region 5 which we can cover with our kernel 1020 * 'page tables'. 1021 */ 1022static struct ia64_lpte * 1023pmap_find_kpte(vm_offset_t va) 1024{ 1025 struct ia64_lpte **dir1; 1026 struct ia64_lpte *leaf; 1027 1028 KASSERT((va >> 61) == 5, 1029 ("kernel mapping 0x%lx not in region 5", va)); 1030 KASSERT(va < kernel_vm_end, 1031 ("kernel mapping 0x%lx out of range", va)); 1032 1033 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)]; 1034 leaf = dir1[KPTE_DIR1_INDEX(va)]; 1035 return (&leaf[KPTE_PTE_INDEX(va)]); 1036} 1037 1038/* 1039 * Find a pte suitable for mapping a user-space address. If one exists 1040 * in the VHPT, that one will be returned, otherwise a new pte is 1041 * allocated. 1042 */ 1043static struct ia64_lpte * 1044pmap_find_pte(vm_offset_t va) 1045{ 1046 struct ia64_lpte *pte; 1047 1048 if (va >= VM_MAXUSER_ADDRESS) 1049 return pmap_find_kpte(va); 1050 1051 pte = pmap_find_vhpt(va); 1052 if (pte == NULL) { 1053 pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO); 1054 pte->tag = 1UL << 63; 1055 } 1056 return (pte); 1057} 1058 1059/* 1060 * Free a pte which is now unused. This simply returns it to the zone 1061 * allocator if it is a user mapping. For kernel mappings, clear the 1062 * valid bit to make it clear that the mapping is not currently used. 1063 */ 1064static void 1065pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va) 1066{ 1067 if (va < VM_MAXUSER_ADDRESS) 1068 uma_zfree(ptezone, pte); 1069 else 1070 pmap_clear_present(pte); 1071} 1072 1073static PMAP_INLINE void 1074pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot) 1075{ 1076 static long prot2ar[4] = { 1077 PTE_AR_R, /* VM_PROT_NONE */ 1078 PTE_AR_RW, /* VM_PROT_WRITE */ 1079 PTE_AR_RX|PTE_ED, /* VM_PROT_EXECUTE */ 1080 PTE_AR_RWX|PTE_ED /* VM_PROT_WRITE|VM_PROT_EXECUTE */ 1081 }; 1082 1083 pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED); 1084 pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56; 1085 pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap) 1086 ? 
static PMAP_INLINE void
pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
{
	static long prot2ar[4] = {
		PTE_AR_R,		/* VM_PROT_NONE */
		PTE_AR_RW,		/* VM_PROT_WRITE */
		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
	};

	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap) ?
	    PTE_PL_KERN : PTE_PL_USER;
	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
}

static PMAP_INLINE void
pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
{

	pte->pte &= ~PTE_MA_MASK;
	pte->pte |= (ma & PTE_MA_MASK);
}

/*
 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it's assumed to already be in the
 * VHPT.
 * This function does not set the protection bits.  It's expected
 * that those have been set correctly prior to calling this function.
 */
static void
pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
    boolean_t wired, boolean_t managed)
{

	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
	    PTE_AR_MASK | PTE_ED;
	pte->pte |= PTE_PRESENT;
	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
	pte->pte |= (wired) ? PTE_WIRED : 0;
	pte->pte |= pa & PTE_PPN_MASK;

	pte->itir = PAGE_SHIFT << 2;

	pte->tag = ia64_ttag(va);
}

/*
 * Remove the (possibly managed) mapping represented by pte from the
 * given pmap.
 */
static int
pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
    pv_entry_t pv, int freepte)
{
	int error;
	vm_page_t m;

	/*
	 * First remove from the VHPT.
	 */
	error = pmap_remove_vhpt(va);
	if (error)
		return (error);

	pmap_invalidate_page(va);

	if (pmap_wired(pte))
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;
	if (pmap_managed(pte)) {
		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		if (pmap_dirty(pte))
			vm_page_dirty(m);
		if (pmap_accessed(pte))
			vm_page_aflag_set(m, PGA_REFERENCED);

		error = pmap_remove_entry(pmap, m, va, pv);
	}
	if (freepte)
		pmap_free_pte(pte, va);

	return (error);
}

/*
 * Extract the physical page address associated with a kernel
 * virtual address.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	struct ia64_lpte *pte;
	uint64_t *pbvm_pgtbl;
	vm_paddr_t pa;
	u_int idx;

	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));

	/* Regions 6 and 7 are direct mapped. */
	if (va >= IA64_RR_BASE(6)) {
		pa = IA64_RR_MASK(va);
		goto out;
	}

	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
	if (va >= kernel_vm_end)
		goto err_out;
	if (va >= VM_MIN_KERNEL_ADDRESS) {
		pte = pmap_find_kpte(va);
		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
		goto out;
	}

	/* The PBVM page table. */
	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
		goto err_out;
	if (va >= IA64_PBVM_PGTBL) {
		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
		goto out;
	}

	/* The PBVM itself. */
	if (va >= IA64_PBVM_BASE) {
		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
			goto err_out;
		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
			goto err_out;
		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
		    (va & IA64_PBVM_PAGE_MASK);
		goto out;
	}

 err_out:
	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
	pa = 0;
	/* FALLTHROUGH */

 out:
	return (pa);
}

/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page is effectively wired, but it's customary to not have
 * the PTE reflect that, nor update statistics.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	struct ia64_lpte *pte;
	int i;

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		if (pmap_present(pte))
			pmap_invalidate_page(va);
		else
			pmap_enter_vhpt(pte, va);
		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
		pmap_pte_attr(pte, m[i]->md.memattr);
		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
		va += PAGE_SIZE;
	}
}

/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	struct ia64_lpte *pte;
	int i;

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		if (pmap_present(pte)) {
			pmap_remove_vhpt(va);
			pmap_invalidate_page(va);
			pmap_clear_present(pte);
		}
		va += PAGE_SIZE;
	}
}

/*
 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
 * to not have the PTE reflect that, nor update statistics.
 */
void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	if (pmap_present(pte))
		pmap_invalidate_page(va);
	else
		pmap_enter_vhpt(pte, va);
	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
	pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
	pmap_set_pte(pte, va, pa, FALSE, FALSE);
}

/*
 * Remove a page from the kva
 */
void
pmap_kremove(vm_offset_t va)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	if (pmap_present(pte)) {
		pmap_remove_vhpt(va);
		pmap_invalidate_page(va);
		pmap_clear_present(pte);
	}
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
	return IA64_PHYS_TO_RR7(start);
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
1325 */ 1326void 1327pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1328{ 1329 pmap_t oldpmap; 1330 vm_offset_t va; 1331 pv_entry_t npv, pv; 1332 struct ia64_lpte *pte; 1333 1334 if (pmap->pm_stats.resident_count == 0) 1335 return; 1336 1337 vm_page_lock_queues(); 1338 PMAP_LOCK(pmap); 1339 oldpmap = pmap_switch(pmap); 1340 1341 /* 1342 * special handling of removing one page. a very 1343 * common operation and easy to short circuit some 1344 * code. 1345 */ 1346 if (sva + PAGE_SIZE == eva) { 1347 pte = pmap_find_vhpt(sva); 1348 if (pte != NULL) 1349 pmap_remove_pte(pmap, pte, sva, 0, 1); 1350 goto out; 1351 } 1352 1353 if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) { 1354 TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) { 1355 va = pv->pv_va; 1356 if (va >= sva && va < eva) { 1357 pte = pmap_find_vhpt(va); 1358 KASSERT(pte != NULL, ("pte")); 1359 pmap_remove_pte(pmap, pte, va, pv, 1); 1360 } 1361 } 1362 } else { 1363 for (va = sva; va < eva; va += PAGE_SIZE) { 1364 pte = pmap_find_vhpt(va); 1365 if (pte != NULL) 1366 pmap_remove_pte(pmap, pte, va, 0, 1); 1367 } 1368 } 1369 1370out: 1371 vm_page_unlock_queues(); 1372 pmap_switch(oldpmap); 1373 PMAP_UNLOCK(pmap); 1374} 1375 1376/* 1377 * Routine: pmap_remove_all 1378 * Function: 1379 * Removes this physical page from 1380 * all physical maps in which it resides. 1381 * Reflects back modify bits to the pager. 1382 * 1383 * Notes: 1384 * Original versions of this routine were very 1385 * inefficient because they iteratively called 1386 * pmap_remove (slow...) 1387 */ 1388 1389void 1390pmap_remove_all(vm_page_t m) 1391{ 1392 pmap_t oldpmap; 1393 pv_entry_t pv; 1394 1395 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1396 ("pmap_remove_all: page %p is not managed", m)); 1397 vm_page_lock_queues(); 1398 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1399 struct ia64_lpte *pte; 1400 pmap_t pmap = pv->pv_pmap; 1401 vm_offset_t va = pv->pv_va; 1402 1403 PMAP_LOCK(pmap); 1404 oldpmap = pmap_switch(pmap); 1405 pte = pmap_find_vhpt(va); 1406 KASSERT(pte != NULL, ("pte")); 1407 if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m)) 1408 panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m)); 1409 pmap_remove_pte(pmap, pte, va, pv, 1); 1410 pmap_switch(oldpmap); 1411 PMAP_UNLOCK(pmap); 1412 } 1413 vm_page_aflag_clear(m, PGA_WRITEABLE); 1414 vm_page_unlock_queues(); 1415} 1416 1417/* 1418 * Set the physical protection on the 1419 * specified range of this map as requested. 
1420 */ 1421void 1422pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1423{ 1424 pmap_t oldpmap; 1425 struct ia64_lpte *pte; 1426 1427 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1428 pmap_remove(pmap, sva, eva); 1429 return; 1430 } 1431 1432 if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == 1433 (VM_PROT_WRITE|VM_PROT_EXECUTE)) 1434 return; 1435 1436 if ((sva & PAGE_MASK) || (eva & PAGE_MASK)) 1437 panic("pmap_protect: unaligned addresses"); 1438 1439 PMAP_LOCK(pmap); 1440 oldpmap = pmap_switch(pmap); 1441 for ( ; sva < eva; sva += PAGE_SIZE) { 1442 /* If page is invalid, skip this page */ 1443 pte = pmap_find_vhpt(sva); 1444 if (pte == NULL) 1445 continue; 1446 1447 /* If there's no change, skip it too */ 1448 if (pmap_prot(pte) == prot) 1449 continue; 1450 1451 if ((prot & VM_PROT_WRITE) == 0 && 1452 pmap_managed(pte) && pmap_dirty(pte)) { 1453 vm_paddr_t pa = pmap_ppn(pte); 1454 vm_page_t m = PHYS_TO_VM_PAGE(pa); 1455 1456 vm_page_dirty(m); 1457 pmap_clear_dirty(pte); 1458 } 1459 1460 if (prot & VM_PROT_EXECUTE) 1461 ia64_sync_icache(sva, PAGE_SIZE); 1462 1463 pmap_pte_prot(pmap, pte, prot); 1464 pmap_invalidate_page(sva); 1465 } 1466 pmap_switch(oldpmap); 1467 PMAP_UNLOCK(pmap); 1468} 1469 1470/* 1471 * Insert the given physical page (p) at 1472 * the specified virtual address (v) in the 1473 * target physical map with the protection requested. 1474 * 1475 * If specified, the page will be wired down, meaning 1476 * that the related pte can not be reclaimed. 1477 * 1478 * NB: This is the only routine which MAY NOT lazy-evaluate 1479 * or lose information. That is, this routine must actually 1480 * insert this page into the given map NOW. 1481 */ 1482void 1483pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 1484 vm_prot_t prot, boolean_t wired) 1485{ 1486 pmap_t oldpmap; 1487 vm_offset_t pa; 1488 vm_offset_t opa; 1489 struct ia64_lpte origpte; 1490 struct ia64_lpte *pte; 1491 boolean_t icache_inval, managed; 1492 1493 vm_page_lock_queues(); 1494 PMAP_LOCK(pmap); 1495 oldpmap = pmap_switch(pmap); 1496 1497 va &= ~PAGE_MASK; 1498 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 1499 KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0, 1500 ("pmap_enter: page %p is not busy", m)); 1501 1502 /* 1503 * Find (or create) a pte for the given mapping. 1504 */ 1505 while ((pte = pmap_find_pte(va)) == NULL) { 1506 pmap_switch(oldpmap); 1507 PMAP_UNLOCK(pmap); 1508 vm_page_unlock_queues(); 1509 VM_WAIT; 1510 vm_page_lock_queues(); 1511 PMAP_LOCK(pmap); 1512 oldpmap = pmap_switch(pmap); 1513 } 1514 origpte = *pte; 1515 if (!pmap_present(pte)) { 1516 opa = ~0UL; 1517 pmap_enter_vhpt(pte, va); 1518 } else 1519 opa = pmap_ppn(pte); 1520 managed = FALSE; 1521 pa = VM_PAGE_TO_PHYS(m); 1522 1523 icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE; 1524 1525 /* 1526 * Mapping has not changed, must be protection or wiring change. 1527 */ 1528 if (opa == pa) { 1529 /* 1530 * Wiring change, just update stats. We don't worry about 1531 * wiring PT pages as they remain resident as long as there 1532 * are valid mappings in them. Hence, if a user page is wired, 1533 * the PT page will be also. 1534 */ 1535 if (wired && !pmap_wired(&origpte)) 1536 pmap->pm_stats.wired_count++; 1537 else if (!wired && pmap_wired(&origpte)) 1538 pmap->pm_stats.wired_count--; 1539 1540 managed = (pmap_managed(&origpte)) ? TRUE : FALSE; 1541 1542 /* 1543 * We might be turning off write access to the page, 1544 * so we go ahead and sense modify status. 
		 * Otherwise, we can avoid I-cache invalidation if the
		 * page already allowed execution.
		 */
		if (managed && pmap_dirty(&origpte))
			vm_page_dirty(m);
		else if (pmap_exec(&origpte))
			icache_inval = FALSE;

		pmap_invalidate_page(va);
		goto validate;
	}

	/*
	 * Mapping has changed, invalidate old range and fall
	 * through to handle validating new mapping.
	 */
	if (opa != ~0UL) {
		pmap_remove_pte(pmap, pte, va, 0, 0);
		pmap_enter_vhpt(pte, va);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("pmap_enter: managed mapping within the clean submap"));
		pmap_insert_entry(pmap, va, m);
		managed = TRUE;
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:

	/*
	 * Now validate mapping with desired protection/wiring. This
	 * adds the pte to the VHPT if necessary.
	 */
	pmap_pte_prot(pmap, pte, prot);
	pmap_pte_attr(pte, m->md.memattr);
	pmap_set_pte(pte, va, pa, wired, managed);

	/* Invalidate the I-cache when needed. */
	if (icache_inval)
		ia64_sync_icache(va, PAGE_SIZE);

	if ((prot & VM_PROT_WRITE) != 0 && managed)
		vm_page_aflag_set(m, PGA_WRITEABLE);
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.  Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_page_t m_start, vm_prot_t prot)
{
	pmap_t oldpmap;
	vm_page_t m;
	vm_pindex_t diff, psize;

	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
	psize = atop(end - start);
	m = m_start;
	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
		m = TAILQ_NEXT(m, listq);
	}
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
1646 */ 1647 1648void 1649pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 1650{ 1651 pmap_t oldpmap; 1652 1653 vm_page_lock_queues(); 1654 PMAP_LOCK(pmap); 1655 oldpmap = pmap_switch(pmap); 1656 pmap_enter_quick_locked(pmap, va, m, prot); 1657 vm_page_unlock_queues(); 1658 pmap_switch(oldpmap); 1659 PMAP_UNLOCK(pmap); 1660} 1661 1662static void 1663pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 1664 vm_prot_t prot) 1665{ 1666 struct ia64_lpte *pte; 1667 boolean_t managed; 1668 1669 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 1670 (m->oflags & VPO_UNMANAGED) != 0, 1671 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 1672 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1673 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1674 1675 if ((pte = pmap_find_pte(va)) == NULL) 1676 return; 1677 1678 if (!pmap_present(pte)) { 1679 /* Enter on the PV list if the page is managed. */ 1680 if ((m->oflags & VPO_UNMANAGED) == 0) { 1681 if (!pmap_try_insert_pv_entry(pmap, va, m)) { 1682 pmap_free_pte(pte, va); 1683 return; 1684 } 1685 managed = TRUE; 1686 } else 1687 managed = FALSE; 1688 1689 /* Increment counters. */ 1690 pmap->pm_stats.resident_count++; 1691 1692 /* Initialise with R/O protection and enter into VHPT. */ 1693 pmap_enter_vhpt(pte, va); 1694 pmap_pte_prot(pmap, pte, 1695 prot & (VM_PROT_READ | VM_PROT_EXECUTE)); 1696 pmap_pte_attr(pte, m->md.memattr); 1697 pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed); 1698 1699 if (prot & VM_PROT_EXECUTE) 1700 ia64_sync_icache(va, PAGE_SIZE); 1701 } 1702} 1703 1704/* 1705 * pmap_object_init_pt preloads the ptes for a given object 1706 * into the specified pmap. This eliminates the blast of soft 1707 * faults on process startup and immediately after an mmap. 1708 */ 1709void 1710pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 1711 vm_object_t object, vm_pindex_t pindex, 1712 vm_size_t size) 1713{ 1714 1715 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 1716 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 1717 ("pmap_object_init_pt: non-device object")); 1718} 1719 1720/* 1721 * Routine: pmap_change_wiring 1722 * Function: Change the wiring attribute for a map/virtual-address 1723 * pair. 1724 * In/out conditions: 1725 * The mapping must already exist in the pmap. 1726 */ 1727void 1728pmap_change_wiring(pmap, va, wired) 1729 register pmap_t pmap; 1730 vm_offset_t va; 1731 boolean_t wired; 1732{ 1733 pmap_t oldpmap; 1734 struct ia64_lpte *pte; 1735 1736 PMAP_LOCK(pmap); 1737 oldpmap = pmap_switch(pmap); 1738 1739 pte = pmap_find_vhpt(va); 1740 KASSERT(pte != NULL, ("pte")); 1741 if (wired && !pmap_wired(pte)) { 1742 pmap->pm_stats.wired_count++; 1743 pmap_set_wired(pte); 1744 } else if (!wired && pmap_wired(pte)) { 1745 pmap->pm_stats.wired_count--; 1746 pmap_clear_wired(pte); 1747 } 1748 1749 pmap_switch(oldpmap); 1750 PMAP_UNLOCK(pmap); 1751} 1752 1753 1754 1755/* 1756 * Copy the range specified by src_addr/len 1757 * from the source map to the range dst_addr/len 1758 * in the destination map. 1759 * 1760 * This routine is only advisory and need not do anything. 1761 */ 1762 1763void 1764pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 1765 vm_offset_t src_addr) 1766{ 1767} 1768 1769 1770/* 1771 * pmap_zero_page zeros the specified hardware page by 1772 * mapping it into virtual memory and using bzero to clear 1773 * its contents. 
1774 */ 1775 1776void 1777pmap_zero_page(vm_page_t m) 1778{ 1779 void *p; 1780 1781 p = (void *)pmap_page_to_va(m); 1782 bzero(p, PAGE_SIZE); 1783} 1784 1785 1786/* 1787 * pmap_zero_page_area zeros the specified hardware page by 1788 * mapping it into virtual memory and using bzero to clear 1789 * its contents. 1790 * 1791 * off and size must reside within a single page. 1792 */ 1793 1794void 1795pmap_zero_page_area(vm_page_t m, int off, int size) 1796{ 1797 char *p; 1798 1799 p = (void *)pmap_page_to_va(m); 1800 bzero(p + off, size); 1801} 1802 1803 1804/* 1805 * pmap_zero_page_idle zeros the specified hardware page by 1806 * mapping it into virtual memory and using bzero to clear 1807 * its contents. This is for the vm_idlezero process. 1808 */ 1809 1810void 1811pmap_zero_page_idle(vm_page_t m) 1812{ 1813 void *p; 1814 1815 p = (void *)pmap_page_to_va(m); 1816 bzero(p, PAGE_SIZE); 1817} 1818 1819 1820/* 1821 * pmap_copy_page copies the specified (machine independent) 1822 * page by mapping the page into virtual memory and using 1823 * bcopy to copy the page, one machine dependent page at a 1824 * time. 1825 */ 1826void 1827pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 1828{ 1829 void *dst, *src; 1830 1831 src = (void *)pmap_page_to_va(msrc); 1832 dst = (void *)pmap_page_to_va(mdst); 1833 bcopy(src, dst, PAGE_SIZE); 1834} 1835 1836int unmapped_buf_allowed; 1837 1838void 1839pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 1840 vm_offset_t b_offset, int xfersize) 1841{ 1842 void *a_cp, *b_cp; 1843 vm_offset_t a_pg_offset, b_pg_offset; 1844 int cnt; 1845 1846 while (xfersize > 0) { 1847 a_pg_offset = a_offset & PAGE_MASK; 1848 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 1849 a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) + 1850 a_pg_offset; 1851 b_pg_offset = b_offset & PAGE_MASK; 1852 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 1853 b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) + 1854 b_pg_offset; 1855 bcopy(a_cp, b_cp, cnt); 1856 a_offset += cnt; 1857 b_offset += cnt; 1858 xfersize -= cnt; 1859 } 1860} 1861 1862/* 1863 * Returns true if the pmap's pv is one of the first 1864 * 16 pvs linked to from this page. This count may 1865 * be changed upwards or downwards in the future; it 1866 * is only necessary that true be returned for a small 1867 * subset of pmaps for proper page aging. 1868 */ 1869boolean_t 1870pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 1871{ 1872 pv_entry_t pv; 1873 int loops = 0; 1874 boolean_t rv; 1875 1876 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1877 ("pmap_page_exists_quick: page %p is not managed", m)); 1878 rv = FALSE; 1879 vm_page_lock_queues(); 1880 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1881 if (pv->pv_pmap == pmap) { 1882 rv = TRUE; 1883 break; 1884 } 1885 loops++; 1886 if (loops >= 16) 1887 break; 1888 } 1889 vm_page_unlock_queues(); 1890 return (rv); 1891} 1892 1893/* 1894 * pmap_page_wired_mappings: 1895 * 1896 * Return the number of managed mappings to the given physical page 1897 * that are wired. 
1898 */ 1899int 1900pmap_page_wired_mappings(vm_page_t m) 1901{ 1902 struct ia64_lpte *pte; 1903 pmap_t oldpmap, pmap; 1904 pv_entry_t pv; 1905 int count; 1906 1907 count = 0; 1908 if ((m->oflags & VPO_UNMANAGED) != 0) 1909 return (count); 1910 vm_page_lock_queues(); 1911 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1912 pmap = pv->pv_pmap; 1913 PMAP_LOCK(pmap); 1914 oldpmap = pmap_switch(pmap); 1915 pte = pmap_find_vhpt(pv->pv_va); 1916 KASSERT(pte != NULL, ("pte")); 1917 if (pmap_wired(pte)) 1918 count++; 1919 pmap_switch(oldpmap); 1920 PMAP_UNLOCK(pmap); 1921 } 1922 vm_page_unlock_queues(); 1923 return (count); 1924} 1925 1926/* 1927 * Remove all pages from specified address space 1928 * this aids process exit speeds. Also, this code 1929 * is special cased for current process only, but 1930 * can have the more generic (and slightly slower) 1931 * mode enabled. This is much faster than pmap_remove 1932 * in the case of running down an entire address space. 1933 */ 1934void 1935pmap_remove_pages(pmap_t pmap) 1936{ 1937 pmap_t oldpmap; 1938 pv_entry_t pv, npv; 1939 1940 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 1941 printf("warning: %s called with non-current pmap\n", 1942 __func__); 1943 return; 1944 } 1945 1946 vm_page_lock_queues(); 1947 PMAP_LOCK(pmap); 1948 oldpmap = pmap_switch(pmap); 1949 1950 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 1951 struct ia64_lpte *pte; 1952 1953 npv = TAILQ_NEXT(pv, pv_plist); 1954 1955 pte = pmap_find_vhpt(pv->pv_va); 1956 KASSERT(pte != NULL, ("pte")); 1957 if (!pmap_wired(pte)) 1958 pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1); 1959 } 1960 1961 pmap_switch(oldpmap); 1962 PMAP_UNLOCK(pmap); 1963 vm_page_unlock_queues(); 1964} 1965 1966/* 1967 * pmap_ts_referenced: 1968 * 1969 * Return a count of reference bits for a page, clearing those bits. 1970 * It is not necessary for every reference bit to be cleared, but it 1971 * is necessary that 0 only be returned when there are truly no 1972 * reference bits set. 1973 * 1974 * XXX: The exact number of bits to check and clear is a matter that 1975 * should be tested and standardized at some point in the future for 1976 * optimal aging of shared pages. 1977 */ 1978int 1979pmap_ts_referenced(vm_page_t m) 1980{ 1981 struct ia64_lpte *pte; 1982 pmap_t oldpmap; 1983 pv_entry_t pv; 1984 int count = 0; 1985 1986 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1987 ("pmap_ts_referenced: page %p is not managed", m)); 1988 vm_page_lock_queues(); 1989 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1990 PMAP_LOCK(pv->pv_pmap); 1991 oldpmap = pmap_switch(pv->pv_pmap); 1992 pte = pmap_find_vhpt(pv->pv_va); 1993 KASSERT(pte != NULL, ("pte")); 1994 if (pmap_accessed(pte)) { 1995 count++; 1996 pmap_clear_accessed(pte); 1997 pmap_invalidate_page(pv->pv_va); 1998 } 1999 pmap_switch(oldpmap); 2000 PMAP_UNLOCK(pv->pv_pmap); 2001 } 2002 vm_page_unlock_queues(); 2003 return (count); 2004} 2005 2006/* 2007 * pmap_is_modified: 2008 * 2009 * Return whether or not the specified physical page was modified 2010 * in any physical maps. 2011 */ 2012boolean_t 2013pmap_is_modified(vm_page_t m) 2014{ 2015 struct ia64_lpte *pte; 2016 pmap_t oldpmap; 2017 pv_entry_t pv; 2018 boolean_t rv; 2019 2020 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2021 ("pmap_is_modified: page %p is not managed", m)); 2022 rv = FALSE; 2023 2024 /* 2025 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be 2026 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 2027 * is clear, no PTEs can be dirty. 

/*
 * pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_modified: page %p is not managed", m));
	rv = FALSE;

	/*
	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked.  Thus, if
	 * PGA_WRITEABLE is clear, no PTEs can be dirty.
	 */
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((m->oflags & VPO_BUSY) == 0 &&
	    (m->aflags & PGA_WRITEABLE) == 0)
		return (rv);
	vm_page_lock_queues();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		pmap_switch(oldpmap);
		KASSERT(pte != NULL, ("pte"));
		rv = pmap_dirty(pte) ? TRUE : FALSE;
		PMAP_UNLOCK(pv->pv_pmap);
		if (rv)
			break;
	}
	vm_page_unlock_queues();
	return (rv);
}

/*
 * pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	struct ia64_lpte *pte;

	pte = pmap_find_vhpt(addr);
	if (pte != NULL && pmap_present(pte))
		return (FALSE);
	return (TRUE);
}

/*
 * pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	rv = FALSE;
	vm_page_lock_queues();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		pmap_switch(oldpmap);
		KASSERT(pte != NULL, ("pte"));
		rv = pmap_accessed(pte) ? TRUE : FALSE;
		PMAP_UNLOCK(pv->pv_pmap);
		if (rv)
			break;
	}
	vm_page_unlock_queues();
	return (rv);
}

/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_modify: page %p is not managed", m));
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT((m->oflags & VPO_BUSY) == 0,
	    ("pmap_clear_modify: page %p is busy", m));

	/*
	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
	 * If the object containing the page is locked and the page is not
	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
	 */
	if ((m->aflags & PGA_WRITEABLE) == 0)
		return;
	vm_page_lock_queues();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_dirty(pte)) {
			pmap_clear_dirty(pte);
			pmap_invalidate_page(pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}
	vm_page_unlock_queues();
}

/*
 * pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_reference: page %p is not managed", m));
	vm_page_lock_queues();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_accessed(pte)) {
			pmap_clear_accessed(pte);
			pmap_invalidate_page(pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}
	vm_page_unlock_queues();
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	vm_prot_t prot;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no page table entries need updating.
	 */
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((m->oflags & VPO_BUSY) == 0 &&
	    (m->aflags & PGA_WRITEABLE) == 0)
		return;
	vm_page_lock_queues();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = pv->pv_pmap;
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		prot = pmap_prot(pte);
		if ((prot & VM_PROT_WRITE) != 0) {
			if (pmap_dirty(pte)) {
				vm_page_dirty(m);
				pmap_clear_dirty(pte);
			}
			prot &= ~VM_PROT_WRITE;
			pmap_pte_prot(pmap, pte, prot);
			pmap_pte_attr(pte, m->md.memattr);
			pmap_invalidate_page(pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	vm_page_unlock_queues();
}

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(vm_paddr_t pa, vm_size_t sz)
{
	static void *last_va = NULL;
	static vm_paddr_t last_pa = 0;
	static vm_size_t last_sz = 0;
	struct efi_md *md;
	vm_offset_t va;

	if (pa == last_pa && sz == last_sz)
		return (last_va);

	md = efi_md_find(pa);
	if (md == NULL) {
		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
		    __func__, pa, pa + sz - 1);
		return (NULL);
	}

	if (md->md_type == EFI_MD_TYPE_FREE) {
		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
		    pa + sz - 1);
		return (NULL);
	}

	va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
	    IA64_PHYS_TO_RR6(pa);

	last_va = (void *)va;
	last_pa = pa;
	last_sz = sz;
	return (last_va);
}

/*
 * 'Unmap' a range mapped by pmap_mapdev().
 */
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
}
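
/*
 * Illustrative usage sketch (not part of the original source; the address
 * below is hypothetical): a driver mapping a device register window might
 * do
 *
 *	regs = pmap_mapdev(0xfed00000UL, PAGE_SIZE);
 *
 * Because device memory descriptors normally do not carry the WB
 * attribute, the returned pointer would be a region 6 (direct-mapped,
 * uncacheable) address.  pmap_unmapdev() is empty because such mappings
 * consume no kernel virtual address space.
 */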

/*
 * Sets the memory attribute for the specified page.
 */
static void
pmap_page_set_memattr_1(void *arg)
{
	struct ia64_pal_result res;
	register_t is;
	uintptr_t pp = (uintptr_t)arg;

	is = intr_disable();
	res = ia64_call_pal_static(pp, 0, 0, 0);
	intr_restore(is);
}

void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;
	void *va;

	vm_page_lock_queues();
	m->md.memattr = ma;
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		pmap_pte_attr(pte, ma);
		pmap_invalidate_page(pv->pv_va);
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}
	vm_page_unlock_queues();

	if (ma == VM_MEMATTR_UNCACHEABLE) {
#ifdef SMP
		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
		    (void *)PAL_PREFETCH_VISIBILITY);
#else
		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
#endif
		va = (void *)pmap_page_to_va(m);
		critical_enter();
		cpu_flush_dcache(va, PAGE_SIZE);
		critical_exit();
#ifdef SMP
		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
		    (void *)PAL_MC_DRAIN);
#else
		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
#endif
	}
}
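
/*
 * Illustrative note (not part of the original source): when a page is
 * switched to VM_MEMATTR_UNCACHEABLE above, every CPU (via
 * smp_rendezvous() on SMP) first executes PAL_PREFETCH_VISIBILITY, the
 * calling CPU then flushes the page from its data cache, and every CPU
 * finally executes PAL_MC_DRAIN.  The intent is that no cached or
 * in-flight data for the page survives the attribute change.
 */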

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte, tpte;
	vm_paddr_t pa;
	int val;

	PMAP_LOCK(pmap);
retry:
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(addr);
	if (pte != NULL) {
		tpte = *pte;
		pte = &tpte;
	}
	pmap_switch(oldpmap);
	if (pte == NULL || !pmap_present(pte)) {
		val = 0;
		goto out;
	}
	val = MINCORE_INCORE;
	if (pmap_dirty(pte))
		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
	if (pmap_accessed(pte))
		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
	    pmap_managed(pte)) {
		pa = pmap_ppn(pte);
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
out:
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);
	return (val);
}

void
pmap_activate(struct thread *td)
{
	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
}

pmap_t
pmap_switch(pmap_t pm)
{
	pmap_t prevpm;
	int i;

	critical_enter();
	prevpm = PCPU_GET(md.current_pmap);
	if (prevpm == pm)
		goto out;
	if (pm == NULL) {
		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (i << 8)|(PAGE_SHIFT << 2)|1);
		}
	} else {
		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
		}
	}
	PCPU_SET(md.current_pmap, pm);
	ia64_srlz_d();

out:
	critical_exit();
	return (prevpm);
}
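
/*
 * Illustrative note (not part of the original source; the RID value is
 * hypothetical): the value written into each region register by
 * pmap_switch() above packs the region ID into bits 8 and up, the page
 * size used for the region into bits 2-7, and sets the VHPT-walker
 * enable bit (bit 0).  For example, with 8 KB pages (PAGE_SHIFT == 13)
 * and pm_rid[0] == 0x1008, region 0 would be loaded with
 * (0x1008 << 8) | (13 << 2) | 1 == 0x100835.
 */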

void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
	pmap_t oldpm;
	struct ia64_lpte *pte;
	vm_offset_t lim;
	vm_size_t len;

	sz += va & 31;
	va &= ~31;
	sz = (sz + 31) & ~31;

	PMAP_LOCK(pm);
	oldpm = pmap_switch(pm);
	while (sz > 0) {
		lim = round_page(va);
		len = MIN(lim - va, sz);
		pte = pmap_find_vhpt(va);
		if (pte != NULL && pmap_present(pte))
			ia64_sync_icache(va, len);
		va += len;
		sz -= len;
	}
	pmap_switch(oldpm);
	PMAP_UNLOCK(pm);
}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
}

#include "opt_ddb.h"

#ifdef DDB

#include <ddb/ddb.h>

static const char* psnames[] = {
	"1B",	"2B",	"4B",	"8B",
	"16B",	"32B",	"64B",	"128B",
	"256B",	"512B",	"1K",	"2K",
	"4K",	"8K",	"16K",	"32K",
	"64K",	"128K",	"256K",	"512K",
	"1M",	"2M",	"4M",	"8M",
	"16M",	"32M",	"64M",	"128M",
	"256M",	"512M",	"1G",	"2G"
};

static void
print_trs(int type)
{
	struct ia64_pal_result res;
	int i, maxtr;
	struct {
		pt_entry_t	pte;
		uint64_t	itir;
		uint64_t	ifa;
		struct ia64_rr	rr;
	} buf;
	static const char *manames[] = {
		"WB",	"bad",	"bad",	"bad",
		"UC",	"UCE",	"WC",	"NaT",
	};

	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		db_printf("Can't get VM summary\n");
		return;
	}

	if (type == 0)
		maxtr = (res.pal_result[0] >> 40) & 0xff;
	else
		maxtr = (res.pal_result[0] >> 32) & 0xff;

	db_printf("V RID Virtual Page Physical Page PgSz ED AR PL D A MA P KEY\n");
	for (i = 0; i <= maxtr; i++) {
		bzero(&buf, sizeof(buf));
		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
		    ia64_tpa((uint64_t)&buf));
		if (!(res.pal_result[0] & 1))
			buf.pte &= ~PTE_AR_MASK;
		if (!(res.pal_result[0] & 2))
			buf.pte &= ~PTE_PL_MASK;
		if (!(res.pal_result[0] & 4))
			pmap_clear_dirty(&buf);
		if (!(res.pal_result[0] & 8))
			buf.pte &= ~PTE_MA_MASK;
		db_printf("%d %06x %013lx %013lx %4s %d %d %d %d %d %-3s "
		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
		    (buf.pte & PTE_ED) ? 1 : 0,
		    (int)(buf.pte & PTE_AR_MASK) >> 9,
		    (int)(buf.pte & PTE_PL_MASK) >> 7,
		    (pmap_dirty(&buf)) ? 1 : 0,
		    (pmap_accessed(&buf)) ? 1 : 0,
		    manames[(buf.pte & PTE_MA_MASK) >> 2],
		    (pmap_present(&buf)) ? 1 : 0,
		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
	}
}

DB_COMMAND(itr, db_itr)
{
	print_trs(0);
}

DB_COMMAND(dtr, db_dtr)
{
	print_trs(1);
}

DB_COMMAND(rr, db_rr)
{
	int i;
	uint64_t t;
	struct ia64_rr rr;

	printf("RR RID PgSz VE\n");
	for (i = 0; i < 8; i++) {
		__asm __volatile ("mov %0=rr[%1]"
		    : "=r"(t)
		    : "r"(IA64_RR_BASE(i)));
		*(uint64_t *) &rr = t;
		printf("%d %06x %4s %d\n",
		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
	}
}

DB_COMMAND(thash, db_thash)
{
	if (!have_addr)
		return;

	db_printf("%p\n", (void *) ia64_thash(addr));
}

DB_COMMAND(ttag, db_ttag)
{
	if (!have_addr)
		return;

	db_printf("0x%lx\n", ia64_ttag(addr));
}

DB_COMMAND(kpte, db_kpte)
{
	struct ia64_lpte *pte;

	if (!have_addr) {
		db_printf("usage: kpte <kva>\n");
		return;
	}
	if (addr < VM_MIN_KERNEL_ADDRESS) {
		db_printf("kpte: error: invalid <kva>\n");
		return;
	}
	pte = pmap_find_kpte(addr);
	db_printf("kpte at %p:\n", pte);
	db_printf("  pte  =%016lx\n", pte->pte);
	db_printf("  itir =%016lx\n", pte->itir);
	db_printf("  tag  =%016lx\n", pte->tag);
	db_printf("  chain=%016lx\n", pte->chain);
}

#endif
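
/*
 * Illustrative usage note (not part of the original source; the address is
 * hypothetical): at the DDB prompt the commands above can be used as, e.g.,
 *
 *	db> itr				(dump the instruction translation registers)
 *	db> dtr				(dump the data translation registers)
 *	db> kpte 0xa000000000123000	(show the kernel PTE for that KVA)
 *
 * "kpte" only accepts kernel virtual addresses at or above
 * VM_MIN_KERNEL_ADDRESS.
 */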