/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 1998,2000 Doug Rabson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
 *		with some ideas from NetBSD's alpha pmap
 * $FreeBSD: head/sys/ia64/ia64/pmap.c 109623 2003-01-21 08:56:16Z alfred $
 */

/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

/*
 * Following the Linux model, region IDs are allocated in groups of
 * eight so that a single region ID can be used for as many RRs as we
 * want by encoding the RR number into the low bits of the ID.
 *
 * We reserve region ID 0 for the kernel and allocate the remaining
 * IDs for user pmaps.
 *
 * Region 0..4
 *	User virtually mapped
 *
 * Region 5
 *	Kernel virtually mapped
 *
 * Region 6
 *	Kernel physically mapped uncacheable
 *
 * Region 7
 *	Kernel physically mapped cacheable
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#include <sys/user.h>

#include <machine/pal.h>
#include <machine/md_var.h>

MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048	/* Preallocate at least this many */
#define MAXPV 20480	/* But no more than this */

#if 0
#define PMAP_DIAGNOSTIC
#define PMAP_DEBUG
#endif

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
#define pmap_pte_v(pte)		((pte)->pte_p)
#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)

#define pmap_pte_set_w(pte, v)	((v) ? ((pte)->pte_ig |= PTE_IG_WIRED) \
				     : ((pte)->pte_ig &= ~PTE_IG_WIRED))
#define pmap_pte_set_prot(pte, v) do {		\
	(pte)->pte_ar = v >> 2;			\
	(pte)->pte_pl = v & 3;			\
} while (0)

/*
 * Given a map and a machine independent protection code,
 * convert to an ia64 protection code.
 */
#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
int	protection_codes[2][8];

/*
 * Return non-zero if this pmap is currently active
 */
#define pmap_isactive(pmap)	(pmap->pm_active)

/*
 * Statically allocated kernel pmap
 */
struct pmap kernel_pmap_store;

vm_offset_t avail_start;	/* PA of first available physical page */
vm_offset_t avail_end;		/* PA of last available physical page */
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */

vm_offset_t vhpt_base, vhpt_size;

/*
 * We use an object to own the kernel's 'page tables'. For simplicity,
 * we use one page directory to index a set of pages containing
 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
 */
static vm_object_t kptobj;
static int nkpt;
static struct ia64_lpte **kptdir;
#define KPTE_DIR_INDEX(va) \
	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_PTE_INDEX(va) \
	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
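
/*
 * Worked example (added comment, not in the original source):
 * assuming 8KB pages (PAGE_SHIFT == 13) and 32-byte PTEs, as the
 * 5-bit shifts in the macros above imply, each PTE page holds
 * NKPTEPG == 8192/32 == 256 entries and the directory page holds
 * PAGE_SIZE/8 == 1024 pointers. The two levels therefore cover
 * 1024 * 256 * 8KB == 2GB, which is where the 2Gb figure in the
 * comment above comes from.
 */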

vm_offset_t kernel_vm_end;

/*
 * Values for ptc.e. XXX values for SKI.
 */
static u_int64_t pmap_ptc_e_base = 0x100000000;
static u_int64_t pmap_ptc_e_count1 = 3;
static u_int64_t pmap_ptc_e_count2 = 2;
static u_int64_t pmap_ptc_e_stride1 = 0x2000;
static u_int64_t pmap_ptc_e_stride2 = 0x100000000;

/*
 * Data for the RID allocator
 */
static u_int64_t *pmap_ridbusy;
static int pmap_ridmax, pmap_ridcount;
struct mtx pmap_ridmutex;

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
int pmap_pagedaemon_waken;
static struct pv_entry *pvbootentries;
static int pvbootnext, pvbootmax;

/*
 * Data for allocating PTEs for user processes.
 */
static uma_zone_t ptezone;

/*
 * VHPT instrumentation.
 */
static int pmap_vhpt_inserts;
static int pmap_vhpt_collisions;
static int pmap_vhpt_resident;
SYSCTL_DECL(_vm_stats);
SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
	   &pmap_vhpt_inserts, 0, "");
SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
	   &pmap_vhpt_collisions, 0, "");
SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
	   &pmap_vhpt_resident, 0, "");

static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(void);
static void	ia64_protection_init(void);

static void	pmap_invalidate_all(pmap_t pmap);
static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);

vm_offset_t
pmap_steal_memory(vm_size_t size)
{
	vm_size_t bank_size;
	vm_offset_t pa, va;

	size = round_page(size);

	bank_size = phys_avail[1] - phys_avail[0];
	while (size > bank_size) {
		int i;
		for (i = 0; phys_avail[i+2]; i += 2) {
			phys_avail[i] = phys_avail[i+2];
			phys_avail[i+1] = phys_avail[i+3];
		}
		phys_avail[i] = 0;
		phys_avail[i+1] = 0;
		if (!phys_avail[0])
			panic("pmap_steal_memory: out of memory");
		bank_size = phys_avail[1] - phys_avail[0];
	}

	pa = phys_avail[0];
	phys_avail[0] += size;

	va = IA64_PHYS_TO_RR7(pa);
	bzero((caddr_t) va, size);
	return va;
}
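
/*
 * Usage note (added comment, not in the original source): callers get
 * back a region 7 (cacheable, direct-mapped) virtual address for the
 * stolen, already-zeroed range; e.g. pmap_bootstrap() below grabs the
 * kernel page-table directory with
 *
 *	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
 */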

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap()
{
	int i, j, count, ridbits;
	struct ia64_pal_result res;

	/*
	 * Query the PAL Code to find the loop parameters for the
	 * ptc.e instruction.
	 */
	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
	if (res.pal_status != 0)
		panic("Can't configure ptc.e parameters");
	pmap_ptc_e_base = res.pal_result[0];
	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
	if (bootverbose)
		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
		       "stride1=0x%lx, stride2=0x%lx\n",
		       pmap_ptc_e_base,
		       pmap_ptc_e_count1,
		       pmap_ptc_e_count2,
		       pmap_ptc_e_stride1,
		       pmap_ptc_e_stride2);

	/*
	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
	 */
	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		if (bootverbose)
			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
		ridbits = 18; /* guaranteed minimum */
	} else {
		ridbits = (res.pal_result[1] >> 8) & 0xff;
		if (bootverbose)
			printf("Processor supports %d Region ID bits\n",
			       ridbits);
	}
	pmap_ridmax = (1 << ridbits);
	pmap_ridcount = 8;
	pmap_ridbusy = (u_int64_t *)
		pmap_steal_memory(pmap_ridmax / 8);
	bzero(pmap_ridbusy, pmap_ridmax / 8);
	pmap_ridbusy[0] |= 0xff;
	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);

	/*
	 * Allocate some memory for initial kernel 'page tables'.
	 */
	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
	for (i = 0; i < NKPT; i++) {
		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
	}
	nkpt = NKPT;

	avail_start = phys_avail[0];
	for (i = 0; phys_avail[i+2]; i += 2) ;
	avail_end = phys_avail[i+1];
	count = i+2;

	/*
	 * Figure out a useful size for the VHPT, based on the size of
	 * physical memory and try to locate a region which is large
	 * enough to contain the VHPT (which must be a power of two in
	 * size and aligned to a natural boundary).
	 * Don't use the difference between avail_start and avail_end
	 * as a measure for memory size. The address space is often
	 * sparse enough that using it would make us (try to) create
	 * a huge VHPT.
	 */
	vhpt_size = 15;
	while ((1<<vhpt_size) < ia64_btop(Maxmem) * 32)
		vhpt_size++;

	vhpt_base = 0;
	while (!vhpt_base) {
		vm_offset_t mask;
		if (bootverbose)
			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
		mask = (1L << vhpt_size) - 1;
		for (i = 0; i < count; i += 2) {
			vm_offset_t base, limit;
			base = (phys_avail[i] + mask) & ~mask;
			limit = base + (1L << vhpt_size);
			if (limit <= phys_avail[i+1])
				/*
				 * VHPT can fit in this region
				 */
				break;
		}
		if (!phys_avail[i]) {
			/*
			 * Can't fit, try next smaller size.
			 */
			vhpt_size--;
		} else {
			vhpt_base = (phys_avail[i] + mask) & ~mask;
		}
	}
	if (vhpt_size < 15)
		panic("Can't find space for VHPT");

	if (bootverbose)
		printf("Putting VHPT at %p\n", (void *) vhpt_base);
	if (vhpt_base != phys_avail[i]) {
		/*
		 * Split this region.
		 */
		if (bootverbose)
			printf("Splitting [%p-%p]\n",
			       (void *) phys_avail[i],
			       (void *) phys_avail[i+1]);
		for (j = count; j > i; j -= 2) {
			phys_avail[j] = phys_avail[j-2];
			phys_avail[j+1] = phys_avail[j-2+1];
		}
		phys_avail[count+2] = 0;
		phys_avail[count+3] = 0;
		phys_avail[i+1] = vhpt_base;
		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
	} else {
		phys_avail[i] = vhpt_base + (1L << vhpt_size);
	}

	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
	bzero((void *) vhpt_base, (1L << vhpt_size));
	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));

	virtual_avail = IA64_RR_BASE(5);
	virtual_end = IA64_RR_BASE(6)-1;

	/*
	 * Initialize protection array.
	 */
	ia64_protection_init();

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	for (i = 0; i < 5; i++)
		kernel_pmap->pm_rid[i] = 0;
	kernel_pmap->pm_active = 1;
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	PCPU_SET(current_pmap, kernel_pmap);

	/*
	 * Region 5 is mapped via the vhpt.
	 */
	ia64_set_rr(IA64_RR_BASE(5),
		    (5 << 8) | (PAGE_SHIFT << 2) | 1);

	/*
	 * Region 6 is direct mapped UC and region 7 is direct mapped
	 * WB (cacheable). The details of this are controlled by the
	 * Alt {I,D}TLB handlers. Here we just make sure that they
	 * have the largest possible page size to minimise TLB usage.
	 */
	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));

	/*
	 * Reserve some memory for allocating pvs while bootstrapping
	 * the pv allocator. We need to have enough to cover mapping
	 * the kmem_alloc region used to allocate the initial_pvs in
	 * pmap_init. In general, the size of this region is
	 * approximately (# physical pages) * (size of pv entry).
	 */
	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
	pvbootentries = (struct pv_entry *)
		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
	pvbootnext = 0;

	/*
	 * Clear out any random TLB entries left over from booting.
	 */
	pmap_invalidate_all(kernel_pmap);
}

void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	static vm_pindex_t color;
	vm_page_t m;
	int pflags;
	void *va;

	*flags = UMA_SLAB_PRIV;
	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
		pflags = VM_ALLOC_INTERRUPT;
	else
		pflags = VM_ALLOC_SYSTEM;
	if (wait & M_ZERO)
		pflags |= VM_ALLOC_ZERO;

	for (;;) {
		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			else
				VM_WAIT;
		} else
			break;
	}

	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	if ((m->flags & PG_ZERO) == 0)
		bzero(va, PAGE_SIZE);
	return (va);
}

void
uma_small_free(void *mem, int size, u_int8_t flags)
{
	vm_page_t m;

	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
	vm_page_lock_queues();
	vm_page_free(m);
	vm_page_unlock_queues();
}
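
/*
 * Note (added comment, not in the original source): the allocator and
 * free routine above rely on the region 7 direct map being a trivial,
 * invertible transformation: IA64_PHYS_TO_RR7() ORs the physical
 * address into region 7 (base 7 << 61) and IA64_RR_MASK() strips the
 * region bits again, so neither path touches any page tables.
 */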

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 * pmap_init has been enhanced to support in a fairly consistent
 * way, discontiguous physical memory.
 */
void
pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
{
	int i;
	int initial_pvs;

	/*
	 * Allocate memory for random pmap data structures. Includes the
	 * pv_head_table.
	 */

	for (i = 0; i < vm_page_array_size; i++) {
		vm_page_t m;

		m = &vm_page_array[i];
		TAILQ_INIT(&m->md.pv_list);
		m->md.pv_list_count = 0;
	}

	/*
	 * Init the pv free list and the PTE free list.
	 */
	initial_pvs = vm_page_array_size;
	if (initial_pvs < MINPV)
		initial_pvs = MINPV;
	if (initial_pvs > MAXPV)
		initial_pvs = MAXPV;
	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
	uma_prealloc(pvzone, initial_pvs);

	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
	uma_prealloc(ptezone, initial_pvs);

	/*
	 * Create the object for the kernel's page tables.
	 */
	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);

	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}

/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2()
{
	int shpgperproc = PMAP_SHPGPERPROC;

	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	pv_entry_high_water = 9 * (pv_entry_max / 10);
}


/***************************************************
 * Manipulate TLBs for a pmap
 ***************************************************/

static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
		("invalidating TLB for non-current pmap"));
	ia64_ptc_g(va, PAGE_SHIFT << 2);
}

static void
pmap_invalidate_all_1(void *arg)
{
	u_int64_t addr;
	int i, j;
	register_t psr;

	psr = intr_disable();
	addr = pmap_ptc_e_base;
	for (i = 0; i < pmap_ptc_e_count1; i++) {
		for (j = 0; j < pmap_ptc_e_count2; j++) {
			ia64_ptc_e(addr);
			addr += pmap_ptc_e_stride2;
		}
		addr += pmap_ptc_e_stride1;
	}
	intr_restore(psr);
}

static void
pmap_invalidate_all(pmap_t pmap)
{
	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
		("invalidating TLB for non-current pmap"));

#ifdef SMP
	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
#else
	pmap_invalidate_all_1(0);
#endif
}

static u_int32_t
pmap_allocate_rid(void)
{
	int rid;

	if (pmap_ridcount == pmap_ridmax)
		panic("pmap_allocate_rid: All Region IDs used");

	do {
		rid = arc4random() & (pmap_ridmax - 1);
	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
	pmap_ridcount++;

	return rid;
}

static void
pmap_free_rid(u_int32_t rid)
{
	mtx_lock(&pmap_ridmutex);
	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
	pmap_ridcount--;
	mtx_unlock(&pmap_ridmutex);
}
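
/*
 * Encoding note (added comment, not in the original source): the
 * region register values written below and in pmap_bootstrap() pack
 * their fields as (rid << 8) | (page size << 2) | 1, i.e. the "ve"
 * (VHPT walker enable) bit in bit 0, the preferred page size in bits
 * 2..7 and the region ID starting at bit 8, matching the ia64 RR
 * layout.
 */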

static void
pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
{
	int rr;

	rr = va >> 61;

	/*
	 * We get called for virtual addresses that may just as well be
	 * kernel addresses (i.e. region 5, 6 or 7). Since the pm_rid
	 * field only holds region IDs for user regions, we have to make
	 * sure the region is within bounds.
	 */
	if (rr >= 5)
		return;

	if (pmap->pm_rid[rr])
		return;

	mtx_lock(&pmap_ridmutex);
	pmap->pm_rid[rr] = pmap_allocate_rid();
	if (pmap == PCPU_GET(current_pmap))
		ia64_set_rr(IA64_RR_BASE(rr),
			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
	mtx_unlock(&pmap_ridmutex);
}

/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * Install a pte into the VHPT
 */
static PMAP_INLINE void
pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
{
	u_int64_t *vhp, *p;

	/* invalidate the pte */
	atomic_set_64(&vhpte->pte_tag, 1L << 63);
	ia64_mf();			/* make sure everyone sees */

	vhp = (u_int64_t *) vhpte;
	p = (u_int64_t *) pte;

	vhp[0] = p[0];
	vhp[1] = p[1];
	vhp[2] = p[2];			/* sets ti to one */

	ia64_mf();
}

/*
 * Compare essential parts of pte.
 */
static PMAP_INLINE int
pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
{
	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
}

/*
 * this routine defines the region(s) of memory that should
 * not be tested for the modified bit.
 */
static PMAP_INLINE int
pmap_track_modified(vm_offset_t va)
{
	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
		return 1;
	else
		return 0;
}

#ifndef KSTACK_MAX_PAGES
#define KSTACK_MAX_PAGES 32
#endif

/*
 * Create the KSTACK for a new thread.
 * This routine directly affects the fork perf for a process/thread.
 */
void
pmap_new_thread(struct thread *td, int pages)
{
	vm_offset_t *ks;

	/* Bounds check */
	if (pages <= 1)
		pages = KSTACK_PAGES;
	else if (pages > KSTACK_MAX_PAGES)
		pages = KSTACK_MAX_PAGES;

	/*
	 * Use contigmalloc for user area so that we can use a region
	 * 7 address for it which makes it impossible to accidentally
	 * lose when recording a trapframe.
	 */
	ks = contigmalloc(pages * PAGE_SIZE, M_PMAP, 0, 0ul,
	    256*1024*1024 - 1, PAGE_SIZE, 256*1024*1024);
	if (ks == NULL)
		panic("pmap_new_thread: could not contigmalloc %d pages\n",
		    pages);

	td->td_md.md_kstackvirt = ks;
	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
	td->td_kstack_pages = pages;
}

/*
 * Dispose of the KSTACK for a thread that has exited.
 * This routine directly impacts the exit perf of a process/thread.
 */
void
pmap_dispose_thread(struct thread *td)
{
	int pages;

	pages = td->td_kstack_pages;
	contigfree(td->td_md.md_kstackvirt, pages * PAGE_SIZE, M_PMAP);
	td->td_md.md_kstackvirt = NULL;
	td->td_kstack = 0;
}

/*
 * Set up a variable sized alternate kstack.  This appears to be MI.
 */
void
pmap_new_altkstack(struct thread *td, int pages)
{

	/*
	 * Shuffle the original stack. Save the virtual kstack address
	 * instead of the physical address because 1) we can derive the
	 * physical address from the virtual address and 2) we need the
	 * virtual address in pmap_dispose_thread.
	 */
	td->td_altkstack_obj = td->td_kstack_obj;
	td->td_altkstack = (vm_offset_t)td->td_md.md_kstackvirt;
	td->td_altkstack_pages = td->td_kstack_pages;

	pmap_new_thread(td, pages);
}

void
pmap_dispose_altkstack(struct thread *td)
{

	pmap_dispose_thread(td);

	/*
	 * Restore the original kstack. Note that td_altkstack holds the
	 * virtual kstack address of the previous kstack.
	 */
	td->td_md.md_kstackvirt = (void*)td->td_altkstack;
	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa(td->td_altkstack));
	td->td_kstack_obj = td->td_altkstack_obj;
	td->td_kstack_pages = td->td_altkstack_pages;
	td->td_altkstack = 0;
	td->td_altkstack_obj = NULL;
	td->td_altkstack_pages = 0;
}

/*
 * Allow the KSTACK for a thread to be prejudicially paged out.
 */
void
pmap_swapout_thread(struct thread *td)
{
}

/*
 * Bring the KSTACK for a specified thread back in.
 */
void
pmap_swapin_thread(struct thread *td)
{
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

void
pmap_pinit0(struct pmap *pmap)
{
	/* kernel_pmap is the same as any other pmap. */
	pmap_pinit(pmap);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(struct pmap *pmap)
{
	int i;

	pmap->pm_flags = 0;
	for (i = 0; i < 5; i++)
		pmap->pm_rid[i] = 0;
	pmap->pm_ptphint = NULL;
	pmap->pm_active = 0;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Wire in kernel global address entries.  To avoid a race condition
 * between pmap initialization and pmap_growkernel, this procedure
 * should be called after the vmspace is attached to the process
 * but before this pmap is activated.
 */
void
pmap_pinit2(struct pmap *pmap)
{
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	int i;

	for (i = 0; i < 5; i++)
		if (pmap->pm_rid[i])
			pmap_free_rid(pmap->pm_rid[i]);
}

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	struct ia64_lpte *ptepage;
	vm_page_t nkpg;

	if (kernel_vm_end == 0) {
		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
			+ IA64_RR_BASE(5);
	}
	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
	while (kernel_vm_end < addr) {
		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
				& ~(PAGE_SIZE * NKPTEPG - 1);
			continue;
		}

		/*
		 * We could handle more by increasing the size of kptdir.
		 */
		if (nkpt == MAXKPT)
			panic("pmap_growkernel: out of kernel address space");

		/*
		 * This index is bogus, but out of the way
		 */
		nkpg = vm_page_alloc(kptobj, nkpt,
		    VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");

		nkpt++;
		ptepage = (struct ia64_lpte *)
			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
		bzero(ptepage, PAGE_SIZE);
		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;

		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
			& ~(PAGE_SIZE * NKPTEPG - 1);
	}
}

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{
	pv_entry_count--;
	uma_zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 * the memory allocation is performed bypassing the malloc code
 * because of the possibility of allocations at interrupt time.
 */
static pv_entry_t
get_pv_entry(void)
{
	pv_entry_count++;
	if (pv_entry_high_water &&
	    (pv_entry_count > pv_entry_high_water) &&
	    (pmap_pagedaemon_waken == 0)) {
		pmap_pagedaemon_waken = 1;
		wakeup(&vm_pages_needed);
	}
	return uma_zalloc(pvzone, M_NOWAIT);
}

/*
 * Add an ia64_lpte to the VHPT.
 */
static void
pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
{
	struct ia64_lpte *vhpte;

	pmap_vhpt_inserts++;
	pmap_vhpt_resident++;

	vhpte = (struct ia64_lpte *) ia64_thash(va);

	if (vhpte->pte_chain)
		pmap_vhpt_collisions++;

	pte->pte_chain = vhpte->pte_chain;
	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);

	if (!vhpte->pte_p && pte->pte_p)
		pmap_install_pte(vhpte, pte);
	else
		ia64_mf();
}

/*
 * Update VHPT after a pte has changed.
 */
static void
pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
{
	struct ia64_lpte *vhpte;

	vhpte = (struct ia64_lpte *) ia64_thash(va);

	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
	    && pte->pte_p)
		pmap_install_pte(vhpte, pte);
}

/*
 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
 * worked or an appropriate error code otherwise.
 */
static int
pmap_remove_vhpt(vm_offset_t va)
{
	struct ia64_lpte *pte;
	struct ia64_lpte *lpte;
	struct ia64_lpte *vhpte;
	u_int64_t tag;
	int error = ENOENT;

	vhpte = (struct ia64_lpte *) ia64_thash(va);

	/*
	 * If the VHPTE is invalid, there can't be a collision chain.
	 */
	if (!vhpte->pte_p) {
		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
		printf("can't remove vhpt entry for 0x%lx\n", va);
		goto done;
	}

	lpte = vhpte;
	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
	tag = ia64_ttag(va);

	while (pte->pte_tag != tag) {
		lpte = pte;
		if (pte->pte_chain)
			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
		else {
			printf("can't remove vhpt entry for 0x%lx\n", va);
			goto done;
		}
	}

	/*
	 * Snip this pte out of the collision chain.
	 */
	lpte->pte_chain = pte->pte_chain;

	/*
	 * If the VHPTE matches as well, change it to map the first
	 * element from the chain if there is one.
	 */
	if (vhpte->pte_tag == tag) {
		if (vhpte->pte_chain) {
			pte = (struct ia64_lpte *)
				IA64_PHYS_TO_RR7(vhpte->pte_chain);
			pmap_install_pte(vhpte, pte);
		} else {
			vhpte->pte_p = 0;
			ia64_mf();
		}
	}

	pmap_vhpt_resident--;
	error = 0;
 done:
	return error;
}

/*
 * Find the ia64_lpte for the given va, if any.
 */
static struct ia64_lpte *
pmap_find_vhpt(vm_offset_t va)
{
	struct ia64_lpte *pte;
	u_int64_t tag;

	pte = (struct ia64_lpte *) ia64_thash(va);
	if (!pte->pte_chain) {
		pte = 0;
		goto done;
	}

	tag = ia64_ttag(va);
	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);

	while (pte->pte_tag != tag) {
		if (pte->pte_chain) {
			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
		} else {
			pte = 0;
			break;
		}
	}

 done:
	return pte;
}

/*
 * Remove an entry from the list of managed mappings.
 */
static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
{
	if (!pv) {
		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
				if (pmap == pv->pv_pmap && va == pv->pv_va)
					break;
			}
		} else {
			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
				if (va == pv->pv_va)
					break;
			}
		}
	}

	if (pv) {
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
			vm_page_flag_clear(m, PG_WRITEABLE);

		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
		free_pv_entry(pv);
		return 0;
	} else {
		return ENOENT;
	}
}

/*
 * Create a pv entry for page at pa for
 * (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	pv = get_pv_entry();
	pv->pv_pmap = pmap;
	pv->pv_va = va;

	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count++;
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	pmap_t oldpmap;
	vm_offset_t pa;

	oldpmap = pmap_install(pmap);
	pa = ia64_tpa(va);
	pmap_install(oldpmap);
	return pa;
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Find the kernel lpte for mapping the given virtual address, which
 * must be in the part of region 5 which we can cover with our kernel
 * 'page tables'.
 */
static struct ia64_lpte *
pmap_find_kpte(vm_offset_t va)
{
	KASSERT((va >> 61) == 5,
		("kernel mapping 0x%lx not in region 5", va));
	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
		("kernel mapping 0x%lx out of range", va));
	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
}

/*
 * Find a pte suitable for mapping a user-space address. If one exists
 * in the VHPT, that one will be returned, otherwise a new pte is
 * allocated.
 */
static struct ia64_lpte *
pmap_find_pte(vm_offset_t va)
{
	struct ia64_lpte *pte;

	if (va >= VM_MAXUSER_ADDRESS)
		return pmap_find_kpte(va);

	pte = pmap_find_vhpt(va);
	if (!pte) {
		pte = uma_zalloc(ptezone, 0);
		pte->pte_p = 0;
	}
	return pte;
}

/*
 * Free a pte which is now unused. This simply returns it to the zone
 * allocator if it is a user mapping. For kernel mappings, clear the
 * valid bit to make it clear that the mapping is not currently used.
 */
static void
pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
{
	if (va < VM_MAXUSER_ADDRESS)
		uma_zfree(ptezone, pte);
	else
		pte->pte_p = 0;
}

/*
 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it's assumed to already be in
 * the VHPT.
 */
static void
pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
	     int ig, int pl, int ar)
{
	int wasvalid = pte->pte_p;

	pte->pte_p = 1;
	pte->pte_ma = PTE_MA_WB;
	if (ig & PTE_IG_MANAGED) {
		pte->pte_a = 0;
		pte->pte_d = 0;
	} else {
		pte->pte_a = 1;
		pte->pte_d = 1;
	}
	pte->pte_pl = pl;
	pte->pte_ar = ar;
	pte->pte_ppn = pa >> 12;
	pte->pte_ed = 0;
	pte->pte_ig = ig;

	pte->pte_ps = PAGE_SHIFT;
	pte->pte_key = 0;

	pte->pte_tag = ia64_ttag(va);

	if (wasvalid) {
		pmap_update_vhpt(pte, va);
	} else {
		pmap_enter_vhpt(pte, va);
	}
}

/*
 * If a pte contains a valid mapping, clear it and update the VHPT.
 */
static void
pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
{
	if (pte->pte_p) {
		pmap_remove_vhpt(va);
		ia64_ptc_g(va, PAGE_SHIFT << 2);
		pte->pte_p = 0;
	}
}

/*
 * Remove the (possibly managed) mapping represented by pte from the
 * given pmap.
 */
static int
pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
		pv_entry_t pv, int freepte)
{
	int error;
	vm_page_t m;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
		("removing pte for non-current pmap"));

	/*
	 * First remove from the VHPT.
	 */
	error = pmap_remove_vhpt(va);
	if (error)
		return error;

	/*
	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
	 */
	pte->pte_p = 0;

	if (pte->pte_ig & PTE_IG_WIRED)
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;
	if (pte->pte_ig & PTE_IG_MANAGED) {
		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
		if (pte->pte_d)
			if (pmap_track_modified(va))
				vm_page_dirty(m);
		if (pte->pte_a)
			vm_page_flag_set(m, PG_REFERENCED);

		if (freepte)
			pmap_free_pte(pte, va);
		return pmap_remove_entry(pmap, m, va, pv);
	} else {
		if (freepte)
			pmap_free_pte(pte, va);
		return 0;
	}
}

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	int i;
	struct ia64_lpte *pte;

	for (i = 0; i < count; i++) {
		vm_offset_t tva = va + i * PAGE_SIZE;
		int wasvalid;
		pte = pmap_find_kpte(tva);
		wasvalid = pte->pte_p;
		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
			     0, PTE_PL_KERN, PTE_AR_RWX);
		if (wasvalid)
			ia64_ptc_g(tva, PAGE_SHIFT << 2);
	}
}

/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	int i;
	struct ia64_lpte *pte;

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		pmap_clear_pte(pte, va);
		va += PAGE_SIZE;
	}
}

/*
 * Add a wired page to the kva.
 */
void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
	struct ia64_lpte *pte;
	int wasvalid;

	pte = pmap_find_kpte(va);
	wasvalid = pte->pte_p;
	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
	if (wasvalid)
		ia64_ptc_g(va, PAGE_SHIFT << 2);
}

/*
 * Remove a page from the kva
 */
void
pmap_kremove(vm_offset_t va)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	pmap_clear_pte(pte, va);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
	return IA64_PHYS_TO_RR7(start);
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
		("removing page for non-current pmap"));

	pte = pmap_find_vhpt(va);
	if (pte) {
		pmap_remove_pte(pmap, pte, va, 0, 1);
		pmap_invalidate_page(pmap, va);
	}
	return;
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_t oldpmap;
	vm_offset_t va;
	pv_entry_t pv;
	struct ia64_lpte *pte;

	if (pmap == NULL)
		return;

	if (pmap->pm_stats.resident_count == 0)
		return;

	oldpmap = pmap_install(pmap);

	/*
	 * Special handling for removing a single page, a very
	 * common operation that is easy to short circuit.
	 */
	if (sva + PAGE_SIZE == eva) {
		pmap_remove_page(pmap, sva);
		pmap_install(oldpmap);
		return;
	}

	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
			va = pv->pv_va;
			if (va >= sva && va < eva) {
				pte = pmap_find_vhpt(va);
				pmap_remove_pte(pmap, pte, va, pv, 1);
				pmap_invalidate_page(pmap, va);
			}
		}
	} else {
		for (va = sva; va < eva; va += PAGE_SIZE) {
			pte = pmap_find_vhpt(va);
			if (pte) {
				pmap_remove_pte(pmap, pte, va, 0, 1);
				pmap_invalidate_page(pmap, va);
			}
		}
	}

	pmap_install(oldpmap);
}

/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
	pmap_t oldpmap;
	pv_entry_t pv;
	int s;

#if defined(PMAP_DIAGNOSTIC)
	/*
	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
	 * pages!
	 */
	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
	}
#endif

	s = splvm();

	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		struct ia64_lpte *pte;
		pmap_t pmap = pv->pv_pmap;
		vm_offset_t va = pv->pv_va;

		oldpmap = pmap_install(pmap);
		pte = pmap_find_vhpt(va);
		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
		pmap_remove_pte(pmap, pte, va, pv, 1);
		pmap_invalidate_page(pmap, va);
		pmap_install(oldpmap);
	}

	vm_page_flag_clear(m, PG_WRITEABLE);

	splx(s);
	return;
}
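
/*
 * Encoding note (added comment, not in the original source): the
 * pte_prot() lookups used below index protection_codes[][], which
 * ia64_protection_init() at the bottom of this file fills with
 * (access rights << 2) | privilege level. For example, VM_PROT_READ
 * on a user pmap yields (PTE_AR_R << 2) | PTE_PL_USER, which
 * pmap_pte_set_prot() splits back into the pte_ar and pte_pl fields.
 */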

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;
	int newprot;

	if (pmap == NULL)
		return;

	oldpmap = pmap_install(pmap);

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		pmap_install(oldpmap);
		return;
	}

	if (prot & VM_PROT_WRITE) {
		pmap_install(oldpmap);
		return;
	}

	newprot = pte_prot(pmap, prot);

	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
		panic("pmap_protect: unaligned addresses");

	while (sva < eva) {
		/*
		 * If page is invalid, skip this page
		 */
		pte = pmap_find_vhpt(sva);
		if (!pte) {
			sva += PAGE_SIZE;
			continue;
		}

		if (pmap_pte_prot(pte) != newprot) {
			if (pte->pte_ig & PTE_IG_MANAGED) {
				vm_offset_t pa = pmap_pte_pa(pte);
				vm_page_t m = PHYS_TO_VM_PAGE(pa);
				if (pte->pte_d) {
					if (pmap_track_modified(sva))
						vm_page_dirty(m);
					pte->pte_d = 0;
				}
				if (pte->pte_a) {
					vm_page_flag_set(m, PG_REFERENCED);
					pte->pte_a = 0;
				}
			}
			pmap_pte_set_prot(pte, newprot);
			pmap_update_vhpt(pte, sva);
			pmap_invalidate_page(pmap, sva);
		}

		sva += PAGE_SIZE;
	}
	pmap_install(oldpmap);
}

/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB:  This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
	   boolean_t wired)
{
	pmap_t oldpmap;
	vm_offset_t pa;
	vm_offset_t opa;
	struct ia64_lpte origpte;
	struct ia64_lpte *pte;
	int managed;

	if (pmap == NULL)
		return;

	pmap_ensure_rid(pmap, va);

	oldpmap = pmap_install(pmap);

	va &= ~PAGE_MASK;
#ifdef PMAP_DIAGNOSTIC
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");
#endif

	/*
	 * Find (or create) a pte for the given mapping.
	 */
	pte = pmap_find_pte(va);
	origpte = *pte;

	if (origpte.pte_p)
		opa = pmap_pte_pa(&origpte);
	else
		opa = 0;
	managed = 0;

	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (origpte.pte_p && (opa == pa)) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
			pmap->pm_stats.wired_count++;
		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
			pmap->pm_stats.wired_count--;

		/*
		 * We might be turning off write access to the page,
		 * so we go ahead and sense modify status.
		 */
		if (origpte.pte_ig & PTE_IG_MANAGED) {
			if (origpte.pte_d && pmap_track_modified(va)) {
				vm_page_t om;
				om = PHYS_TO_VM_PAGE(opa);
				vm_page_dirty(om);
			}
		}

		managed = origpte.pte_ig & PTE_IG_MANAGED;
		goto validate;
	}
	/*
	 * Mapping has changed, invalidate old range and fall
	 * through to handle validating new mapping.
	 */
	if (opa) {
		int error;
		vm_page_lock_queues();
		error = pmap_remove_pte(pmap, pte, va, 0, 0);
		vm_page_unlock_queues();
		if (error)
			panic("pmap_enter: pte vanished, va: 0x%lx", va);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
		pmap_insert_entry(pmap, va, m);
		managed |= PTE_IG_MANAGED;
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:

	/*
	 * Now validate mapping with desired protection/wiring. This
	 * adds the pte to the VHPT if necessary.
	 */
	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));

	/*
	 * if the mapping or permission bits are different, we need
	 * to invalidate the page.
	 */
	if (!pmap_equal_pte(&origpte, pte))
		pmap_invalidate_page(pmap, va);

	pmap_install(oldpmap);
}

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * 5. Tlbflush is deferred to calling procedure.
 * 6. Page IS managed.
 * but is *MUCH* faster than pmap_enter...
 */

static void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;

	pmap_ensure_rid(pmap, va);

	oldpmap = pmap_install(pmap);

	pte = pmap_find_pte(va);
	if (pte->pte_p) {
		/* Restore the previously active pmap before bailing out. */
		pmap_install(oldpmap);
		return;
	}

	/*
	 * Enter on the PV list since it's part of our managed memory.
	 */
	pmap_insert_entry(pmap, va, m);

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;

	/*
	 * Initialise PTE with read-only protection and enter into VHPT.
	 */
	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
		     PTE_IG_MANAGED,
		     PTE_PL_USER, PTE_AR_R);

	pmap_install(oldpmap);
}

/*
 * Make temporary mapping for a physical address. This is called
 * during dump.
 */
void *
pmap_kenter_temporary(vm_offset_t pa, int i)
{
	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
}

#define MAX_INIT_PT (96)

/*
 * pmap_object_init_pt preloads the ptes for a given object
 * into the specified pmap.  This eliminates the blast of soft
 * faults on process startup and immediately after an mmap.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
		    vm_object_t object, vm_pindex_t pindex,
		    vm_size_t size, int limit)
{
	pmap_t oldpmap;
	vm_offset_t tmpidx;
	int psize;
	vm_page_t p;
	int objpgs;

	if (pmap == NULL || object == NULL)
		return;

	oldpmap = pmap_install(pmap);

	psize = ia64_btop(size);

	if ((object->type != OBJT_VNODE) ||
	    ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
	    (object->resident_page_count > MAX_INIT_PT))) {
		pmap_install(oldpmap);
		return;
	}

	if (psize + pindex > object->size) {
		if (object->size < pindex)
			return;
		psize = object->size - pindex;
	}

	/*
	 * if we are processing a major portion of the object, then scan the
	 * entire thing.
	 */
	if (psize > (object->resident_page_count >> 2)) {
		objpgs = psize;

		for (p = TAILQ_FIRST(&object->memq);
		    ((objpgs > 0) && (p != NULL));
		    p = TAILQ_NEXT(p, listq)) {

			tmpidx = p->pindex;
			if (tmpidx < pindex) {
				continue;
			}
			tmpidx -= pindex;
			if (tmpidx >= psize) {
				continue;
			}
			/*
			 * don't allow an madvise to blow away our really
			 * free pages allocating pv entries.
			 */
			if ((limit & MAP_PREFAULT_MADVISE) &&
			    cnt.v_free_count < cnt.v_free_reserved) {
				break;
			}
			vm_page_lock_queues();
			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
			    (p->busy == 0) &&
			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
				if ((p->queue - p->pc) == PQ_CACHE)
					vm_page_deactivate(p);
				vm_page_busy(p);
				vm_page_unlock_queues();
				pmap_enter_quick(pmap,
						 addr + ia64_ptob(tmpidx), p);
				vm_page_lock_queues();
				vm_page_wakeup(p);
			}
			vm_page_unlock_queues();
			objpgs -= 1;
		}
	} else {
		/*
		 * else lookup the pages one-by-one.
		 */
		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
			/*
			 * don't allow an madvise to blow away our really
			 * free pages allocating pv entries.
			 */
			if ((limit & MAP_PREFAULT_MADVISE) &&
			    cnt.v_free_count < cnt.v_free_reserved) {
				break;
			}
			p = vm_page_lookup(object, tmpidx + pindex);
			if (p == NULL)
				continue;
			vm_page_lock_queues();
			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
			    (p->busy == 0) &&
			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
				if ((p->queue - p->pc) == PQ_CACHE)
					vm_page_deactivate(p);
				vm_page_busy(p);
				vm_page_unlock_queues();
				pmap_enter_quick(pmap,
						 addr + ia64_ptob(tmpidx), p);
				vm_page_lock_queues();
				vm_page_wakeup(p);
			}
			vm_page_unlock_queues();
		}
	}
	pmap_install(oldpmap);
	return;
}

/*
 * pmap_prefault provides a quick way of clustering
 * pagefaults into a process's address space.  It is a "cousin"
 * of pmap_object_init_pt, except it runs at page fault time instead
 * of mmap time.
 */
#define PFBAK 4
#define PFFOR 4
#define PAGEORDER_SIZE (PFBAK+PFFOR)

static int pmap_prefault_pageorder[] = {
	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
	-4 * PAGE_SIZE, 4 * PAGE_SIZE
};

void
pmap_prefault(pmap, addra, entry)
	pmap_t pmap;
	vm_offset_t addra;
	vm_map_entry_t entry;
{
	int i;
	vm_offset_t starta;
	vm_offset_t addr;
	vm_pindex_t pindex;
	vm_page_t m, mpte;
	vm_object_t object;

	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
		return;

	object = entry->object.vm_object;

	starta = addra - PFBAK * PAGE_SIZE;
	if (starta < entry->start) {
		starta = entry->start;
	} else if (starta > addra) {
		starta = 0;
	}

	mpte = NULL;
	for (i = 0; i < PAGEORDER_SIZE; i++) {
		vm_object_t lobject;
		struct ia64_lpte *pte;

		addr = addra + pmap_prefault_pageorder[i];
		if (addr > addra + (PFFOR * PAGE_SIZE))
			addr = 0;

		if (addr < starta || addr >= entry->end)
			continue;

		pte = pmap_find_vhpt(addr);
		if (pte && pte->pte_p)
			continue;

		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
		lobject = object;
		for (m = vm_page_lookup(lobject, pindex);
		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
		    lobject = lobject->backing_object) {
			if (lobject->backing_object_offset & PAGE_MASK)
				break;
			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
			m = vm_page_lookup(lobject->backing_object, pindex);
		}

		/*
		 * give-up when a page is not in memory
		 */
		if (m == NULL)
			break;
		vm_page_lock_queues();
		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
		    (m->busy == 0) &&
		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {

			if ((m->queue - m->pc) == PQ_CACHE) {
				vm_page_deactivate(m);
			}
			vm_page_busy(m);
			vm_page_unlock_queues();
			pmap_enter_quick(pmap, addr, m);
			vm_page_lock_queues();
			vm_page_wakeup(m);
		}
		vm_page_unlock_queues();
	}
}

/*
 * Routine:	pmap_change_wiring
 * Function:	Change the wiring attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap, va, wired)
	register pmap_t pmap;
	vm_offset_t va;
	boolean_t wired;
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;

	if (pmap == NULL)
		return;

	oldpmap = pmap_install(pmap);

	pte = pmap_find_vhpt(va);

	if (wired && !pmap_pte_w(pte))
		pmap->pm_stats.wired_count++;
	else if (!wired && pmap_pte_w(pte))
		pmap->pm_stats.wired_count--;

	/*
	 * Wiring is not a hardware characteristic so there is no need to
	 * invalidate TLB.
	 */
	pmap_pte_set_w(pte, wired);

	pmap_install(oldpmap);
}


/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
	  vm_offset_t src_addr)
{
}


/*
 * pmap_zero_page zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.
 */
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	bzero((caddr_t) va, PAGE_SIZE);
}


/*
 * pmap_zero_page_area zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.
 *
 * off and size must reside within a single page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	bzero((char *)(caddr_t)va + off, size);
}


/*
 * pmap_zero_page_idle zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.  This is for the vm_idlezero process.
 */
void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	bzero((caddr_t) va, PAGE_SIZE);
}


/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;
	int s;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return FALSE;

	s = splvm();

	/*
	 * Check current mappings, returning immediately if found.
	 */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			splx(s);
			return TRUE;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	splx(s);
	return (FALSE);
}

#define PMAP_REMOVE_PAGES_CURPROC_ONLY
/*
 * Remove all pages from specified address space;
 * this aids process exit speeds.  Also, this code
 * is special cased for current process only, but
 * can have the more generic (and slightly slower)
 * mode enabled.  This is much faster than pmap_remove
 * in the case of running down an entire address space.
/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;
	int s;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return FALSE;

	s = splvm();

	/*
	 * Check the current mappings, returning immediately if found.
	 */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			splx(s);
			return TRUE;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	splx(s);
	return (FALSE);
}

#define PMAP_REMOVE_PAGES_CURPROC_ONLY
/*
 * Remove all pages from the specified address space; this aids process
 * exit speeds.  Also, this code is special cased for the current
 * process only, but can have the more generic (and slightly slower)
 * mode enabled.  This is much faster than pmap_remove in the case of
 * running down an entire address space.
 */
void
pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pv_entry_t pv, npv;
	int s;

#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
		printf("warning: pmap_remove_pages called with non-current pmap\n");
		return;
	}
#endif

	s = splvm();
	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
	    pv;
	    pv = npv) {
		struct ia64_lpte *pte;

		npv = TAILQ_NEXT(pv, pv_plist);

		if (pv->pv_va >= eva || pv->pv_va < sva) {
			continue;
		}

		pte = pmap_find_vhpt(pv->pv_va);
		if (!pte)
			panic("pmap_remove_pages: page on pm_pvlist has no pte");

		/*
		 * We cannot remove wired pages from a process' mapping
		 * at this time.
		 */
		if (pte->pte_ig & PTE_IG_WIRED) {
			continue;
		}

		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
	}
	splx(s);

	pmap_invalidate_all(pmap);
}

/*
 * pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
 */
void
pmap_page_protect(vm_page_t m, vm_prot_t prot)
{
	pv_entry_t pv;

	if ((prot & VM_PROT_WRITE) != 0)
		return;
	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
		if ((m->flags & PG_WRITEABLE) == 0)
			return;
		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
			int newprot = pte_prot(pv->pv_pmap, prot);
			pmap_t oldpmap = pmap_install(pv->pv_pmap);
			struct ia64_lpte *pte;
			pte = pmap_find_vhpt(pv->pv_va);
			pmap_pte_set_prot(pte, newprot);
			pmap_update_vhpt(pte, pv->pv_va);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
			pmap_install(oldpmap);
		}
		vm_page_flag_clear(m, PG_WRITEABLE);
	} else {
		pmap_remove_all(m);
	}
}

/*
 * Convert a (machine independent) page number to a physical address.
 */
vm_offset_t
pmap_phys_address(int ppn)
{
	return (ia64_ptob(ppn));
}

/*
 * pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	pv_entry_t pv;
	int count = 0;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return 0;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap_t oldpmap = pmap_install(pv->pv_pmap);
		struct ia64_lpte *pte;
		pte = pmap_find_vhpt(pv->pv_va);
		if (pte->pte_a) {
			count++;
			pte->pte_a = 0;
			pmap_update_vhpt(pte, pv->pv_va);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_install(oldpmap);
	}

	return count;
}

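#if 0
/*
 * Illustrative sketch (not compiled): one way a page-aging scan might
 * consume pmap_ts_referenced().  Harvested reference bits bump a
 * per-page activity count; a page showing no references decays toward
 * deactivation.  This is a sketch of the idea only, not the actual
 * machine-independent pageout scan, and the helper name is invented.
 */
static void
pmap_age_page_example(vm_page_t m)
{
	int refs = pmap_ts_referenced(m);

	vm_page_lock_queues();
	if (refs > 0) {
		/* Referenced since the last scan; keep it active. */
		m->act_count += refs;
	} else if (m->act_count > 0) {
		m->act_count--;
	} else {
		/* No recent references; make it a reclaim candidate. */
		vm_page_deactivate(m);
	}
	vm_page_unlock_queues();
}
#endif
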
#if 0
/*
 * pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
static boolean_t
pmap_is_referenced(vm_page_t m)
{
	pv_entry_t pv;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return FALSE;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap_t oldpmap = pmap_install(pv->pv_pmap);
		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
		pmap_install(oldpmap);
		if (pte->pte_a)
			return 1;
	}

	return 0;
}
#endif

/*
 * pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	pv_entry_t pv;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return FALSE;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap_t oldpmap = pmap_install(pv->pv_pmap);
		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
		pmap_install(oldpmap);
		if (pte->pte_d)
			return 1;
	}

	return 0;
}

/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	pv_entry_t pv;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap_t oldpmap = pmap_install(pv->pv_pmap);
		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
		if (pte->pte_d) {
			pte->pte_d = 0;
			pmap_update_vhpt(pte, pv->pv_va);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_install(oldpmap);
	}
}

/*
 * pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	pv_entry_t pv;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap_t oldpmap = pmap_install(pv->pv_pmap);
		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
		if (pte->pte_a) {
			pte->pte_a = 0;
			pmap_update_vhpt(pte, pv->pv_va);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_install(oldpmap);
	}
}

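#if 0
/*
 * Illustrative sketch (not compiled): the pv-walk idiom shared by the
 * bit-clearing loops above, factored into a hypothetical helper.  The
 * helper name and the 'modify' flag are inventions for illustration;
 * this file keeps the loops open-coded instead.
 */
static void
pmap_pv_clear_bit_example(vm_page_t m, int modify)
{
	pv_entry_t pv;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap_t oldpmap = pmap_install(pv->pv_pmap);
		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);

		if (modify ? pte->pte_d : pte->pte_a) {
			if (modify)
				pte->pte_d = 0;
			else
				pte->pte_a = 0;
			/* Push the change to the VHPT and flush the TLB. */
			pmap_update_vhpt(pte, pv->pv_va);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_install(oldpmap);
	}
}
#endif
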
/*
 * Miscellaneous support routines follow
 */

static void
ia64_protection_init()
{
	int prot, *kp, *up;

	kp = protection_codes[0];
	up = protection_codes[1];

	for (prot = 0; prot < 8; prot++) {
		switch (prot) {
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
			break;

		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
			break;

		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
			break;

		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
			break;

		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
			break;

		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
			break;

		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
			break;

		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
			break;
		}
	}
}

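#if 0
/*
 * Illustrative sketch (not compiled): how the protection_codes[][]
 * table built above is meant to be consumed.  Index 0 selects kernel
 * mappings and index 1 user mappings; the second index is the 3-bit
 * VM_PROT_READ/WRITE/EXECUTE combination.  The helper name is
 * hypothetical; per the switch cases above, a user read/write mapping
 * yields (PTE_AR_RW << 2) | PTE_PL_USER.
 */
static int
pmap_user_rw_code_example(void)
{
	return (protection_codes[1][VM_PROT_READ | VM_PROT_WRITE]);
}
#endif
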
/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(vm_offset_t pa, vm_size_t size)
{
	return (void*) IA64_PHYS_TO_RR6(pa);
}

/*
 * 'Unmap' a range mapped by pmap_mapdev().
 */
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
	return;
}

/*
 * Perform the pmap work for mincore.
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;
	int val = 0;

	oldpmap = pmap_install(pmap);
	pte = pmap_find_vhpt(addr);
	pmap_install(oldpmap);

	if (!pte)
		return 0;

	if (pmap_pte_v(pte)) {
		vm_page_t m;
		vm_offset_t pa;

		val = MINCORE_INCORE;
		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
			return val;

		pa = pmap_pte_pa(pte);

		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pte->pte_d)
			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
		/*
		 * Modified by someone
		 */
		else if (pmap_is_modified(m))
			val |= MINCORE_MODIFIED_OTHER;
		/*
		 * Referenced by us
		 */
		if (pte->pte_a)
			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
		/*
		 * Referenced by someone
		 */
		else if (pmap_ts_referenced(m)) {
			val |= MINCORE_REFERENCED_OTHER;
			vm_page_flag_set(m, PG_REFERENCED);
		}
	}
	return val;
}

/*
 * Make the pmap for the given thread's address space current on this
 * CPU.
 */
void
pmap_activate(struct thread *td)
{
	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
}

/*
 * Install the given pmap's region IDs in the region registers and
 * return the previously installed pmap.  A NULL pmap installs the
 * reserved RIDs 0..4, which map nothing.
 */
pmap_t
pmap_install(pmap_t pmap)
{
	pmap_t oldpmap;
	int i;

	critical_enter();

	oldpmap = PCPU_GET(current_pmap);

	if (pmap == oldpmap || pmap == kernel_pmap) {
		critical_exit();
		return pmap;
	}

	if (oldpmap) {
		/* The old pmap is no longer active on this CPU. */
		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
	}

	PCPU_SET(current_pmap, pmap);
	if (!pmap) {
		/*
		 * RIDs 0..4 have no mappings to make sure we generate
		 * page faults on accesses.
		 */
		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
		critical_exit();
		return oldpmap;
	}

	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));

	for (i = 0; i < 5; i++)
		ia64_set_rr(IA64_RR_BASE(i),
			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);

	critical_exit();
	return oldpmap;
}

vm_offset_t
pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
{

	return addr;
}

#include "opt_ddb.h"

#ifdef DDB

#include <ddb/ddb.h>

static const char* psnames[] = {
	"1B",	"2B",	"4B",	"8B",
	"16B",	"32B",	"64B",	"128B",
	"256B",	"512B",	"1K",	"2K",
	"4K",	"8K",	"16K",	"32K",
	"64K",	"128K",	"256K",	"512K",
	"1M",	"2M",	"4M",	"8M",
	"16M",	"32M",	"64M",	"128M",
	"256M",	"512M",	"1G",	"2G"
};

static void
print_trs(int type)
{
	struct ia64_pal_result res;
	int i, maxtr;
	struct {
		struct ia64_pte	pte;
		struct ia64_itir itir;
		struct ia64_ifa	ifa;
		struct ia64_rr	rr;
	} buf;
	static const char* manames[] = {
		"WB",	"bad",	"bad",	"bad",
		"UC",	"UCE",	"WC",	"NaT",
	};

	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		db_printf("Can't get VM summary\n");
		return;
	}

	if (type == 0)
		maxtr = (res.pal_result[0] >> 40) & 0xff;
	else
		maxtr = (res.pal_result[0] >> 32) & 0xff;

	db_printf("V RID Virtual Page Physical Page PgSz ED AR PL D A MA P KEY\n");
	for (i = 0; i <= maxtr; i++) {
		bzero(&buf, sizeof(buf));
		res = ia64_call_pal_stacked_physical
			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
		if (!(res.pal_result[0] & 1))
			buf.pte.pte_ar = 0;
		if (!(res.pal_result[0] & 2))
			buf.pte.pte_pl = 0;
		if (!(res.pal_result[0] & 4))
			buf.pte.pte_d = 0;
		if (!(res.pal_result[0] & 8))
			buf.pte.pte_ma = 0;
		db_printf(
			"%d %06x %013lx %013lx %4s %d %d %d %d %d %-3s %d %06x\n",
			buf.ifa.ifa_ig & 1,
			buf.rr.rr_rid,
			buf.ifa.ifa_vpn,
			buf.pte.pte_ppn,
			psnames[buf.itir.itir_ps],
			buf.pte.pte_ed,
			buf.pte.pte_ar,
			buf.pte.pte_pl,
			buf.pte.pte_d,
			buf.pte.pte_a,
			manames[buf.pte.pte_ma],
			buf.pte.pte_p,
			buf.itir.itir_key);
	}
}

DB_COMMAND(itr, db_itr)
{
	print_trs(0);
}

DB_COMMAND(dtr, db_dtr)
{
	print_trs(1);
}

DB_COMMAND(rr, db_rr)
{
	int i;
	u_int64_t t;
	struct ia64_rr rr;

	printf("RR RID PgSz VE\n");
	for (i = 0; i < 8; i++) {
		__asm __volatile ("mov %0=rr[%1]"
				  : "=r"(t)
				  : "r"(IA64_RR_BASE(i)));
		*(u_int64_t *) &rr = t;
		printf("%d %06x %4s %d\n",
		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
	}
}

DB_COMMAND(thash, db_thash)
{
	if (!have_addr)
		return;

	db_printf("%p\n", (void *) ia64_thash(addr));
}

DB_COMMAND(ttag, db_ttag)
{
	if (!have_addr)
		return;

	db_printf("0x%lx\n", ia64_ttag(addr));
}

#endif

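#if 0
/*
 * Illustrative sketch (not compiled): the region register image
 * written by pmap_install() above and dumped by the 'rr' ddb command.
 * Assuming the layout used there (ve in bit 0, page size in bits 2..7,
 * rid from bit 8 up), this hypothetical helper rebuilds the value
 * passed to ia64_set_rr().
 */
static u_int64_t
pmap_rr_value_example(u_int32_t rid)
{
	return (((u_int64_t)rid << 8) | (PAGE_SHIFT << 2) | 1);
}
#endif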