/* $NetBSD: pmap.c,v 1.257 2012/02/02 18:59:44 para Exp $ */

/*-
 * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center and by Chris G. Demetriou.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
 */

/*
 * DEC Alpha physical map management code.
 *
 * History:
 *
 *	This pmap started life as a Motorola 68851/68030 pmap,
 *	written by Mike Hibler at the University of Utah.
 *
 *	It was modified for the DEC Alpha by Chris Demetriou
 *	at Carnegie Mellon University.
 *
 *	Support for non-contiguous physical memory was added by
 *	Jason R. Thorpe of the Numerical Aerospace Simulation
 *	Facility, NASA Ames Research Center and Chris Demetriou.
 *
 *	Page table management and a major cleanup were undertaken
 *	by Jason R. Thorpe, with lots of help from Ross Harvey of
 *	Avalon Computer Systems and from Chris Demetriou.
 *
 *	Support for the new UVM pmap interface was written by
 *	Jason R. Thorpe.
 *
 *	Support for ASNs was written by Jason R. Thorpe, again
 *	with help from Chris Demetriou and Ross Harvey.
 *
 *	The locking protocol was written by Jason R. Thorpe,
 *	using Chuck Cranor's i386 pmap for UVM as a model.
 *
 *	TLB shootdown code was written by Jason R. Thorpe.
 *
 *	Multiprocessor modifications by Andrew Doran.
 *
 * Notes:
 *
 *	All page table access is done via K0SEG.  The one exception
 *	to this is for kernel mappings.  Since all kernel page
 *	tables are pre-allocated, we can use the Virtual Page Table
 *	to access PTEs that map K1SEG addresses.
 *
 *	Kernel page table pages are statically allocated in
 *	pmap_bootstrap(), and are never freed.  In the future,
 *	support for dynamically adding additional kernel page
 *	table pages may be added.  User page table pages are
 *	dynamically allocated and freed.
 *
 * Bugs/misfeatures:
 *
 *	- Some things could be optimized.
 */

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include "opt_lockdebug.h"
#include "opt_sysv.h"
#include "opt_multiprocessor.h"

#include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */

__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.257 2012/02/02 18:59:44 para Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/shm.h>
#include <sys/atomic.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>

#if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR)
#include <machine/rpb.h>
#endif

#ifdef DEBUG
#define	PDB_FOLLOW	0x0001
#define	PDB_INIT	0x0002
#define	PDB_ENTER	0x0004
#define	PDB_REMOVE	0x0008
#define	PDB_CREATE	0x0010
#define	PDB_PTPAGE	0x0020
#define	PDB_ASN		0x0040
#define	PDB_BITS	0x0080
#define	PDB_COLLECT	0x0100
#define	PDB_PROTECT	0x0200
#define	PDB_BOOTSTRAP	0x1000
#define	PDB_PARANOIA	0x2000
#define	PDB_WIRING	0x4000
#define	PDB_PVDUMP	0x8000

int debugmap = 0;
int pmapdebug = PDB_PARANOIA;
#endif

/*
 * Given a map and a machine independent protection code,
 * convert to an alpha protection code.
 */
#define	pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
static int	protection_codes[2][8];

/*
 * kernel_lev1map:
 *
 *	Kernel level 1 page table.  This maps all kernel level 2
 *	page table pages, and is used as a template for all user
 *	pmap level 1 page tables.  When a new user level 1 page
 *	table is allocated, all kernel_lev1map PTEs for kernel
 *	addresses are copied to the new map.
 *
 *	The kernel also has an initial set of kernel level 2 page
 *	table pages.  These map the kernel level 3 page table pages.
 *	As kernel level 3 page table pages are added, more level 2
 *	page table pages may be added to map them.  These pages are
 *	never freed.
 *
 *	Finally, the kernel also has an initial set of kernel level
 *	3 page table pages.  These map pages in K1SEG.  More level
 *	3 page table pages may be added at run-time if additional
 *	K1SEG address space is required.  These pages are never freed.
 *
 * NOTE: When mappings are inserted into the kernel pmap, all
 * level 2 and level 3 page table pages must already be allocated
 * and mapped into the parent page table.
 */
pt_entry_t	*kernel_lev1map;

/*
 * Virtual Page Table.
 */
static pt_entry_t *VPT;

static struct pmap	kernel_pmap_store
	[(PMAP_SIZEOF(ALPHA_MAXPROCS) + sizeof(struct pmap) - 1)
		/ sizeof(struct pmap)];
struct pmap *const kernel_pmap_ptr = kernel_pmap_store;

paddr_t    	avail_start;	/* PA of first available physical page */
paddr_t		avail_end;	/* PA of last available physical page */
static vaddr_t	virtual_end;	/* VA of last avail page (end of kernel AS) */

static bool pmap_initialized;	/* Has pmap_init completed? */

u_long		pmap_pages_stolen;	/* instrumentation */

/*
 * This variable contains the number of CPU IDs we need to allocate
 * space for when allocating the pmap structure.  It is used to
 * size a per-CPU array of ASN and ASN Generation number.
 */
static u_long	pmap_ncpuids;

#ifndef PMAP_PV_LOWAT
#define	PMAP_PV_LOWAT	16
#endif
int		pmap_pv_lowat = PMAP_PV_LOWAT;

/*
 * List of all pmaps, used to update them when e.g. additional kernel
 * page tables are allocated.  This list is kept LRU-ordered by
 * pmap_activate().
 */
static TAILQ_HEAD(, pmap) pmap_all_pmaps;

/*
 * The pools from which pmap structures and sub-structures are allocated.
 */
static struct pool_cache pmap_pmap_cache;
static struct pool_cache pmap_l1pt_cache;
static struct pool_cache pmap_pv_cache;

/*
 * Address Space Numbers.
 *
 * On many implementations of the Alpha architecture, the TLB entries and
 * I-cache blocks are tagged with a unique number within an implementation-
 * specified range.  When a process context becomes active, the ASN is used
 * to match TLB entries; if a TLB entry for a particular VA does not match
 * the current ASN, it is ignored (one could think of the processor as
 * having a collection of <max ASN> separate TLBs).  This allows operating
 * system software to skip the TLB flush that would otherwise be necessary
 * at context switch time.
 *
 * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
 * causes TLB entries to match any ASN.  The PALcode also provides
 * a TBI (Translation Buffer Invalidate) operation that flushes all
 * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
 * mappings, so that invalidation of all user mappings does not invalidate
 * kernel mappings (which are consistent across all processes).
 *
 * pmap_next_asn always indicates the next ASN to use.  When
 * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
 *
 * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
 * TLB entries and the I-cache are flushed, the generation number is bumped,
 * and pmap_next_asn is changed to indicate the first non-reserved ASN.
 *
 * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
 * prevents the following scenario:
 *
 *	* New ASN generation starts, and process A is given ASN #0.
 *
 *	* A new process B (and thus new pmap) is created.  The ASN,
 *	  for lack of a better value, is initialized to 0.
 *
 *	* Process B runs.  It is now using the TLB entries tagged
 *	  by process A.  *poof*
 *
 * In the scenario above, in addition to the processor using incorrect
 * TLB entries, the PALcode might use incorrect information to service a
 * TLB miss.  (The PALcode uses the recursively mapped Virtual Page Table
 * to locate the PTE for a faulting address, and tagged TLB entries exist
 * for the Virtual Page Table addresses in order to speed up this procedure,
 * as well.)
 *
 * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
 * new pmaps will initially run with no TLB entries for user addresses
 * or VPT mappings that map user page tables.  Since kernel_lev1map only
 * contains mappings for kernel addresses, and since those mappings
 * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
 * safe (since PG_ASM mappings match any ASN).
 *
 * On processors that do not support ASNs, the PALcode invalidates
 * the TLB and I-cache automatically on swpctx.  We still go
 * through the motions of assigning an ASN (really, just refreshing
 * the ASN generation in this particular case) to keep the logic sane
 * in other parts of the code.
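 *
 * As a rough sketch only (see pmap_asn_alloc() for the definitive logic),
 * per-CPU ASN allocation behaves approximately like this:
 *
 *	if (pmap's ASN generation != this CPU's current generation) {
 *		if (++(this CPU's next ASN) > pmap_max_asn) {
 *			flush all non-ASM TLB entries and the I-cache;
 *			bump this CPU's generation;
 *			reset the next ASN to 1 (ASN 0 stays reserved);
 *		}
 *		give the pmap that ASN and the current generation;
 *	}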
 */
static u_int	pmap_max_asn;		/* max ASN supported by the system */
					/* next ASN and cur ASN generation */
static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];

/*
 * Locking:
 *
 *	READ/WRITE LOCKS
 *	----------------
 *
 *	* pmap_main_lock - This lock is used to prevent deadlock and/or
 *	  provide mutex access to the pmap module.  Most operations lock
 *	  the pmap first, then PV lists as needed.  However, some operations,
 *	  such as pmap_page_protect(), lock the PV lists before locking
 *	  the pmaps.  To prevent deadlock, we require a mutex lock on the
 *	  pmap module if locking in the PV->pmap direction.  This is
 *	  implemented by acquiring a (shared) read lock on pmap_main_lock
 *	  if locking pmap->PV and an (exclusive) write lock if locking in
 *	  the PV->pmap direction.  Since only one thread can hold a write
 *	  lock at a time, this provides the mutex.
 *
 *	MUTEXES
 *	-------
 *
 *	* pm_lock (per-pmap) - This lock protects all of the members
 *	  of the pmap structure itself.  This lock will be asserted
 *	  in pmap_activate() and pmap_deactivate() from a critical
 *	  section of mi_switch(), and must never sleep.  Note that
 *	  in the case of the kernel pmap, interrupts which cause
 *	  memory allocation *must* be blocked while this lock is
 *	  asserted.
 *
 *	* pvh_lock (global hash) - These locks protect the PV lists
 *	  for managed pages.
 *
 *	* pmap_all_pmaps_lock - This lock protects the global list of
 *	  all pmaps.  Note that a pm_lock must never be held while this
 *	  lock is held.
 *
 *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
 *	  and the virtual_end variable.
 *
 *	  There is a lock ordering constraint for pmap_growkernel_lock.
 *	  pmap_growkernel() acquires the locks in the following order:
 *
 *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
 *		    pmap->pm_lock
 *
 *	  We need to ensure consistency between user pmaps and the
 *	  kernel_lev1map.  For this reason, pmap_growkernel_lock must
 *	  be held to prevent kernel_lev1map changing across pmaps
 *	  being added to / removed from the global pmaps list.
 *
 *	Address space number management (global ASN counters and per-pmap
 *	ASN state) is not locked; it uses arrays of values indexed
 *	per-processor.
 *
 *	All internal functions which operate on a pmap are called
 *	with the pmap already locked by the caller (which will be
 *	an interface function).
 */
static krwlock_t pmap_main_lock;
static kmutex_t pmap_all_pmaps_lock;
static krwlock_t pmap_growkernel_lock;

#define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
#define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
#define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
#define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)

struct {
	kmutex_t lock;
} __aligned(64) static pmap_pvh_locks[64] __aligned(64);

static inline kmutex_t *
pmap_pvh_lock(struct vm_page *pg)
{

	/*
	 * Cut bits 11-6 out of page address and use directly as offset:
	 * each lock lives in its own 64-byte aligned slot, so these bits
	 * select one of the 64 locks.
	 */
	return (kmutex_t *)((uintptr_t)&pmap_pvh_locks +
	    ((uintptr_t)pg & (63 << 6)));
}

#if defined(MULTIPROCESSOR)
/*
 * TLB Shootdown:
 *
 * When a mapping is changed in a pmap, the TLB entry corresponding to
 * the virtual address must be invalidated on all processors.  In order
 * to accomplish this on systems with multiple processors, messages are
 * sent from the processor which performs the mapping change to all
 * processors on which the pmap is active.  For the other processors,
 * the pmap's ASN generation for that processor is invalidated, so that
 * the next time the pmap is activated on that processor, a new ASN
 * will be allocated (which implicitly invalidates all TLB entries).
 *
 * Note, we can use the pool allocator to allocate job entries
 * since pool pages are mapped with K0SEG, not with the TLB.
 */
struct pmap_tlb_shootdown_job {
	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
	vaddr_t pj_va;			/* virtual address */
	pmap_t pj_pmap;			/* the pmap which maps the address */
	pt_entry_t pj_pte;		/* the PTE bits */
};

static struct pmap_tlb_shootdown_q {
	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;	/* queue 16b */
	kmutex_t pq_lock;		/* spin lock on queue 16b */
	int pq_pte;			/* aggregate PTE bits 4b */
	int pq_count;			/* number of pending requests 4b */
	int pq_tbia;			/* pending global flush 4b */
	uint8_t pq_pad[64-16-16-4-4-4];	/* pad to 64 bytes */
} pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE);

/* If we have more pending jobs than this, we just nail the whole TLB. */
#define	PMAP_TLB_SHOOTDOWN_MAXJOBS	6

static struct pool_cache pmap_tlb_shootdown_job_cache;
#endif /* MULTIPROCESSOR */

/*
 * Internal routines
 */
static void	alpha_protection_init(void);
static bool	pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long);
static void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long);

/*
 * PT page management functions.
 */
static int	pmap_lev1map_create(pmap_t, long);
static void	pmap_lev1map_destroy(pmap_t, long);
static int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
static void	pmap_ptpage_free(pmap_t, pt_entry_t *);
static void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long);
static void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long);
static void	pmap_l1pt_delref(pmap_t, pt_entry_t *, long);

static void	*pmap_l1pt_alloc(struct pool *, int);
static void	pmap_l1pt_free(struct pool *, void *);

static struct pool_allocator pmap_l1pt_allocator = {
	pmap_l1pt_alloc, pmap_l1pt_free, 0,
};

static int	pmap_l1pt_ctor(void *, void *, int);

/*
 * PV table management functions.
 */
static int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
		    bool);
static void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool);
static void	*pmap_pv_page_alloc(struct pool *, int);
static void	pmap_pv_page_free(struct pool *, void *);

static struct pool_allocator pmap_pv_page_allocator = {
	pmap_pv_page_alloc, pmap_pv_page_free, 0,
};

#ifdef DEBUG
void	pmap_pv_dump(paddr_t);
#endif

#define	pmap_pv_alloc()		pool_cache_get(&pmap_pv_cache, PR_NOWAIT)
#define	pmap_pv_free(pv)	pool_cache_put(&pmap_pv_cache, (pv))

/*
 * ASN management functions.
 */
static void	pmap_asn_alloc(pmap_t, long);

/*
 * Misc. functions.
 */
static bool	pmap_physpage_alloc(int, paddr_t *);
static void	pmap_physpage_free(paddr_t);
static int	pmap_physpage_addref(void *);
static int	pmap_physpage_delref(void *);

/*
 * PMAP_ISACTIVE{,_TEST}:
 *
 *	Check to see if a pmap is active on the current processor.
501 */ 502#define PMAP_ISACTIVE_TEST(pm, cpu_id) \ 503 (((pm)->pm_cpus & (1UL << (cpu_id))) != 0) 504 505#if defined(DEBUG) && !defined(MULTIPROCESSOR) 506#define PMAP_ISACTIVE(pm, cpu_id) \ 507({ \ 508 /* \ 509 * XXX This test is not MP-safe. \ 510 */ \ 511 int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id); \ 512 \ 513 if ((curlwp->l_flag & LW_IDLE) != 0 && \ 514 curproc->p_vmspace != NULL && \ 515 ((curproc->p_sflag & PS_WEXIT) == 0) && \ 516 (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap))) \ 517 panic("PMAP_ISACTIVE"); \ 518 (isactive_); \ 519}) 520#else 521#define PMAP_ISACTIVE(pm, cpu_id) PMAP_ISACTIVE_TEST(pm, cpu_id) 522#endif /* DEBUG && !MULTIPROCESSOR */ 523 524/* 525 * PMAP_ACTIVATE_ASN_SANITY: 526 * 527 * DEBUG sanity checks for ASNs within PMAP_ACTIVATE. 528 */ 529#ifdef DEBUG 530#define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) \ 531do { \ 532 struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)]; \ 533 struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)]; \ 534 \ 535 if ((pmap)->pm_lev1map == kernel_lev1map) { \ 536 /* \ 537 * This pmap implementation also ensures that pmaps \ 538 * referencing kernel_lev1map use a reserved ASN \ 539 * ASN to prevent the PALcode from servicing a TLB \ 540 * miss with the wrong PTE. \ 541 */ \ 542 if (__pma->pma_asn != PMAP_ASN_RESERVED) { \ 543 printf("kernel_lev1map with non-reserved ASN " \ 544 "(line %d)\n", __LINE__); \ 545 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 546 } \ 547 } else { \ 548 if (__pma->pma_asngen != __cpma->pma_asngen) { \ 549 /* \ 550 * ASN generation number isn't valid! \ 551 */ \ 552 printf("pmap asngen %lu, current %lu " \ 553 "(line %d)\n", \ 554 __pma->pma_asngen, \ 555 __cpma->pma_asngen, \ 556 __LINE__); \ 557 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 558 } \ 559 if (__pma->pma_asn == PMAP_ASN_RESERVED) { \ 560 /* \ 561 * DANGER WILL ROBINSON! We're going to \ 562 * pollute the VPT TLB entries! \ 563 */ \ 564 printf("Using reserved ASN! (line %d)\n", \ 565 __LINE__); \ 566 panic("PMAP_ACTIVATE_ASN_SANITY"); \ 567 } \ 568 } \ 569} while (/*CONSTCOND*/0) 570#else 571#define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) /* nothing */ 572#endif 573 574/* 575 * PMAP_ACTIVATE: 576 * 577 * This is essentially the guts of pmap_activate(), without 578 * ASN allocation. This is used by pmap_activate(), 579 * pmap_lev1map_create(), and pmap_lev1map_destroy(). 580 * 581 * This is called only when it is known that a pmap is "active" 582 * on the current processor; the ASN must already be valid. 583 */ 584#define PMAP_ACTIVATE(pmap, l, cpu_id) \ 585do { \ 586 struct pcb *pcb = lwp_getpcb(l); \ 587 PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id); \ 588 \ 589 pcb->pcb_hw.apcb_ptbr = \ 590 ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \ 591 pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn; \ 592 \ 593 if ((l) == curlwp) { \ 594 /* \ 595 * Page table base register has changed; switch to \ 596 * our own context again so that it will take effect. \ 597 */ \ 598 (void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr); \ 599 } \ 600} while (/*CONSTCOND*/0) 601 602/* 603 * PMAP_SET_NEEDISYNC: 604 * 605 * Mark that a user pmap needs an I-stream synch on its 606 * way back out to userspace. 607 */ 608#define PMAP_SET_NEEDISYNC(pmap) (pmap)->pm_needisync = ~0UL 609 610/* 611 * PMAP_SYNC_ISTREAM: 612 * 613 * Synchronize the I-stream for the specified pmap. For user 614 * pmaps, this is deferred until a process using the pmap returns 615 * to userspace. 
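 *
 *	Deferring the user-pmap case is safe because the I-stream barrier
 *	is issued on the way back out to usermode (see the "will happen in
 *	userret()" notes below); on MULTIPROCESSOR kernels an AST IPI is
 *	multicast so that other CPUs running the pmap take the same path.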
616 */ 617#if defined(MULTIPROCESSOR) 618#define PMAP_SYNC_ISTREAM_KERNEL() \ 619do { \ 620 alpha_pal_imb(); \ 621 alpha_broadcast_ipi(ALPHA_IPI_IMB); \ 622} while (/*CONSTCOND*/0) 623 624#define PMAP_SYNC_ISTREAM_USER(pmap) \ 625do { \ 626 alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST); \ 627 /* for curcpu, will happen in userret() */ \ 628} while (/*CONSTCOND*/0) 629#else 630#define PMAP_SYNC_ISTREAM_KERNEL() alpha_pal_imb() 631#define PMAP_SYNC_ISTREAM_USER(pmap) /* will happen in userret() */ 632#endif /* MULTIPROCESSOR */ 633 634#define PMAP_SYNC_ISTREAM(pmap) \ 635do { \ 636 if ((pmap) == pmap_kernel()) \ 637 PMAP_SYNC_ISTREAM_KERNEL(); \ 638 else \ 639 PMAP_SYNC_ISTREAM_USER(pmap); \ 640} while (/*CONSTCOND*/0) 641 642/* 643 * PMAP_INVALIDATE_ASN: 644 * 645 * Invalidate the specified pmap's ASN, so as to force allocation 646 * of a new one the next time pmap_asn_alloc() is called. 647 * 648 * NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING 649 * CONDITIONS ARE true: 650 * 651 * (1) The pmap references the global kernel_lev1map. 652 * 653 * (2) The pmap is not active on the current processor. 654 */ 655#define PMAP_INVALIDATE_ASN(pmap, cpu_id) \ 656do { \ 657 (pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED; \ 658} while (/*CONSTCOND*/0) 659 660/* 661 * PMAP_INVALIDATE_TLB: 662 * 663 * Invalidate the TLB entry for the pmap/va pair. 664 */ 665#define PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id) \ 666do { \ 667 if ((hadasm) || (isactive)) { \ 668 /* \ 669 * Simply invalidating the TLB entry and I-cache \ 670 * works in this case. \ 671 */ \ 672 ALPHA_TBIS((va)); \ 673 } else if ((pmap)->pm_asni[(cpu_id)].pma_asngen == \ 674 pmap_asn_info[(cpu_id)].pma_asngen) { \ 675 /* \ 676 * We can't directly invalidate the TLB entry \ 677 * in this case, so we have to force allocation \ 678 * of a new ASN the next time this pmap becomes \ 679 * active. \ 680 */ \ 681 PMAP_INVALIDATE_ASN((pmap), (cpu_id)); \ 682 } \ 683 /* \ 684 * Nothing to do in this case; the next time the \ 685 * pmap becomes active on this processor, a new \ 686 * ASN will be allocated anyway. \ 687 */ \ 688} while (/*CONSTCOND*/0) 689 690/* 691 * PMAP_KERNEL_PTE: 692 * 693 * Get a kernel PTE. 694 * 695 * If debugging, do a table walk. If not debugging, just use 696 * the Virtual Page Table, since all kernel page tables are 697 * pre-allocated and mapped in. 698 */ 699#ifdef DEBUG 700#define PMAP_KERNEL_PTE(va) \ 701({ \ 702 pt_entry_t *l1pte_, *l2pte_; \ 703 \ 704 l1pte_ = pmap_l1pte(pmap_kernel(), va); \ 705 if (pmap_pte_v(l1pte_) == 0) { \ 706 printf("kernel level 1 PTE not valid, va 0x%lx " \ 707 "(line %d)\n", (va), __LINE__); \ 708 panic("PMAP_KERNEL_PTE"); \ 709 } \ 710 l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_); \ 711 if (pmap_pte_v(l2pte_) == 0) { \ 712 printf("kernel level 2 PTE not valid, va 0x%lx " \ 713 "(line %d)\n", (va), __LINE__); \ 714 panic("PMAP_KERNEL_PTE"); \ 715 } \ 716 pmap_l3pte(pmap_kernel(), va, l2pte_); \ 717}) 718#else 719#define PMAP_KERNEL_PTE(va) (&VPT[VPT_INDEX((va))]) 720#endif 721 722/* 723 * PMAP_SET_PTE: 724 * 725 * Set a PTE to a specified value. 726 */ 727#define PMAP_SET_PTE(ptep, val) *(ptep) = (val) 728 729/* 730 * PMAP_STAT_{INCR,DECR}: 731 * 732 * Increment or decrement a pmap statistic. 733 */ 734#define PMAP_STAT_INCR(s, v) atomic_add_long((unsigned long *)(&(s)), (v)) 735#define PMAP_STAT_DECR(s, v) atomic_add_long((unsigned long *)(&(s)), -(v)) 736 737/* 738 * pmap_bootstrap: 739 * 740 * Bootstrap the system to run with virtual memory. 
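 *
 *	In outline (a summary of the code below, not additional behaviour):
 *	size and allocate the initial kernel lev1/lev2/lev3 page tables
 *	with uvm_pageboot_alloc(), wire the lev2/lev3 pages into their
 *	parent tables, map the Virtual Page Table (VPT), bootstrap the
 *	pmap pools and locks, set up per-CPU ASN state, and initialize
 *	and activate the kernel pmap on the boot processor.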
741 * 742 * Note: no locking is necessary in this function. 743 */ 744void 745pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) 746{ 747 vsize_t lev2mapsize, lev3mapsize; 748 pt_entry_t *lev2map, *lev3map; 749 pt_entry_t pte; 750 vsize_t bufsz; 751 struct pcb *pcb; 752 int i; 753 754#ifdef DEBUG 755 if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP)) 756 printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn); 757#endif 758 759 /* 760 * Compute the number of pages kmem_arena will have. 761 */ 762 kmeminit_nkmempages(); 763 764 /* 765 * Figure out how many initial PTE's are necessary to map the 766 * kernel. We also reserve space for kmem_alloc_pageable() 767 * for vm_fork(). 768 */ 769 770 /* Get size of buffer cache and set an upper limit */ 771 bufsz = buf_memcalc(); 772 buf_setvalimit(bufsz); 773 774 lev3mapsize = 775 (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) + 776 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE + 777 (maxproc * UPAGES) + nkmempages; 778 779#ifdef SYSVSHM 780 lev3mapsize += shminfo.shmall; 781#endif 782 lev3mapsize = roundup(lev3mapsize, NPTEPG); 783 784 /* 785 * Initialize `FYI' variables. Note we're relying on 786 * the fact that BSEARCH sorts the vm_physmem[] array 787 * for us. 788 */ 789 avail_start = ptoa(VM_PHYSMEM_PTR(0)->start); 790 avail_end = ptoa(VM_PHYSMEM_PTR(vm_nphysseg - 1)->end); 791 virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE; 792 793#if 0 794 printf("avail_start = 0x%lx\n", avail_start); 795 printf("avail_end = 0x%lx\n", avail_end); 796 printf("virtual_end = 0x%lx\n", virtual_end); 797#endif 798 799 /* 800 * Allocate a level 1 PTE table for the kernel. 801 * This is always one page long. 802 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 803 */ 804 kernel_lev1map = (pt_entry_t *) 805 uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG); 806 807 /* 808 * Allocate a level 2 PTE table for the kernel. 809 * These must map all of the level3 PTEs. 810 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL. 811 */ 812 lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG); 813 lev2map = (pt_entry_t *) 814 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize); 815 816 /* 817 * Allocate a level 3 PTE table for the kernel. 818 * Contains lev3mapsize PTEs. 819 */ 820 lev3map = (pt_entry_t *) 821 uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize); 822 823 /* 824 * Set up level 1 page table 825 */ 826 827 /* Map all of the level 2 pte pages */ 828 for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) { 829 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) + 830 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 831 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 832 kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS + 833 (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte; 834 } 835 836 /* Map the virtual page table */ 837 pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT) 838 << PG_SHIFT; 839 pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */ 840 kernel_lev1map[l1pte_index(VPTBASE)] = pte; 841 VPT = (pt_entry_t *)VPTBASE; 842 843#ifdef _PMAP_MAY_USE_PROM_CONSOLE 844 { 845 extern pt_entry_t prom_pte; /* XXX */ 846 extern int prom_mapped; /* XXX */ 847 848 if (pmap_uses_prom_console()) { 849 /* 850 * XXX Save old PTE so we can remap the PROM, if 851 * XXX necessary. 852 */ 853 prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM; 854 } 855 prom_mapped = 0; 856 857 /* 858 * Actually, this code lies. The prom is still mapped, and will 859 * remain so until the context switch after alpha_init() returns. 
860 */ 861 } 862#endif 863 864 /* 865 * Set up level 2 page table. 866 */ 867 /* Map all of the level 3 pte pages */ 868 for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) { 869 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) + 870 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT; 871 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 872 lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+ 873 (i*PAGE_SIZE*NPTEPG))] = pte; 874 } 875 876 /* Initialize the pmap_growkernel_lock. */ 877 rw_init(&pmap_growkernel_lock); 878 879 /* 880 * Set up level three page table (lev3map) 881 */ 882 /* Nothing to do; it's already zero'd */ 883 884 /* 885 * Initialize the pmap pools and list. 886 */ 887 pmap_ncpuids = ncpuids; 888 pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0, 889 0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL); 890 pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt", 891 &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL); 892 pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 893 PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL, 894 NULL, NULL); 895 896 TAILQ_INIT(&pmap_all_pmaps); 897 898 /* 899 * Initialize the ASN logic. 900 */ 901 pmap_max_asn = maxasn; 902 for (i = 0; i < ALPHA_MAXPROCS; i++) { 903 pmap_asn_info[i].pma_asn = 1; 904 pmap_asn_info[i].pma_asngen = 0; 905 } 906 907 /* 908 * Initialize the locks. 909 */ 910 rw_init(&pmap_main_lock); 911 mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE); 912 for (i = 0; i < __arraycount(pmap_pvh_locks); i++) { 913 mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE); 914 } 915 916 /* 917 * Initialize kernel pmap. Note that all kernel mappings 918 * have PG_ASM set, so the ASN doesn't really matter for 919 * the kernel pmap. Also, since the kernel pmap always 920 * references kernel_lev1map, it always has an invalid ASN 921 * generation. 922 */ 923 memset(pmap_kernel(), 0, sizeof(struct pmap)); 924 pmap_kernel()->pm_lev1map = kernel_lev1map; 925 pmap_kernel()->pm_count = 1; 926 for (i = 0; i < ALPHA_MAXPROCS; i++) { 927 pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 928 pmap_kernel()->pm_asni[i].pma_asngen = 929 pmap_asn_info[i].pma_asngen; 930 } 931 mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE); 932 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); 933 934#if defined(MULTIPROCESSOR) 935 /* 936 * Initialize the TLB shootdown queues. 937 */ 938 pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache, 939 sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE, 940 0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL); 941 for (i = 0; i < ALPHA_MAXPROCS; i++) { 942 TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); 943 mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT, 944 IPL_SCHED); 945 } 946#endif 947 948 /* 949 * Set up lwp0's PCB such that the ptbr points to the right place 950 * and has the kernel pmap's (really unused) ASN. 951 */ 952 pcb = lwp_getpcb(&lwp0); 953 pcb->pcb_hw.apcb_ptbr = 954 ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT; 955 pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn; 956 957 /* 958 * Mark the kernel pmap `active' on this processor. 
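	 * pm_cpus is simply a bitmask of CPU ids (the same mask tested by
	 * PMAP_ISACTIVE_TEST() and set in pmap_activate()), so marking the
	 * pmap active is an atomic OR of (1UL << cpu_number()).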
959 */ 960 atomic_or_ulong(&pmap_kernel()->pm_cpus, 961 (1UL << cpu_number())); 962} 963 964#ifdef _PMAP_MAY_USE_PROM_CONSOLE 965int 966pmap_uses_prom_console(void) 967{ 968 969 return (cputype == ST_DEC_21000); 970} 971#endif /* _PMAP_MAY_USE_PROM_CONSOLE */ 972 973/* 974 * pmap_virtual_space: [ INTERFACE ] 975 * 976 * Define the initial bounds of the kernel virtual address space. 977 */ 978void 979pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) 980{ 981 982 *vstartp = VM_MIN_KERNEL_ADDRESS; /* kernel is in K0SEG */ 983 *vendp = VM_MAX_KERNEL_ADDRESS; /* we use pmap_growkernel */ 984} 985 986/* 987 * pmap_steal_memory: [ INTERFACE ] 988 * 989 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()). 990 * This function allows for early dynamic memory allocation until the 991 * virtual memory system has been bootstrapped. After that point, either 992 * kmem_alloc or malloc should be used. This function works by stealing 993 * pages from the (to be) managed page pool, then implicitly mapping the 994 * pages (by using their k0seg addresses) and zeroing them. 995 * 996 * It may be used once the physical memory segments have been pre-loaded 997 * into the vm_physmem[] array. Early memory allocation MUST use this 998 * interface! This cannot be used after vm_page_startup(), and will 999 * generate a panic if tried. 1000 * 1001 * Note that this memory will never be freed, and in essence it is wired 1002 * down. 1003 * 1004 * We must adjust *vstartp and/or *vendp iff we use address space 1005 * from the kernel virtual address range defined by pmap_virtual_space(). 1006 * 1007 * Note: no locking is necessary in this function. 1008 */ 1009vaddr_t 1010pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp) 1011{ 1012 int bank, npgs, x; 1013 vaddr_t va; 1014 paddr_t pa; 1015 1016 size = round_page(size); 1017 npgs = atop(size); 1018 1019#if 0 1020 printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs); 1021#endif 1022 1023 for (bank = 0; bank < vm_nphysseg; bank++) { 1024 if (uvm.page_init_done == true) 1025 panic("pmap_steal_memory: called _after_ bootstrap"); 1026 1027#if 0 1028 printf(" bank %d: avail_start 0x%lx, start 0x%lx, " 1029 "avail_end 0x%lx\n", bank, VM_PHYSMEM_PTR(bank)->avail_start, 1030 VM_PHYSMEM_PTR(bank)->start, VM_PHYSMEM_PTR(bank)->avail_end); 1031#endif 1032 1033 if (VM_PHYSMEM_PTR(bank)->avail_start != VM_PHYSMEM_PTR(bank)->start || 1034 VM_PHYSMEM_PTR(bank)->avail_start >= VM_PHYSMEM_PTR(bank)->avail_end) 1035 continue; 1036 1037#if 0 1038 printf(" avail_end - avail_start = 0x%lx\n", 1039 VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start); 1040#endif 1041 1042 if ((VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start) 1043 < npgs) 1044 continue; 1045 1046 /* 1047 * There are enough pages here; steal them! 1048 */ 1049 pa = ptoa(VM_PHYSMEM_PTR(bank)->avail_start); 1050 VM_PHYSMEM_PTR(bank)->avail_start += npgs; 1051 VM_PHYSMEM_PTR(bank)->start += npgs; 1052 1053 /* 1054 * Have we used up this segment? 1055 */ 1056 if (VM_PHYSMEM_PTR(bank)->avail_start == VM_PHYSMEM_PTR(bank)->end) { 1057 if (vm_nphysseg == 1) 1058 panic("pmap_steal_memory: out of memory!"); 1059 1060 /* Remove this segment from the list. 
*/ 1061 vm_nphysseg--; 1062 for (x = bank; x < vm_nphysseg; x++) { 1063 /* structure copy */ 1064 VM_PHYSMEM_PTR_SWAP(x, x + 1); 1065 } 1066 } 1067 1068 va = ALPHA_PHYS_TO_K0SEG(pa); 1069 memset((void *)va, 0, size); 1070 pmap_pages_stolen += npgs; 1071 return (va); 1072 } 1073 1074 /* 1075 * If we got here, this was no memory left. 1076 */ 1077 panic("pmap_steal_memory: no memory to steal"); 1078} 1079 1080/* 1081 * pmap_init: [ INTERFACE ] 1082 * 1083 * Initialize the pmap module. Called by vm_init(), to initialize any 1084 * structures that the pmap system needs to map virtual memory. 1085 * 1086 * Note: no locking is necessary in this function. 1087 */ 1088void 1089pmap_init(void) 1090{ 1091 1092#ifdef DEBUG 1093 if (pmapdebug & PDB_FOLLOW) 1094 printf("pmap_init()\n"); 1095#endif 1096 1097 /* initialize protection array */ 1098 alpha_protection_init(); 1099 1100 /* 1101 * Set a low water mark on the pv_entry pool, so that we are 1102 * more likely to have these around even in extreme memory 1103 * starvation. 1104 */ 1105 pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat); 1106 1107 /* 1108 * Now it is safe to enable pv entry recording. 1109 */ 1110 pmap_initialized = true; 1111 1112#if 0 1113 for (bank = 0; bank < vm_nphysseg; bank++) { 1114 printf("bank %d\n", bank); 1115 printf("\tstart = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->start)); 1116 printf("\tend = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->end)); 1117 printf("\tavail_start = 0x%x\n", 1118 ptoa(VM_PHYSMEM_PTR(bank)->avail_start)); 1119 printf("\tavail_end = 0x%x\n", 1120 ptoa(VM_PHYSMEM_PTR(bank)->avail_end)); 1121 } 1122#endif 1123} 1124 1125/* 1126 * pmap_create: [ INTERFACE ] 1127 * 1128 * Create and return a physical map. 1129 * 1130 * Note: no locking is necessary in this function. 1131 */ 1132pmap_t 1133pmap_create(void) 1134{ 1135 pmap_t pmap; 1136 int i; 1137 1138#ifdef DEBUG 1139 if (pmapdebug & (PDB_FOLLOW|PDB_CREATE)) 1140 printf("pmap_create()\n"); 1141#endif 1142 1143 pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK); 1144 memset(pmap, 0, sizeof(*pmap)); 1145 1146 /* 1147 * Defer allocation of a new level 1 page table until 1148 * the first new mapping is entered; just take a reference 1149 * to the kernel kernel_lev1map. 1150 */ 1151 pmap->pm_lev1map = kernel_lev1map; 1152 1153 pmap->pm_count = 1; 1154 for (i = 0; i < pmap_ncpuids; i++) { 1155 pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED; 1156 /* XXX Locking? */ 1157 pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; 1158 } 1159 mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE); 1160 1161 try_again: 1162 rw_enter(&pmap_growkernel_lock, RW_READER); 1163 1164 if (pmap_lev1map_create(pmap, cpu_number()) != 0) { 1165 rw_exit(&pmap_growkernel_lock); 1166 (void) kpause("pmap_create", false, hz >> 2, NULL); 1167 goto try_again; 1168 } 1169 1170 mutex_enter(&pmap_all_pmaps_lock); 1171 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list); 1172 mutex_exit(&pmap_all_pmaps_lock); 1173 1174 rw_exit(&pmap_growkernel_lock); 1175 1176 return (pmap); 1177} 1178 1179/* 1180 * pmap_destroy: [ INTERFACE ] 1181 * 1182 * Drop the reference count on the specified pmap, releasing 1183 * all resources if the reference count drops to zero. 1184 */ 1185void 1186pmap_destroy(pmap_t pmap) 1187{ 1188 1189#ifdef DEBUG 1190 if (pmapdebug & PDB_FOLLOW) 1191 printf("pmap_destroy(%p)\n", pmap); 1192#endif 1193 1194 if (atomic_dec_uint_nv(&pmap->pm_count) > 0) 1195 return; 1196 1197 rw_enter(&pmap_growkernel_lock, RW_READER); 1198 1199 /* 1200 * Remove it from the global list of all pmaps. 
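	 * pmap_growkernel_lock is held (as reader) across this so that
	 * kernel_lev1map cannot change while the pmap is being taken off
	 * the list; see the lock ordering notes near the top of the file.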
1201 */ 1202 mutex_enter(&pmap_all_pmaps_lock); 1203 TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list); 1204 mutex_exit(&pmap_all_pmaps_lock); 1205 1206 pmap_lev1map_destroy(pmap, cpu_number()); 1207 1208 rw_exit(&pmap_growkernel_lock); 1209 1210 /* 1211 * Since the pmap is supposed to contain no valid 1212 * mappings at this point, we should always see 1213 * kernel_lev1map here. 1214 */ 1215 KASSERT(pmap->pm_lev1map == kernel_lev1map); 1216 1217 mutex_destroy(&pmap->pm_lock); 1218 pool_cache_put(&pmap_pmap_cache, pmap); 1219} 1220 1221/* 1222 * pmap_reference: [ INTERFACE ] 1223 * 1224 * Add a reference to the specified pmap. 1225 */ 1226void 1227pmap_reference(pmap_t pmap) 1228{ 1229 1230#ifdef DEBUG 1231 if (pmapdebug & PDB_FOLLOW) 1232 printf("pmap_reference(%p)\n", pmap); 1233#endif 1234 1235 atomic_inc_uint(&pmap->pm_count); 1236} 1237 1238/* 1239 * pmap_remove: [ INTERFACE ] 1240 * 1241 * Remove the given range of addresses from the specified map. 1242 * 1243 * It is assumed that the start and end are properly 1244 * rounded to the page size. 1245 */ 1246void 1247pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva) 1248{ 1249 pt_entry_t *l1pte, *l2pte, *l3pte; 1250 pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte; 1251 vaddr_t l1eva, l2eva, vptva; 1252 bool needisync = false; 1253 long cpu_id = cpu_number(); 1254 1255#ifdef DEBUG 1256 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 1257 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva); 1258#endif 1259 1260 /* 1261 * If this is the kernel pmap, we can use a faster method 1262 * for accessing the PTEs (since the PT pages are always 1263 * resident). 1264 * 1265 * Note that this routine should NEVER be called from an 1266 * interrupt context; pmap_kremove() is used for that. 1267 */ 1268 if (pmap == pmap_kernel()) { 1269 PMAP_MAP_TO_HEAD_LOCK(); 1270 PMAP_LOCK(pmap); 1271 1272 while (sva < eva) { 1273 l3pte = PMAP_KERNEL_PTE(sva); 1274 if (pmap_pte_v(l3pte)) { 1275#ifdef DIAGNOSTIC 1276 if (uvm_pageismanaged(pmap_pte_pa(l3pte)) && 1277 pmap_pte_pv(l3pte) == 0) 1278 panic("pmap_remove: managed page " 1279 "without PG_PVLIST for 0x%lx", 1280 sva); 1281#endif 1282 needisync |= pmap_remove_mapping(pmap, sva, 1283 l3pte, true, cpu_id); 1284 } 1285 sva += PAGE_SIZE; 1286 } 1287 1288 PMAP_UNLOCK(pmap); 1289 PMAP_MAP_TO_HEAD_UNLOCK(); 1290 1291 if (needisync) 1292 PMAP_SYNC_ISTREAM_KERNEL(); 1293 return; 1294 } 1295 1296#ifdef DIAGNOSTIC 1297 if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS) 1298 panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel " 1299 "address range", sva, eva); 1300#endif 1301 1302 PMAP_MAP_TO_HEAD_LOCK(); 1303 PMAP_LOCK(pmap); 1304 1305 /* 1306 * If we're already referencing the kernel_lev1map, there 1307 * is no work for us to do. 1308 */ 1309 if (pmap->pm_lev1map == kernel_lev1map) 1310 goto out; 1311 1312 saved_l1pte = l1pte = pmap_l1pte(pmap, sva); 1313 1314 /* 1315 * Add a reference to the L1 table to it won't get 1316 * removed from under us. 1317 */ 1318 pmap_physpage_addref(saved_l1pte); 1319 1320 for (; sva < eva; sva = l1eva, l1pte++) { 1321 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1322 if (pmap_pte_v(l1pte)) { 1323 saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte); 1324 1325 /* 1326 * Add a reference to the L2 table so it won't 1327 * get removed from under us. 
1328 */ 1329 pmap_physpage_addref(saved_l2pte); 1330 1331 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1332 l2eva = 1333 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1334 if (pmap_pte_v(l2pte)) { 1335 saved_l3pte = l3pte = 1336 pmap_l3pte(pmap, sva, l2pte); 1337 1338 /* 1339 * Add a reference to the L3 table so 1340 * it won't get removed from under us. 1341 */ 1342 pmap_physpage_addref(saved_l3pte); 1343 1344 /* 1345 * Remember this sva; if the L3 table 1346 * gets removed, we need to invalidate 1347 * the VPT TLB entry for it. 1348 */ 1349 vptva = sva; 1350 1351 for (; sva < l2eva && sva < eva; 1352 sva += PAGE_SIZE, l3pte++) { 1353 if (!pmap_pte_v(l3pte)) { 1354 continue; 1355 } 1356 needisync |= 1357 pmap_remove_mapping( 1358 pmap, sva, 1359 l3pte, true, 1360 cpu_id); 1361 } 1362 1363 /* 1364 * Remove the reference to the L3 1365 * table that we added above. This 1366 * may free the L3 table. 1367 */ 1368 pmap_l3pt_delref(pmap, vptva, 1369 saved_l3pte, cpu_id); 1370 } 1371 } 1372 1373 /* 1374 * Remove the reference to the L2 table that we 1375 * added above. This may free the L2 table. 1376 */ 1377 pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id); 1378 } 1379 } 1380 1381 /* 1382 * Remove the reference to the L1 table that we added above. 1383 * This may free the L1 table. 1384 */ 1385 pmap_l1pt_delref(pmap, saved_l1pte, cpu_id); 1386 1387 if (needisync) 1388 PMAP_SYNC_ISTREAM_USER(pmap); 1389 1390 out: 1391 PMAP_UNLOCK(pmap); 1392 PMAP_MAP_TO_HEAD_UNLOCK(); 1393} 1394 1395/* 1396 * pmap_page_protect: [ INTERFACE ] 1397 * 1398 * Lower the permission for all mappings to a given page to 1399 * the permissions specified. 1400 */ 1401void 1402pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 1403{ 1404 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1405 pmap_t pmap; 1406 pv_entry_t pv, nextpv; 1407 bool needkisync = false; 1408 long cpu_id = cpu_number(); 1409 kmutex_t *lock; 1410 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1411#ifdef DEBUG 1412 paddr_t pa = VM_PAGE_TO_PHYS(pg); 1413 1414 1415 if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) || 1416 (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE))) 1417 printf("pmap_page_protect(%p, %x)\n", pg, prot); 1418#endif 1419 1420 switch (prot) { 1421 case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE: 1422 case VM_PROT_READ|VM_PROT_WRITE: 1423 return; 1424 1425 /* copy_on_write */ 1426 case VM_PROT_READ|VM_PROT_EXECUTE: 1427 case VM_PROT_READ: 1428 PMAP_HEAD_TO_MAP_LOCK(); 1429 lock = pmap_pvh_lock(pg); 1430 mutex_enter(lock); 1431 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 1432 PMAP_LOCK(pv->pv_pmap); 1433 if (*pv->pv_pte & (PG_KWE | PG_UWE)) { 1434 *pv->pv_pte &= ~(PG_KWE | PG_UWE); 1435 PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va, 1436 pmap_pte_asm(pv->pv_pte), 1437 PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id); 1438 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va, 1439 pmap_pte_asm(pv->pv_pte)); 1440 } 1441 PMAP_UNLOCK(pv->pv_pmap); 1442 } 1443 mutex_exit(lock); 1444 PMAP_HEAD_TO_MAP_UNLOCK(); 1445 PMAP_TLB_SHOOTNOW(); 1446 return; 1447 1448 /* remove_all */ 1449 default: 1450 break; 1451 } 1452 1453 PMAP_HEAD_TO_MAP_LOCK(); 1454 lock = pmap_pvh_lock(pg); 1455 mutex_enter(lock); 1456 for (pv = md->pvh_list; pv != NULL; pv = nextpv) { 1457 nextpv = pv->pv_next; 1458 pmap = pv->pv_pmap; 1459 1460 PMAP_LOCK(pmap); 1461#ifdef DEBUG 1462 if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 || 1463 pmap_pte_pa(pv->pv_pte) != pa) 1464 panic("pmap_page_protect: bad mapping"); 1465#endif 1466 if (pmap_remove_mapping(pmap, pv->pv_va, 
pv->pv_pte, 1467 false, cpu_id) == true) { 1468 if (pmap == pmap_kernel()) 1469 needkisync |= true; 1470 else 1471 PMAP_SYNC_ISTREAM_USER(pmap); 1472 } 1473 PMAP_UNLOCK(pmap); 1474 } 1475 1476 if (needkisync) 1477 PMAP_SYNC_ISTREAM_KERNEL(); 1478 1479 mutex_exit(lock); 1480 PMAP_HEAD_TO_MAP_UNLOCK(); 1481} 1482 1483/* 1484 * pmap_protect: [ INTERFACE ] 1485 * 1486 * Set the physical protection on the specified range of this map 1487 * as requested. 1488 */ 1489void 1490pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1491{ 1492 pt_entry_t *l1pte, *l2pte, *l3pte, bits; 1493 bool isactive; 1494 bool hadasm; 1495 vaddr_t l1eva, l2eva; 1496 long cpu_id = cpu_number(); 1497 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1498 1499#ifdef DEBUG 1500 if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) 1501 printf("pmap_protect(%p, %lx, %lx, %x)\n", 1502 pmap, sva, eva, prot); 1503#endif 1504 1505 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1506 pmap_remove(pmap, sva, eva); 1507 return; 1508 } 1509 1510 PMAP_LOCK(pmap); 1511 1512 bits = pte_prot(pmap, prot); 1513 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1514 1515 l1pte = pmap_l1pte(pmap, sva); 1516 for (; sva < eva; sva = l1eva, l1pte++) { 1517 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1518 if (pmap_pte_v(l1pte)) { 1519 l2pte = pmap_l2pte(pmap, sva, l1pte); 1520 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1521 l2eva = 1522 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1523 if (pmap_pte_v(l2pte)) { 1524 l3pte = pmap_l3pte(pmap, sva, l2pte); 1525 for (; sva < l2eva && sva < eva; 1526 sva += PAGE_SIZE, l3pte++) { 1527 if (pmap_pte_v(l3pte) && 1528 pmap_pte_prot_chg(l3pte, 1529 bits)) { 1530 hadasm = 1531 (pmap_pte_asm(l3pte) 1532 != 0); 1533 pmap_pte_set_prot(l3pte, 1534 bits); 1535 PMAP_INVALIDATE_TLB( 1536 pmap, sva, hadasm, 1537 isactive, cpu_id); 1538 PMAP_TLB_SHOOTDOWN( 1539 pmap, sva, 1540 hadasm ? PG_ASM : 0); 1541 } 1542 } 1543 } 1544 } 1545 } 1546 } 1547 1548 PMAP_TLB_SHOOTNOW(); 1549 1550 if (prot & VM_PROT_EXECUTE) 1551 PMAP_SYNC_ISTREAM(pmap); 1552 1553 PMAP_UNLOCK(pmap); 1554} 1555 1556/* 1557 * pmap_enter: [ INTERFACE ] 1558 * 1559 * Insert the given physical page (p) at 1560 * the specified virtual address (v) in the 1561 * target physical map with the protection requested. 1562 * 1563 * If specified, the page will be wired down, meaning 1564 * that the related pte can not be reclaimed. 1565 * 1566 * Note: This is the only routine which MAY NOT lazy-evaluate 1567 * or lose information. That is, this routine must actually 1568 * insert this page into the given map NOW. 1569 */ 1570int 1571pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1572{ 1573 struct vm_page *pg; /* if != NULL, managed page */ 1574 pt_entry_t *pte, npte, opte; 1575 paddr_t opa; 1576 bool tflush = true; 1577 bool hadasm = false; /* XXX gcc -Wuninitialized */ 1578 bool needisync = false; 1579 bool setisync = false; 1580 bool isactive; 1581 bool wired; 1582 long cpu_id = cpu_number(); 1583 int error = 0; 1584 kmutex_t *lock; 1585 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1586 1587#ifdef DEBUG 1588 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1589 printf("pmap_enter(%p, %lx, %lx, %x, %x)\n", 1590 pmap, va, pa, prot, flags); 1591#endif 1592 pg = PHYS_TO_VM_PAGE(pa); 1593 isactive = PMAP_ISACTIVE(pmap, cpu_id); 1594 wired = (flags & PMAP_WIRED) != 0; 1595 1596 /* 1597 * Determine what we need to do about the I-stream. If 1598 * VM_PROT_EXECUTE is set, we mark a user pmap as needing 1599 * an I-sync on the way back out to userspace. 
We always 1600 * need an immediate I-sync for the kernel pmap. 1601 */ 1602 if (prot & VM_PROT_EXECUTE) { 1603 if (pmap == pmap_kernel()) 1604 needisync = true; 1605 else { 1606 setisync = true; 1607 needisync = (pmap->pm_cpus != 0); 1608 } 1609 } 1610 1611 PMAP_MAP_TO_HEAD_LOCK(); 1612 PMAP_LOCK(pmap); 1613 1614 if (pmap == pmap_kernel()) { 1615#ifdef DIAGNOSTIC 1616 /* 1617 * Sanity check the virtual address. 1618 */ 1619 if (va < VM_MIN_KERNEL_ADDRESS) 1620 panic("pmap_enter: kernel pmap, invalid va 0x%lx", va); 1621#endif 1622 pte = PMAP_KERNEL_PTE(va); 1623 } else { 1624 pt_entry_t *l1pte, *l2pte; 1625 1626#ifdef DIAGNOSTIC 1627 /* 1628 * Sanity check the virtual address. 1629 */ 1630 if (va >= VM_MAXUSER_ADDRESS) 1631 panic("pmap_enter: user pmap, invalid va 0x%lx", va); 1632#endif 1633 1634 KASSERT(pmap->pm_lev1map != kernel_lev1map); 1635 1636 /* 1637 * Check to see if the level 1 PTE is valid, and 1638 * allocate a new level 2 page table page if it's not. 1639 * A reference will be added to the level 2 table when 1640 * the level 3 table is created. 1641 */ 1642 l1pte = pmap_l1pte(pmap, va); 1643 if (pmap_pte_v(l1pte) == 0) { 1644 pmap_physpage_addref(l1pte); 1645 error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT); 1646 if (error) { 1647 pmap_l1pt_delref(pmap, l1pte, cpu_id); 1648 if (flags & PMAP_CANFAIL) 1649 goto out; 1650 panic("pmap_enter: unable to create L2 PT " 1651 "page"); 1652 } 1653#ifdef DEBUG 1654 if (pmapdebug & PDB_PTPAGE) 1655 printf("pmap_enter: new level 2 table at " 1656 "0x%lx\n", pmap_pte_pa(l1pte)); 1657#endif 1658 } 1659 1660 /* 1661 * Check to see if the level 2 PTE is valid, and 1662 * allocate a new level 3 page table page if it's not. 1663 * A reference will be added to the level 3 table when 1664 * the mapping is validated. 1665 */ 1666 l2pte = pmap_l2pte(pmap, va, l1pte); 1667 if (pmap_pte_v(l2pte) == 0) { 1668 pmap_physpage_addref(l2pte); 1669 error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT); 1670 if (error) { 1671 pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id); 1672 if (flags & PMAP_CANFAIL) 1673 goto out; 1674 panic("pmap_enter: unable to create L3 PT " 1675 "page"); 1676 } 1677#ifdef DEBUG 1678 if (pmapdebug & PDB_PTPAGE) 1679 printf("pmap_enter: new level 3 table at " 1680 "0x%lx\n", pmap_pte_pa(l2pte)); 1681#endif 1682 } 1683 1684 /* 1685 * Get the PTE that will map the page. 1686 */ 1687 pte = pmap_l3pte(pmap, va, l2pte); 1688 } 1689 1690 /* Remember all of the old PTE; used for TBI check later. */ 1691 opte = *pte; 1692 1693 /* 1694 * Check to see if the old mapping is valid. If not, validate the 1695 * new one immediately. 1696 */ 1697 if (pmap_pte_v(pte) == 0) { 1698 /* 1699 * No need to invalidate the TLB in this case; an invalid 1700 * mapping won't be in the TLB, and a previously valid 1701 * mapping would have been flushed when it was invalidated. 1702 */ 1703 tflush = false; 1704 1705 /* 1706 * No need to synchronize the I-stream, either, for basically 1707 * the same reason. 1708 */ 1709 setisync = needisync = false; 1710 1711 if (pmap != pmap_kernel()) { 1712 /* 1713 * New mappings gain a reference on the level 3 1714 * table. 1715 */ 1716 pmap_physpage_addref(pte); 1717 } 1718 goto validate_enterpv; 1719 } 1720 1721 opa = pmap_pte_pa(pte); 1722 hadasm = (pmap_pte_asm(pte) != 0); 1723 1724 if (opa == pa) { 1725 /* 1726 * Mapping has not changed; must be a protection or 1727 * wiring change. 1728 */ 1729 if (pmap_pte_w_chg(pte, wired ? 
PG_WIRED : 0)) { 1730#ifdef DEBUG 1731 if (pmapdebug & PDB_ENTER) 1732 printf("pmap_enter: wiring change -> %d\n", 1733 wired); 1734#endif 1735 /* 1736 * Adjust the wiring count. 1737 */ 1738 if (wired) 1739 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1740 else 1741 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1742 } 1743 1744 /* 1745 * Set the PTE. 1746 */ 1747 goto validate; 1748 } 1749 1750 /* 1751 * The mapping has changed. We need to invalidate the 1752 * old mapping before creating the new one. 1753 */ 1754#ifdef DEBUG 1755 if (pmapdebug & PDB_ENTER) 1756 printf("pmap_enter: removing old mapping 0x%lx\n", va); 1757#endif 1758 if (pmap != pmap_kernel()) { 1759 /* 1760 * Gain an extra reference on the level 3 table. 1761 * pmap_remove_mapping() will delete a reference, 1762 * and we don't want the table to be erroneously 1763 * freed. 1764 */ 1765 pmap_physpage_addref(pte); 1766 } 1767 needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id); 1768 1769 validate_enterpv: 1770 /* 1771 * Enter the mapping into the pv_table if appropriate. 1772 */ 1773 if (pg != NULL) { 1774 error = pmap_pv_enter(pmap, pg, va, pte, true); 1775 if (error) { 1776 pmap_l3pt_delref(pmap, va, pte, cpu_id); 1777 if (flags & PMAP_CANFAIL) 1778 goto out; 1779 panic("pmap_enter: unable to enter mapping in PV " 1780 "table"); 1781 } 1782 } 1783 1784 /* 1785 * Increment counters. 1786 */ 1787 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1788 if (wired) 1789 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1790 1791 validate: 1792 /* 1793 * Build the new PTE. 1794 */ 1795 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V; 1796 if (pg != NULL) { 1797 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 1798 int attrs; 1799 1800#ifdef DIAGNOSTIC 1801 if ((flags & VM_PROT_ALL) & ~prot) 1802 panic("pmap_enter: access type exceeds prot"); 1803#endif 1804 lock = pmap_pvh_lock(pg); 1805 mutex_enter(lock); 1806 if (flags & VM_PROT_WRITE) 1807 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 1808 else if (flags & VM_PROT_ALL) 1809 md->pvh_attrs |= PGA_REFERENCED; 1810 attrs = md->pvh_attrs; 1811 mutex_exit(lock); 1812 1813 /* 1814 * Set up referenced/modified emulation for new mapping. 1815 */ 1816 if ((attrs & PGA_REFERENCED) == 0) 1817 npte |= PG_FOR | PG_FOW | PG_FOE; 1818 else if ((attrs & PGA_MODIFIED) == 0) 1819 npte |= PG_FOW; 1820 1821 /* 1822 * Mapping was entered on PV list. 1823 */ 1824 npte |= PG_PVLIST; 1825 } 1826 if (wired) 1827 npte |= PG_WIRED; 1828#ifdef DEBUG 1829 if (pmapdebug & PDB_ENTER) 1830 printf("pmap_enter: new pte = 0x%lx\n", npte); 1831#endif 1832 1833 /* 1834 * If the PALcode portion of the new PTE is the same as the 1835 * old PTE, no TBI is necessary. 1836 */ 1837 if (PG_PALCODE(opte) == PG_PALCODE(npte)) 1838 tflush = false; 1839 1840 /* 1841 * Set the new PTE. 1842 */ 1843 PMAP_SET_PTE(pte, npte); 1844 1845 /* 1846 * Invalidate the TLB entry for this VA and any appropriate 1847 * caches. 1848 */ 1849 if (tflush) { 1850 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 1851 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 1852 PMAP_TLB_SHOOTNOW(); 1853 } 1854 if (setisync) 1855 PMAP_SET_NEEDISYNC(pmap); 1856 if (needisync) 1857 PMAP_SYNC_ISTREAM(pmap); 1858 1859out: 1860 PMAP_UNLOCK(pmap); 1861 PMAP_MAP_TO_HEAD_UNLOCK(); 1862 1863 return error; 1864} 1865 1866/* 1867 * pmap_kenter_pa: [ INTERFACE ] 1868 * 1869 * Enter a va -> pa mapping into the kernel pmap without any 1870 * physical->virtual tracking. 
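 *	Mappings entered this way are wired and are never placed on a
 *	PV list, so they are expected to be removed with pmap_kremove()
 *	rather than pmap_remove().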
1871 * 1872 * Note: no locking is necessary in this function. 1873 */ 1874void 1875pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 1876{ 1877 pt_entry_t *pte, npte; 1878 long cpu_id = cpu_number(); 1879 bool needisync = false; 1880 pmap_t pmap = pmap_kernel(); 1881 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1882 1883#ifdef DEBUG 1884 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1885 printf("pmap_kenter_pa(%lx, %lx, %x)\n", 1886 va, pa, prot); 1887#endif 1888 1889#ifdef DIAGNOSTIC 1890 /* 1891 * Sanity check the virtual address. 1892 */ 1893 if (va < VM_MIN_KERNEL_ADDRESS) 1894 panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va); 1895#endif 1896 1897 pte = PMAP_KERNEL_PTE(va); 1898 1899 if (pmap_pte_v(pte) == 0) 1900 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 1901 if (pmap_pte_w(pte) == 0) 1902 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 1903 1904 if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte)) 1905 needisync = true; 1906 1907 /* 1908 * Build the new PTE. 1909 */ 1910 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) | 1911 PG_V | PG_WIRED; 1912 1913 /* 1914 * Set the new PTE. 1915 */ 1916 PMAP_SET_PTE(pte, npte); 1917#if defined(MULTIPROCESSOR) 1918 alpha_mb(); /* XXX alpha_wmb()? */ 1919#endif 1920 1921 /* 1922 * Invalidate the TLB entry for this VA and any appropriate 1923 * caches. 1924 */ 1925 PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id); 1926 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM); 1927 PMAP_TLB_SHOOTNOW(); 1928 1929 if (needisync) 1930 PMAP_SYNC_ISTREAM_KERNEL(); 1931} 1932 1933/* 1934 * pmap_kremove: [ INTERFACE ] 1935 * 1936 * Remove a mapping entered with pmap_kenter_pa() starting at va, 1937 * for size bytes (assumed to be page rounded). 1938 */ 1939void 1940pmap_kremove(vaddr_t va, vsize_t size) 1941{ 1942 pt_entry_t *pte; 1943 bool needisync = false; 1944 long cpu_id = cpu_number(); 1945 pmap_t pmap = pmap_kernel(); 1946 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 1947 1948#ifdef DEBUG 1949 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 1950 printf("pmap_kremove(%lx, %lx)\n", 1951 va, size); 1952#endif 1953 1954#ifdef DIAGNOSTIC 1955 if (va < VM_MIN_KERNEL_ADDRESS) 1956 panic("pmap_kremove: user address"); 1957#endif 1958 1959 for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) { 1960 pte = PMAP_KERNEL_PTE(va); 1961 if (pmap_pte_v(pte)) { 1962#ifdef DIAGNOSTIC 1963 if (pmap_pte_pv(pte)) 1964 panic("pmap_kremove: PG_PVLIST mapping for " 1965 "0x%lx", va); 1966#endif 1967 if (pmap_pte_exec(pte)) 1968 needisync = true; 1969 1970 /* Zap the mapping. */ 1971 PMAP_SET_PTE(pte, PG_NV); 1972#if defined(MULTIPROCESSOR) 1973 alpha_mb(); /* XXX alpha_wmb()? */ 1974#endif 1975 PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id); 1976 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM); 1977 1978 /* Update stats. */ 1979 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 1980 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 1981 } 1982 } 1983 1984 PMAP_TLB_SHOOTNOW(); 1985 1986 if (needisync) 1987 PMAP_SYNC_ISTREAM_KERNEL(); 1988} 1989 1990/* 1991 * pmap_unwire: [ INTERFACE ] 1992 * 1993 * Clear the wired attribute for a map/virtual-address pair. 1994 * 1995 * The mapping must already exist in the pmap.
1996 */ 1997void 1998pmap_unwire(pmap_t pmap, vaddr_t va) 1999{ 2000 pt_entry_t *pte; 2001 2002#ifdef DEBUG 2003 if (pmapdebug & PDB_FOLLOW) 2004 printf("pmap_unwire(%p, %lx)\n", pmap, va); 2005#endif 2006 2007 PMAP_LOCK(pmap); 2008 2009 pte = pmap_l3pte(pmap, va, NULL); 2010#ifdef DIAGNOSTIC 2011 if (pte == NULL || pmap_pte_v(pte) == 0) 2012 panic("pmap_unwire"); 2013#endif 2014 2015 /* 2016 * If wiring actually changed (always?) clear the wire bit and 2017 * update the wire count. Note that wiring is not a hardware 2018 * characteristic so there is no need to invalidate the TLB. 2019 */ 2020 if (pmap_pte_w_chg(pte, 0)) { 2021 pmap_pte_set_w(pte, false); 2022 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2023 } 2024#ifdef DIAGNOSTIC 2025 else { 2026 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 2027 "didn't change!\n", pmap, va); 2028 } 2029#endif 2030 2031 PMAP_UNLOCK(pmap); 2032} 2033 2034/* 2035 * pmap_extract: [ INTERFACE ] 2036 * 2037 * Extract the physical address associated with the given 2038 * pmap/virtual address pair. 2039 */ 2040bool 2041pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap) 2042{ 2043 pt_entry_t *l1pte, *l2pte, *l3pte; 2044 paddr_t pa; 2045 2046#ifdef DEBUG 2047 if (pmapdebug & PDB_FOLLOW) 2048 printf("pmap_extract(%p, %lx) -> ", pmap, va); 2049#endif 2050 2051 /* 2052 * Take a faster path for the kernel pmap. Avoids locking, 2053 * handles K0SEG. 2054 */ 2055 if (pmap == pmap_kernel()) { 2056 pa = vtophys(va); 2057 if (pap != NULL) 2058 *pap = pa; 2059#ifdef DEBUG 2060 if (pmapdebug & PDB_FOLLOW) 2061 printf("0x%lx (kernel vtophys)\n", pa); 2062#endif 2063 return (pa != 0); /* XXX */ 2064 } 2065 2066 PMAP_LOCK(pmap); 2067 2068 l1pte = pmap_l1pte(pmap, va); 2069 if (pmap_pte_v(l1pte) == 0) 2070 goto out; 2071 2072 l2pte = pmap_l2pte(pmap, va, l1pte); 2073 if (pmap_pte_v(l2pte) == 0) 2074 goto out; 2075 2076 l3pte = pmap_l3pte(pmap, va, l2pte); 2077 if (pmap_pte_v(l3pte) == 0) 2078 goto out; 2079 2080 pa = pmap_pte_pa(l3pte) | (va & PGOFSET); 2081 PMAP_UNLOCK(pmap); 2082 if (pap != NULL) 2083 *pap = pa; 2084#ifdef DEBUG 2085 if (pmapdebug & PDB_FOLLOW) 2086 printf("0x%lx\n", pa); 2087#endif 2088 return (true); 2089 2090 out: 2091 PMAP_UNLOCK(pmap); 2092#ifdef DEBUG 2093 if (pmapdebug & PDB_FOLLOW) 2094 printf("failed\n"); 2095#endif 2096 return (false); 2097} 2098 2099/* 2100 * pmap_copy: [ INTERFACE ] 2101 * 2102 * Copy the mapping range specified by src_addr/len 2103 * from the source map to the range dst_addr/len 2104 * in the destination map. 2105 * 2106 * This routine is only advisory and need not do anything. 2107 */ 2108/* call deleted in <machine/pmap.h> */ 2109 2110/* 2111 * pmap_update: [ INTERFACE ] 2112 * 2113 * Require that all active physical maps contain no 2114 * incorrect entries NOW, by processing any deferred 2115 * pmap operations. 2116 */ 2117/* call deleted in <machine/pmap.h> */ 2118 2119/* 2120 * pmap_activate: [ INTERFACE ] 2121 * 2122 * Activate the pmap used by the specified process. This includes 2123 * reloading the MMU context if the current process, and marking 2124 * the pmap in use by the processor. 2125 */ 2126void 2127pmap_activate(struct lwp *l) 2128{ 2129 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2130 long cpu_id = cpu_number(); 2131 2132#ifdef DEBUG 2133 if (pmapdebug & PDB_FOLLOW) 2134 printf("pmap_activate(%p)\n", l); 2135#endif 2136 2137 /* Mark the pmap in use by this processor. */ 2138 atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id)); 2139 2140 /* Allocate an ASN. 
*/ 2141 pmap_asn_alloc(pmap, cpu_id); 2142 2143 PMAP_ACTIVATE(pmap, l, cpu_id); 2144} 2145 2146/* 2147 * pmap_deactivate: [ INTERFACE ] 2148 * 2149 * Mark that the pmap used by the specified process is no longer 2150 * in use by the processor. 2151 * 2152 * The comment above pmap_activate() wrt. locking applies here, 2153 * as well. Note that we use only a single `atomic' operation, 2154 * so no locking is necessary. 2155 */ 2156void 2157pmap_deactivate(struct lwp *l) 2158{ 2159 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2160 2161#ifdef DEBUG 2162 if (pmapdebug & PDB_FOLLOW) 2163 printf("pmap_deactivate(%p)\n", l); 2164#endif 2165 2166 /* 2167 * Mark the pmap no longer in use by this processor. 2168 */ 2169 atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number())); 2170} 2171 2172/* 2173 * pmap_zero_page: [ INTERFACE ] 2174 * 2175 * Zero the specified (machine independent) page by mapping the page 2176 * into virtual memory and clearing its contents, one machine dependent 2177 * page at a time. 2178 * 2179 * Note: no locking is necessary in this function. 2180 */ 2181void 2182pmap_zero_page(paddr_t phys) 2183{ 2184 u_long *p0, *p1, *pend; 2185 2186#ifdef DEBUG 2187 if (pmapdebug & PDB_FOLLOW) 2188 printf("pmap_zero_page(%lx)\n", phys); 2189#endif 2190 2191 p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys); 2192 p1 = NULL; 2193 pend = (u_long *)((u_long)p0 + PAGE_SIZE); 2194 2195 /* 2196 * Unroll the loop a bit, doing 16 quadwords per iteration. 2197 * Do only 8 back-to-back stores, and alternate registers. 2198 */ 2199 do { 2200 __asm volatile( 2201 "# BEGIN loop body\n" 2202 " addq %2, (8 * 8), %1 \n" 2203 " stq $31, (0 * 8)(%0) \n" 2204 " stq $31, (1 * 8)(%0) \n" 2205 " stq $31, (2 * 8)(%0) \n" 2206 " stq $31, (3 * 8)(%0) \n" 2207 " stq $31, (4 * 8)(%0) \n" 2208 " stq $31, (5 * 8)(%0) \n" 2209 " stq $31, (6 * 8)(%0) \n" 2210 " stq $31, (7 * 8)(%0) \n" 2211 " \n" 2212 " addq %3, (8 * 8), %0 \n" 2213 " stq $31, (0 * 8)(%1) \n" 2214 " stq $31, (1 * 8)(%1) \n" 2215 " stq $31, (2 * 8)(%1) \n" 2216 " stq $31, (3 * 8)(%1) \n" 2217 " stq $31, (4 * 8)(%1) \n" 2218 " stq $31, (5 * 8)(%1) \n" 2219 " stq $31, (6 * 8)(%1) \n" 2220 " stq $31, (7 * 8)(%1) \n" 2221 " # END loop body" 2222 : "=r" (p0), "=r" (p1) 2223 : "0" (p0), "1" (p1) 2224 : "memory"); 2225 } while (p0 < pend); 2226} 2227 2228/* 2229 * pmap_copy_page: [ INTERFACE ] 2230 * 2231 * Copy the specified (machine independent) page by mapping the page 2232 * into virtual memory and using memcpy to copy the page, one machine 2233 * dependent page at a time. 2234 * 2235 * Note: no locking is necessary in this function. 2236 */ 2237void 2238pmap_copy_page(paddr_t src, paddr_t dst) 2239{ 2240 const void *s; 2241 void *d; 2242 2243#ifdef DEBUG 2244 if (pmapdebug & PDB_FOLLOW) 2245 printf("pmap_copy_page(%lx, %lx)\n", src, dst); 2246#endif 2247 s = (const void *)ALPHA_PHYS_TO_K0SEG(src); 2248 d = (void *)ALPHA_PHYS_TO_K0SEG(dst); 2249 memcpy(d, s, PAGE_SIZE); 2250} 2251 2252/* 2253 * pmap_pageidlezero: [ INTERFACE ] 2254 * 2255 * Page zero'er for the idle loop. Returns true if the 2256 * page was zero'd, false if we aborted for some reason. 2257 */ 2258bool 2259pmap_pageidlezero(paddr_t pa) 2260{ 2261 u_long *ptr; 2262 int i, cnt = PAGE_SIZE / sizeof(u_long); 2263 2264 for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) { 2265 if (sched_curcpu_runnable_p()) { 2266 /* 2267 * An LWP has become ready. Abort now, 2268 * so we don't keep it waiting while we 2269 * finish zeroing the page.
2270 */ 2271 return (false); 2272 } 2273 *ptr++ = 0; 2274 } 2275 2276 return (true); 2277} 2278 2279/* 2280 * pmap_clear_modify: [ INTERFACE ] 2281 * 2282 * Clear the modify bits on the specified physical page. 2283 */ 2284bool 2285pmap_clear_modify(struct vm_page *pg) 2286{ 2287 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2288 bool rv = false; 2289 long cpu_id = cpu_number(); 2290 kmutex_t *lock; 2291 2292#ifdef DEBUG 2293 if (pmapdebug & PDB_FOLLOW) 2294 printf("pmap_clear_modify(%p)\n", pg); 2295#endif 2296 2297 PMAP_HEAD_TO_MAP_LOCK(); 2298 lock = pmap_pvh_lock(pg); 2299 mutex_enter(lock); 2300 2301 if (md->pvh_attrs & PGA_MODIFIED) { 2302 rv = true; 2303 pmap_changebit(pg, PG_FOW, ~0, cpu_id); 2304 md->pvh_attrs &= ~PGA_MODIFIED; 2305 } 2306 2307 mutex_exit(lock); 2308 PMAP_HEAD_TO_MAP_UNLOCK(); 2309 2310 return (rv); 2311} 2312 2313/* 2314 * pmap_clear_reference: [ INTERFACE ] 2315 * 2316 * Clear the reference bit on the specified physical page. 2317 */ 2318bool 2319pmap_clear_reference(struct vm_page *pg) 2320{ 2321 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2322 bool rv = false; 2323 long cpu_id = cpu_number(); 2324 kmutex_t *lock; 2325 2326#ifdef DEBUG 2327 if (pmapdebug & PDB_FOLLOW) 2328 printf("pmap_clear_reference(%p)\n", pg); 2329#endif 2330 2331 PMAP_HEAD_TO_MAP_LOCK(); 2332 lock = pmap_pvh_lock(pg); 2333 mutex_enter(lock); 2334 2335 if (md->pvh_attrs & PGA_REFERENCED) { 2336 rv = true; 2337 pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id); 2338 md->pvh_attrs &= ~PGA_REFERENCED; 2339 } 2340 2341 mutex_exit(lock); 2342 PMAP_HEAD_TO_MAP_UNLOCK(); 2343 2344 return (rv); 2345} 2346 2347/* 2348 * pmap_is_referenced: [ INTERFACE ] 2349 * 2350 * Return whether or not the specified physical page is referenced 2351 * by any physical maps. 2352 */ 2353/* See <machine/pmap.h> */ 2354 2355/* 2356 * pmap_is_modified: [ INTERFACE ] 2357 * 2358 * Return whether or not the specified physical page is modified 2359 * by any physical maps. 2360 */ 2361/* See <machine/pmap.h> */ 2362 2363/* 2364 * pmap_phys_address: [ INTERFACE ] 2365 * 2366 * Return the physical address corresponding to the specified 2367 * cookie. Used by the device pager to decode a device driver's 2368 * mmap entry point return value. 2369 * 2370 * Note: no locking is necessary in this function. 2371 */ 2372paddr_t 2373pmap_phys_address(paddr_t ppn) 2374{ 2375 2376 return (alpha_ptob(ppn)); 2377} 2378 2379/* 2380 * Miscellaneous support routines follow 2381 */ 2382 2383/* 2384 * alpha_protection_init: 2385 * 2386 * Initialize Alpha protection code array. 2387 * 2388 * Note: no locking is necessary in this function. 2389 */ 2390static void 2391alpha_protection_init(void) 2392{ 2393 int prot, *kp, *up; 2394 2395 kp = protection_codes[0]; 2396 up = protection_codes[1]; 2397 2398 for (prot = 0; prot < 8; prot++) { 2399 kp[prot] = PG_ASM; 2400 up[prot] = 0; 2401 2402 if (prot & VM_PROT_READ) { 2403 kp[prot] |= PG_KRE; 2404 up[prot] |= PG_KRE | PG_URE; 2405 } 2406 if (prot & VM_PROT_WRITE) { 2407 kp[prot] |= PG_KWE; 2408 up[prot] |= PG_KWE | PG_UWE; 2409 } 2410 if (prot & VM_PROT_EXECUTE) { 2411 kp[prot] |= PG_EXEC | PG_KRE; 2412 up[prot] |= PG_EXEC | PG_KRE | PG_URE; 2413 } else { 2414 kp[prot] |= PG_FOE; 2415 up[prot] |= PG_FOE; 2416 } 2417 } 2418} 2419 2420/* 2421 * pmap_remove_mapping: 2422 * 2423 * Invalidate a single page denoted by pmap/va. 2424 * 2425 * If (pte != NULL), it is the already computed PTE for the page. 
2426 * 2427 * Note: locking in this function is complicated by the fact 2428 * that we can be called when the PV list is already locked. 2429 * (pmap_page_protect()). In this case, the caller must be 2430 * careful to get the next PV entry while we remove this entry 2431 * from beneath it. We assume that the pmap itself is already 2432 * locked; dolock applies only to the PV list. 2433 * 2434 * Returns true or false, indicating if an I-stream sync needs 2435 * to be initiated (for this CPU or for other CPUs). 2436 */ 2437static bool 2438pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte, 2439 bool dolock, long cpu_id) 2440{ 2441 paddr_t pa; 2442 struct vm_page *pg; /* if != NULL, page is managed */ 2443 bool onpv; 2444 bool hadasm; 2445 bool isactive; 2446 bool needisync = false; 2447 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2448 2449#ifdef DEBUG 2450 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 2451 printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n", 2452 pmap, va, pte, dolock, cpu_id); 2453#endif 2454 2455 /* 2456 * PTE not provided, compute it from pmap and va. 2457 */ 2458 if (pte == NULL) { 2459 pte = pmap_l3pte(pmap, va, NULL); 2460 if (pmap_pte_v(pte) == 0) 2461 return (false); 2462 } 2463 2464 pa = pmap_pte_pa(pte); 2465 onpv = (pmap_pte_pv(pte) != 0); 2466 hadasm = (pmap_pte_asm(pte) != 0); 2467 isactive = PMAP_ISACTIVE(pmap, cpu_id); 2468 2469 /* 2470 * Determine what we need to do about the I-stream. If 2471 * PG_EXEC was set, we mark a user pmap as needing an 2472 * I-sync on the way out to userspace. We always need 2473 * an immediate I-sync for the kernel pmap. 2474 */ 2475 if (pmap_pte_exec(pte)) { 2476 if (pmap == pmap_kernel()) 2477 needisync = true; 2478 else { 2479 PMAP_SET_NEEDISYNC(pmap); 2480 needisync = (pmap->pm_cpus != 0); 2481 } 2482 } 2483 2484 /* 2485 * Update statistics 2486 */ 2487 if (pmap_pte_w(pte)) 2488 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2489 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 2490 2491 /* 2492 * Invalidate the PTE after saving the reference modify info. 2493 */ 2494#ifdef DEBUG 2495 if (pmapdebug & PDB_REMOVE) 2496 printf("remove: invalidating pte at %p\n", pte); 2497#endif 2498 PMAP_SET_PTE(pte, PG_NV); 2499 2500 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id); 2501 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0); 2502 PMAP_TLB_SHOOTNOW(); 2503 2504 /* 2505 * If we're removing a user mapping, check to see if we 2506 * can free page table pages. 2507 */ 2508 if (pmap != pmap_kernel()) { 2509 /* 2510 * Delete the reference on the level 3 table. It will 2511 * delete references on the level 2 and 1 tables as 2512 * appropriate. 2513 */ 2514 pmap_l3pt_delref(pmap, va, pte, cpu_id); 2515 } 2516 2517 /* 2518 * If the mapping wasn't entered on the PV list, we're all done. 2519 */ 2520 if (onpv == false) 2521 return (needisync); 2522 2523 /* 2524 * Remove it from the PV table. 2525 */ 2526 pg = PHYS_TO_VM_PAGE(pa); 2527 KASSERT(pg != NULL); 2528 pmap_pv_remove(pmap, pg, va, dolock); 2529 2530 return (needisync); 2531} 2532 2533/* 2534 * pmap_changebit: 2535 * 2536 * Set or clear the specified PTE bits for all mappings on the 2537 * specified page. 2538 * 2539 * Note: we assume that the pv_head is already locked, and that 2540 * the caller has acquired a PV->pmap mutex so that we can lock 2541 * the pmaps as we encounter them. 
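 *
 *	For example, pmap_clear_modify() calls
 *	pmap_changebit(pg, PG_FOW, ~0, cpu_id) to re-arm fault-on-write
 *	on every mapping of the page, and pmap_emulate_reference() calls
 *	pmap_changebit(pg, 0, ~faultoff, cpu_id) to clear the fault bits
 *	it has just emulated.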
2542 */ 2543static void 2544pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id) 2545{ 2546 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2547 pv_entry_t pv; 2548 pt_entry_t *pte, npte; 2549 vaddr_t va; 2550 bool hadasm, isactive; 2551 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 2552 2553#ifdef DEBUG 2554 if (pmapdebug & PDB_BITS) 2555 printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n", 2556 pg, set, mask); 2557#endif 2558 2559 /* 2560 * Loop over all current mappings setting/clearing as apropos. 2561 */ 2562 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 2563 va = pv->pv_va; 2564 2565 PMAP_LOCK(pv->pv_pmap); 2566 2567 pte = pv->pv_pte; 2568 npte = (*pte | set) & mask; 2569 if (*pte != npte) { 2570 hadasm = (pmap_pte_asm(pte) != 0); 2571 isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id); 2572 PMAP_SET_PTE(pte, npte); 2573 PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive, 2574 cpu_id); 2575 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va, 2576 hadasm ? PG_ASM : 0); 2577 } 2578 PMAP_UNLOCK(pv->pv_pmap); 2579 } 2580 2581 PMAP_TLB_SHOOTNOW(); 2582} 2583 2584/* 2585 * pmap_emulate_reference: 2586 * 2587 * Emulate reference and/or modified bit hits. 2588 * Return 1 if this was an execute fault on a non-exec mapping, 2589 * otherwise return 0. 2590 */ 2591int 2592pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) 2593{ 2594 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 2595 pt_entry_t faultoff, *pte; 2596 struct vm_page *pg; 2597 paddr_t pa; 2598 bool didlock = false; 2599 bool exec = false; 2600 long cpu_id = cpu_number(); 2601 kmutex_t *lock; 2602 2603#ifdef DEBUG 2604 if (pmapdebug & PDB_FOLLOW) 2605 printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n", 2606 l, v, user, type); 2607#endif 2608 2609 /* 2610 * Convert process and virtual address to physical address. 2611 */ 2612 if (v >= VM_MIN_KERNEL_ADDRESS) { 2613 if (user) 2614 panic("pmap_emulate_reference: user ref to kernel"); 2615 /* 2616 * No need to lock here; kernel PT pages never go away. 2617 */ 2618 pte = PMAP_KERNEL_PTE(v); 2619 } else { 2620#ifdef DIAGNOSTIC 2621 if (l == NULL) 2622 panic("pmap_emulate_reference: bad proc"); 2623 if (l->l_proc->p_vmspace == NULL) 2624 panic("pmap_emulate_reference: bad p_vmspace"); 2625#endif 2626 PMAP_LOCK(pmap); 2627 didlock = true; 2628 pte = pmap_l3pte(pmap, v, NULL); 2629 /* 2630 * We'll unlock below where we're done with the PTE. 2631 */ 2632 } 2633 exec = pmap_pte_exec(pte); 2634 if (!exec && type == ALPHA_MMCSR_FOE) { 2635 if (didlock) 2636 PMAP_UNLOCK(pmap); 2637 return (1); 2638 } 2639#ifdef DEBUG 2640 if (pmapdebug & PDB_FOLLOW) { 2641 printf("\tpte = %p, ", pte); 2642 printf("*pte = 0x%lx\n", *pte); 2643 } 2644#endif 2645#ifdef DEBUG /* These checks are more expensive */ 2646 if (!pmap_pte_v(pte)) 2647 panic("pmap_emulate_reference: invalid pte"); 2648 if (type == ALPHA_MMCSR_FOW) { 2649 if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE))) 2650 panic("pmap_emulate_reference: write but unwritable"); 2651 if (!(*pte & PG_FOW)) 2652 panic("pmap_emulate_reference: write but not FOW"); 2653 } else { 2654 if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE))) 2655 panic("pmap_emulate_reference: !write but unreadable"); 2656 if (!(*pte & (PG_FOR | PG_FOE))) 2657 panic("pmap_emulate_reference: !write but not FOR|FOE"); 2658 } 2659 /* Other diagnostics? */ 2660#endif 2661 pa = pmap_pte_pa(pte); 2662 2663 /* 2664 * We're now done with the PTE. If it was a user pmap, unlock 2665 * it now. 
2666 */ 2667 if (didlock) 2668 PMAP_UNLOCK(pmap); 2669 2670#ifdef DEBUG 2671 if (pmapdebug & PDB_FOLLOW) 2672 printf("\tpa = 0x%lx\n", pa); 2673#endif 2674#ifdef DIAGNOSTIC 2675 if (!uvm_pageismanaged(pa)) 2676 panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): " 2677 "pa 0x%lx not managed", l, v, user, type, pa); 2678#endif 2679 2680 /* 2681 * Twiddle the appropriate bits to reflect the reference 2682 * and/or modification.. 2683 * 2684 * The rules: 2685 * (1) always mark page as used, and 2686 * (2) if it was a write fault, mark page as modified. 2687 */ 2688 pg = PHYS_TO_VM_PAGE(pa); 2689 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2690 2691 PMAP_HEAD_TO_MAP_LOCK(); 2692 lock = pmap_pvh_lock(pg); 2693 mutex_enter(lock); 2694 2695 if (type == ALPHA_MMCSR_FOW) { 2696 md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); 2697 faultoff = PG_FOR | PG_FOW; 2698 } else { 2699 md->pvh_attrs |= PGA_REFERENCED; 2700 faultoff = PG_FOR; 2701 if (exec) { 2702 faultoff |= PG_FOE; 2703 } 2704 } 2705 pmap_changebit(pg, 0, ~faultoff, cpu_id); 2706 2707 mutex_exit(lock); 2708 PMAP_HEAD_TO_MAP_UNLOCK(); 2709 return (0); 2710} 2711 2712#ifdef DEBUG 2713/* 2714 * pmap_pv_dump: 2715 * 2716 * Dump the physical->virtual data for the specified page. 2717 */ 2718void 2719pmap_pv_dump(paddr_t pa) 2720{ 2721 struct vm_page *pg; 2722 struct vm_page_md *md; 2723 pv_entry_t pv; 2724 kmutex_t *lock; 2725 2726 pg = PHYS_TO_VM_PAGE(pa); 2727 md = VM_PAGE_TO_MD(pg); 2728 2729 lock = pmap_pvh_lock(pg); 2730 mutex_enter(lock); 2731 2732 printf("pa 0x%lx (attrs = 0x%x):\n", pa, md->pvh_attrs); 2733 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) 2734 printf(" pmap %p, va 0x%lx\n", 2735 pv->pv_pmap, pv->pv_va); 2736 printf("\n"); 2737 2738 mutex_exit(lock); 2739} 2740#endif 2741 2742/* 2743 * vtophys: 2744 * 2745 * Return the physical address corresponding to the K0SEG or 2746 * K1SEG address provided. 2747 * 2748 * Note: no locking is necessary in this function. 2749 */ 2750paddr_t 2751vtophys(vaddr_t vaddr) 2752{ 2753 pt_entry_t *pte; 2754 paddr_t paddr = 0; 2755 2756 if (vaddr < ALPHA_K0SEG_BASE) 2757 printf("vtophys: invalid vaddr 0x%lx", vaddr); 2758 else if (vaddr <= ALPHA_K0SEG_END) 2759 paddr = ALPHA_K0SEG_TO_PHYS(vaddr); 2760 else { 2761 pte = PMAP_KERNEL_PTE(vaddr); 2762 if (pmap_pte_v(pte)) 2763 paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET); 2764 } 2765 2766#if 0 2767 printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr); 2768#endif 2769 2770 return (paddr); 2771} 2772 2773/******************** pv_entry management ********************/ 2774 2775/* 2776 * pmap_pv_enter: 2777 * 2778 * Add a physical->virtual entry to the pv_table. 2779 */ 2780static int 2781pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, 2782 bool dolock) 2783{ 2784 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2785 pv_entry_t newpv; 2786 kmutex_t *lock; 2787 2788 /* 2789 * Allocate and fill in the new pv_entry. 2790 */ 2791 newpv = pmap_pv_alloc(); 2792 if (newpv == NULL) 2793 return ENOMEM; 2794 newpv->pv_va = va; 2795 newpv->pv_pmap = pmap; 2796 newpv->pv_pte = pte; 2797 2798 if (dolock) { 2799 lock = pmap_pvh_lock(pg); 2800 mutex_enter(lock); 2801 } 2802 2803#ifdef DEBUG 2804 { 2805 pv_entry_t pv; 2806 /* 2807 * Make sure the entry doesn't already exist. 
2808 */ 2809 for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) { 2810 if (pmap == pv->pv_pmap && va == pv->pv_va) { 2811 printf("pmap = %p, va = 0x%lx\n", pmap, va); 2812 panic("pmap_pv_enter: already in pv table"); 2813 } 2814 } 2815 } 2816#endif 2817 2818 /* 2819 * ...and put it in the list. 2820 */ 2821 newpv->pv_next = md->pvh_list; 2822 md->pvh_list = newpv; 2823 2824 if (dolock) { 2825 mutex_exit(lock); 2826 } 2827 2828 return 0; 2829} 2830 2831/* 2832 * pmap_pv_remove: 2833 * 2834 * Remove a physical->virtual entry from the pv_table. 2835 */ 2836static void 2837pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock) 2838{ 2839 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2840 pv_entry_t pv, *pvp; 2841 kmutex_t *lock; 2842 2843 if (dolock) { 2844 lock = pmap_pvh_lock(pg); 2845 mutex_enter(lock); 2846 } else { 2847 lock = NULL; /* XXX stupid gcc */ 2848 } 2849 2850 /* 2851 * Find the entry to remove. 2852 */ 2853 for (pvp = &md->pvh_list, pv = *pvp; 2854 pv != NULL; pvp = &pv->pv_next, pv = *pvp) 2855 if (pmap == pv->pv_pmap && va == pv->pv_va) 2856 break; 2857 2858#ifdef DEBUG 2859 if (pv == NULL) 2860 panic("pmap_pv_remove: not in pv table"); 2861#endif 2862 2863 *pvp = pv->pv_next; 2864 2865 if (dolock) { 2866 mutex_exit(lock); 2867 } 2868 2869 pmap_pv_free(pv); 2870} 2871 2872/* 2873 * pmap_pv_page_alloc: 2874 * 2875 * Allocate a page for the pv_entry pool. 2876 */ 2877static void * 2878pmap_pv_page_alloc(struct pool *pp, int flags) 2879{ 2880 paddr_t pg; 2881 2882 if (pmap_physpage_alloc(PGU_PVENT, &pg)) 2883 return ((void *)ALPHA_PHYS_TO_K0SEG(pg)); 2884 return (NULL); 2885} 2886 2887/* 2888 * pmap_pv_page_free: 2889 * 2890 * Free a pv_entry pool page. 2891 */ 2892static void 2893pmap_pv_page_free(struct pool *pp, void *v) 2894{ 2895 2896 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v)); 2897} 2898 2899/******************** misc. functions ********************/ 2900 2901/* 2902 * pmap_physpage_alloc: 2903 * 2904 * Allocate a single page from the VM system and return the 2905 * physical address for that page. 2906 */ 2907static bool 2908pmap_physpage_alloc(int usage, paddr_t *pap) 2909{ 2910 struct vm_page *pg; 2911 paddr_t pa; 2912 2913 /* 2914 * Don't ask for a zero'd page in the L1PT case -- we will 2915 * properly initialize it in the constructor. 2916 */ 2917 2918 pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ? 2919 UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO); 2920 if (pg != NULL) { 2921 pa = VM_PAGE_TO_PHYS(pg); 2922#ifdef DEBUG 2923 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2924 if (md->pvh_refcnt != 0) { 2925 printf("pmap_physpage_alloc: page 0x%lx has " 2926 "%d references\n", pa, md->pvh_refcnt); 2927 panic("pmap_physpage_alloc"); 2928 } 2929#endif 2930 *pap = pa; 2931 return (true); 2932 } 2933 return (false); 2934} 2935 2936/* 2937 * pmap_physpage_free: 2938 * 2939 * Free the single page table page at the specified physical address. 2940 */ 2941static void 2942pmap_physpage_free(paddr_t pa) 2943{ 2944 struct vm_page *pg; 2945 2946 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) 2947 panic("pmap_physpage_free: bogus physical page address"); 2948 2949#ifdef DEBUG 2950 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2951 if (md->pvh_refcnt != 0) 2952 panic("pmap_physpage_free: page still has references"); 2953#endif 2954 2955 uvm_pagefree(pg); 2956} 2957 2958/* 2959 * pmap_physpage_addref: 2960 * 2961 * Add a reference to the specified special use page. 
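 *
 *	(The pages in question are those obtained from
 *	pmap_physpage_alloc(), e.g. page table pages; the count is kept
 *	in the page's vm_page_md and updated atomically, so no lock is
 *	taken here.)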
2962 */ 2963static int 2964pmap_physpage_addref(void *kva) 2965{ 2966 struct vm_page *pg; 2967 struct vm_page_md *md; 2968 paddr_t pa; 2969 2970 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2971 pg = PHYS_TO_VM_PAGE(pa); 2972 md = VM_PAGE_TO_MD(pg); 2973 2974 KASSERT((int)md->pvh_refcnt >= 0); 2975 2976 return atomic_inc_uint_nv(&md->pvh_refcnt); 2977} 2978 2979/* 2980 * pmap_physpage_delref: 2981 * 2982 * Delete a reference to the specified special use page. 2983 */ 2984static int 2985pmap_physpage_delref(void *kva) 2986{ 2987 struct vm_page *pg; 2988 struct vm_page_md *md; 2989 paddr_t pa; 2990 2991 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 2992 pg = PHYS_TO_VM_PAGE(pa); 2993 md = VM_PAGE_TO_MD(pg); 2994 2995 KASSERT((int)md->pvh_refcnt > 0); 2996 2997 return atomic_dec_uint_nv(&md->pvh_refcnt); 2998} 2999 3000/******************** page table page management ********************/ 3001 3002/* 3003 * pmap_growkernel: [ INTERFACE ] 3004 * 3005 * Grow the kernel address space. This is a hint from the 3006 * upper layer to pre-allocate more kernel PT pages. 3007 */ 3008vaddr_t 3009pmap_growkernel(vaddr_t maxkvaddr) 3010{ 3011 struct pmap *kpm = pmap_kernel(), *pm; 3012 paddr_t ptaddr; 3013 pt_entry_t *l1pte, *l2pte, pte; 3014 vaddr_t va; 3015 int l1idx; 3016 3017 rw_enter(&pmap_growkernel_lock, RW_WRITER); 3018 3019 if (maxkvaddr <= virtual_end) 3020 goto out; /* we are OK */ 3021 3022 va = virtual_end; 3023 3024 while (va < maxkvaddr) { 3025 /* 3026 * If there is no valid L1 PTE (i.e. no L2 PT page), 3027 * allocate a new L2 PT page and insert it into the 3028 * L1 map. 3029 */ 3030 l1pte = pmap_l1pte(kpm, va); 3031 if (pmap_pte_v(l1pte) == 0) { 3032 /* 3033 * XXX PGU_NORMAL? It's not a "traditional" PT page. 3034 */ 3035 if (uvm.page_init_done == false) { 3036 /* 3037 * We're growing the kernel pmap early (from 3038 * uvm_pageboot_alloc()). This case must 3039 * be handled a little differently. 3040 */ 3041 ptaddr = ALPHA_K0SEG_TO_PHYS( 3042 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3043 } else if (pmap_physpage_alloc(PGU_NORMAL, 3044 &ptaddr) == false) 3045 goto die; 3046 pte = (atop(ptaddr) << PG_SHIFT) | 3047 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3048 *l1pte = pte; 3049 3050 l1idx = l1pte_index(va); 3051 3052 /* Update all the user pmaps. */ 3053 mutex_enter(&pmap_all_pmaps_lock); 3054 for (pm = TAILQ_FIRST(&pmap_all_pmaps); 3055 pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { 3056 /* Skip the kernel pmap. */ 3057 if (pm == pmap_kernel()) 3058 continue; 3059 3060 PMAP_LOCK(pm); 3061 if (pm->pm_lev1map == kernel_lev1map) { 3062 PMAP_UNLOCK(pm); 3063 continue; 3064 } 3065 pm->pm_lev1map[l1idx] = pte; 3066 PMAP_UNLOCK(pm); 3067 } 3068 mutex_exit(&pmap_all_pmaps_lock); 3069 } 3070 3071 /* 3072 * Have an L2 PT page now, add the L3 PT page. 3073 */ 3074 l2pte = pmap_l2pte(kpm, va, l1pte); 3075 KASSERT(pmap_pte_v(l2pte) == 0); 3076 if (uvm.page_init_done == false) { 3077 /* 3078 * See above. 3079 */ 3080 ptaddr = ALPHA_K0SEG_TO_PHYS( 3081 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3082 } else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false) 3083 goto die; 3084 *l2pte = (atop(ptaddr) << PG_SHIFT) | 3085 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3086 va += ALPHA_L2SEG_SIZE; 3087 } 3088 3089 /* Invalidate the L1 PT cache. 
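 * Cached, already-constructed L1 pages hold a copy of kernel_lev1map
 * taken before any L2 PT pages were added above, so force them to be
 * re-constructed before they are handed out again.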
*/ 3090 pool_cache_invalidate(&pmap_l1pt_cache); 3091 3092 virtual_end = va; 3093 3094 out: 3095 rw_exit(&pmap_growkernel_lock); 3096 3097 return (virtual_end); 3098 3099 die: 3100 panic("pmap_growkernel: out of memory"); 3101} 3102 3103/* 3104 * pmap_lev1map_create: 3105 * 3106 * Create a new level 1 page table for the specified pmap. 3107 * 3108 * Note: growkernel must already be held and the pmap either 3109 * already locked or unreferenced globally. 3110 */ 3111static int 3112pmap_lev1map_create(pmap_t pmap, long cpu_id) 3113{ 3114 pt_entry_t *l1pt; 3115 3116 KASSERT(pmap != pmap_kernel()); 3117 3118 KASSERT(pmap->pm_lev1map == kernel_lev1map); 3119 KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED); 3120 3121 /* Don't sleep -- we're called with locks held. */ 3122 l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT); 3123 if (l1pt == NULL) 3124 return (ENOMEM); 3125 3126 pmap->pm_lev1map = l1pt; 3127 return (0); 3128} 3129 3130/* 3131 * pmap_lev1map_destroy: 3132 * 3133 * Destroy the level 1 page table for the specified pmap. 3134 * 3135 * Note: growkernel must be held and the pmap must already be 3136 * locked or not globally referenced. 3137 */ 3138static void 3139pmap_lev1map_destroy(pmap_t pmap, long cpu_id) 3140{ 3141 pt_entry_t *l1pt = pmap->pm_lev1map; 3142 3143 KASSERT(pmap != pmap_kernel()); 3144 3145 /* 3146 * Go back to referencing the global kernel_lev1map. 3147 */ 3148 pmap->pm_lev1map = kernel_lev1map; 3149 3150 /* 3151 * Free the old level 1 page table page. 3152 */ 3153 pool_cache_put(&pmap_l1pt_cache, l1pt); 3154} 3155 3156/* 3157 * pmap_l1pt_ctor: 3158 * 3159 * Pool cache constructor for L1 PT pages. 3160 * 3161 * Note: The growkernel lock is held across allocations 3162 * from our pool_cache, so we don't need to acquire it 3163 * ourselves. 3164 */ 3165static int 3166pmap_l1pt_ctor(void *arg, void *object, int flags) 3167{ 3168 pt_entry_t *l1pt = object, pte; 3169 int i; 3170 3171 /* 3172 * Initialize the new level 1 table by zeroing the 3173 * user portion and copying the kernel mappings into 3174 * the kernel portion. 3175 */ 3176 for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++) 3177 l1pt[i] = 0; 3178 3179 for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS); 3180 i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++) 3181 l1pt[i] = kernel_lev1map[i]; 3182 3183 /* 3184 * Now, map the new virtual page table. NOTE: NO ASM! 3185 */ 3186 pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) | 3187 PG_V | PG_KRE | PG_KWE; 3188 l1pt[l1pte_index(VPTBASE)] = pte; 3189 3190 return (0); 3191} 3192 3193/* 3194 * pmap_l1pt_alloc: 3195 * 3196 * Page allocator for L1 PT pages. 3197 */ 3198static void * 3199pmap_l1pt_alloc(struct pool *pp, int flags) 3200{ 3201 paddr_t ptpa; 3202 3203 /* 3204 * Attempt to allocate a free page. 3205 */ 3206 if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false) 3207 return (NULL); 3208 3209 return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa)); 3210} 3211 3212/* 3213 * pmap_l1pt_free: 3214 * 3215 * Page freer for L1 PT pages. 3216 */ 3217static void 3218pmap_l1pt_free(struct pool *pp, void *v) 3219{ 3220 3221 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v)); 3222} 3223 3224/* 3225 * pmap_ptpage_alloc: 3226 * 3227 * Allocate a level 2 or level 3 page table page, and 3228 * initialize the PTE that references it. 3229 * 3230 * Note: the pmap must already be locked. 3231 */ 3232static int 3233pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage) 3234{ 3235 paddr_t ptpa; 3236 3237 /* 3238 * Allocate the page table page.
3239 */ 3240 if (pmap_physpage_alloc(usage, &ptpa) == false) 3241 return (ENOMEM); 3242 3243 /* 3244 * Initialize the referencing PTE. 3245 */ 3246 PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) | 3247 PG_V | PG_KRE | PG_KWE | PG_WIRED | 3248 (pmap == pmap_kernel() ? PG_ASM : 0)); 3249 3250 return (0); 3251} 3252 3253/* 3254 * pmap_ptpage_free: 3255 * 3256 * Free the level 2 or level 3 page table page referenced 3257 * by the provided PTE. 3258 * 3259 * Note: the pmap must already be locked. 3260 */ 3261static void 3262pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte) 3263{ 3264 paddr_t ptpa; 3265 3266 /* 3267 * Extract the physical address of the page from the PTE 3268 * and clear the entry. 3269 */ 3270 ptpa = pmap_pte_pa(pte); 3271 PMAP_SET_PTE(pte, PG_NV); 3272 3273#ifdef DEBUG 3274 pmap_zero_page(ptpa); 3275#endif 3276 pmap_physpage_free(ptpa); 3277} 3278 3279/* 3280 * pmap_l3pt_delref: 3281 * 3282 * Delete a reference on a level 3 PT page. If the reference drops 3283 * to zero, free it. 3284 * 3285 * Note: the pmap must already be locked. 3286 */ 3287static void 3288pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id) 3289{ 3290 pt_entry_t *l1pte, *l2pte; 3291 PMAP_TLB_SHOOTDOWN_CPUSET_DECL 3292 3293 l1pte = pmap_l1pte(pmap, va); 3294 l2pte = pmap_l2pte(pmap, va, l1pte); 3295 3296#ifdef DIAGNOSTIC 3297 if (pmap == pmap_kernel()) 3298 panic("pmap_l3pt_delref: kernel pmap"); 3299#endif 3300 3301 if (pmap_physpage_delref(l3pte) == 0) { 3302 /* 3303 * No more mappings; we can free the level 3 table. 3304 */ 3305#ifdef DEBUG 3306 if (pmapdebug & PDB_PTPAGE) 3307 printf("pmap_l3pt_delref: freeing level 3 table at " 3308 "0x%lx\n", pmap_pte_pa(l2pte)); 3309#endif 3310 pmap_ptpage_free(pmap, l2pte); 3311 3312 /* 3313 * We've freed a level 3 table, so we must 3314 * invalidate the TLB entry for that PT page 3315 * in the Virtual Page Table VA range, because 3316 * otherwise the PALcode will service a TLB 3317 * miss using the stale VPT TLB entry it entered 3318 * behind our back to shortcut to the VA's PTE. 3319 */ 3320 PMAP_INVALIDATE_TLB(pmap, 3321 (vaddr_t)(&VPT[VPT_INDEX(va)]), false, 3322 PMAP_ISACTIVE(pmap, cpu_id), cpu_id); 3323 PMAP_TLB_SHOOTDOWN(pmap, 3324 (vaddr_t)(&VPT[VPT_INDEX(va)]), 0); 3325 PMAP_TLB_SHOOTNOW(); 3326 3327 /* 3328 * We've freed a level 3 table, so delete the reference 3329 * on the level 2 table. 3330 */ 3331 pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id); 3332 } 3333} 3334 3335/* 3336 * pmap_l2pt_delref: 3337 * 3338 * Delete a reference on a level 2 PT page. If the reference drops 3339 * to zero, free it. 3340 * 3341 * Note: the pmap must already be locked. 3342 */ 3343static void 3344pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte, 3345 long cpu_id) 3346{ 3347 3348#ifdef DIAGNOSTIC 3349 if (pmap == pmap_kernel()) 3350 panic("pmap_l2pt_delref: kernel pmap"); 3351#endif 3352 3353 if (pmap_physpage_delref(l2pte) == 0) { 3354 /* 3355 * No more mappings in this segment; we can free the 3356 * level 2 table. 3357 */ 3358#ifdef DEBUG 3359 if (pmapdebug & PDB_PTPAGE) 3360 printf("pmap_l2pt_delref: freeing level 2 table at " 3361 "0x%lx\n", pmap_pte_pa(l1pte)); 3362#endif 3363 pmap_ptpage_free(pmap, l1pte); 3364 3365 /* 3366 * We've freed a level 2 table, so delete the reference 3367 * on the level 1 table. 3368 */ 3369 pmap_l1pt_delref(pmap, l1pte, cpu_id); 3370 } 3371} 3372 3373/* 3374 * pmap_l1pt_delref: 3375 * 3376 * Delete a reference on a level 1 PT page. If the reference drops 3377 * to zero, free it.
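 *	(The level 1 table itself is returned to the L1 PT pool by
 *	pmap_lev1map_destroy(); this routine only drops the reference
 *	count on the page.)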
3378 * 3379 * Note: the pmap must already be locked. 3380 */ 3381static void 3382pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id) 3383{ 3384 3385#ifdef DIAGNOSTIC 3386 if (pmap == pmap_kernel()) 3387 panic("pmap_l1pt_delref: kernel pmap"); 3388#endif 3389 3390 (void)pmap_physpage_delref(l1pte); 3391} 3392 3393/******************** Address Space Number management ********************/ 3394 3395/* 3396 * pmap_asn_alloc: 3397 * 3398 * Allocate and assign an ASN to the specified pmap. 3399 * 3400 * Note: the pmap must already be locked. This may be called from 3401 * an interprocessor interrupt, and in that case, the sender of 3402 * the IPI has the pmap lock. 3403 */ 3404static void 3405pmap_asn_alloc(pmap_t pmap, long cpu_id) 3406{ 3407 struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id]; 3408 struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id]; 3409 3410#ifdef DEBUG 3411 if (pmapdebug & (PDB_FOLLOW|PDB_ASN)) 3412 printf("pmap_asn_alloc(%p)\n", pmap); 3413#endif 3414 3415 /* 3416 * If the pmap is still using the global kernel_lev1map, there 3417 * is no need to assign an ASN at this time, because only 3418 * kernel mappings exist in that map, and all kernel mappings 3419 * have PG_ASM set. If the pmap eventually gets its own 3420 * lev1map, an ASN will be allocated at that time. 3421 * 3422 * Only the kernel pmap will reference kernel_lev1map. Do the 3423 * same old fixups, but note that we no longer need the pmap 3424 * to be locked if we're in this mode, since pm_lev1map will 3425 * never change. 3426 * 3427 */ 3428 if (pmap->pm_lev1map == kernel_lev1map) { 3429#ifdef DEBUG 3430 if (pmapdebug & PDB_ASN) 3431 printf("pmap_asn_alloc: still references " 3432 "kernel_lev1map\n"); 3433#endif 3434#if defined(MULTIPROCESSOR) 3435 /* 3436 * In a multiprocessor system, it's possible to 3437 * get here without having PMAP_ASN_RESERVED in 3438 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy(). 3439 * 3440 * So, what we do here, is simply assign the reserved 3441 * ASN for kernel_lev1map users and let things 3442 * continue on. We do, however, let uniprocessor 3443 * configurations continue to make their assertion. 3444 */ 3445 pma->pma_asn = PMAP_ASN_RESERVED; 3446#else 3447 KASSERT(pma->pma_asn == PMAP_ASN_RESERVED); 3448#endif /* MULTIPROCESSOR */ 3449 return; 3450 } 3451 3452 /* 3453 * On processors which do not implement ASNs, the swpctx PALcode 3454 * operation will automatically invalidate the TLB and I-cache, 3455 * so we don't need to do that here. 3456 */ 3457 if (pmap_max_asn == 0) { 3458 /* 3459 * Refresh the pmap's generation number, to 3460 * simplify logic elsewhere. 3461 */ 3462 pma->pma_asngen = cpma->pma_asngen; 3463#ifdef DEBUG 3464 if (pmapdebug & PDB_ASN) 3465 printf("pmap_asn_alloc: no ASNs, using asngen %lu\n", 3466 pma->pma_asngen); 3467#endif 3468 return; 3469 } 3470 3471 /* 3472 * Hopefully, we can continue using the one we have... 3473 */ 3474 if (pma->pma_asn != PMAP_ASN_RESERVED && 3475 pma->pma_asngen == cpma->pma_asngen) { 3476 /* 3477 * ASN is still in the current generation; keep on using it. 3478 */ 3479#ifdef DEBUG 3480 if (pmapdebug & PDB_ASN) 3481 printf("pmap_asn_alloc: same generation, keeping %u\n", 3482 pma->pma_asn); 3483#endif 3484 return; 3485 } 3486 3487 /* 3488 * Need to assign a new ASN. Grab the next one, incrementing 3489 * the generation number if we have to. 3490 */ 3491 if (cpma->pma_asn > pmap_max_asn) { 3492 /* 3493 * Invalidate all non-PG_ASM TLB entries and the 3494 * I-cache, and bump the generation number.
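 *	(ALPHA_TBIAP() leaves entries with PG_ASM set alone, so wired
 *	kernel mappings survive the flush; alpha_pal_imb() synchronizes
 *	the I-stream.)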
3495 */ 3496 ALPHA_TBIAP(); 3497 alpha_pal_imb(); 3498 3499 cpma->pma_asn = 1; 3500 cpma->pma_asngen++; 3501#ifdef DIAGNOSTIC 3502 if (cpma->pma_asngen == 0) { 3503 /* 3504 * The generation number has wrapped. We could 3505 * handle this scenario by traversing all of 3506 * the pmaps, and invalidating the generation 3507 * number on those which are not currently 3508 * in use by this processor. 3509 * 3510 * However... considering that we're using 3511 * an unsigned 64-bit integer for generation 3512 * numbers, on non-ASN CPUs, we won't wrap 3513 * for approx. 585 million years, or 75 billion 3514 * years on a 128-ASN CPU (assuming 1000 switch 3515 * operations per second). 3516 * 3517 * So, we don't bother. 3518 */ 3519 panic("pmap_asn_alloc: too much uptime"); 3520 } 3521#endif 3522#ifdef DEBUG 3523 if (pmapdebug & PDB_ASN) 3524 printf("pmap_asn_alloc: generation bumped to %lu\n", 3525 cpma->pma_asngen); 3526#endif 3527 } 3528 3529 /* 3530 * Assign the new ASN and validate the generation number. 3531 */ 3532 pma->pma_asn = cpma->pma_asn++; 3533 pma->pma_asngen = cpma->pma_asngen; 3534 3535#ifdef DEBUG 3536 if (pmapdebug & PDB_ASN) 3537 printf("pmap_asn_alloc: assigning %u to pmap %p\n", 3538 pma->pma_asn, pmap); 3539#endif 3540 3541 /* 3542 * Have a new ASN, so there's no need to sync the I-stream 3543 * on the way back out to userspace. 3544 */ 3545 atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id)); 3546} 3547 3548#if defined(MULTIPROCESSOR) 3549/******************** TLB shootdown code ********************/ 3550 3551/* 3552 * pmap_tlb_shootdown: 3553 * 3554 * Cause the TLB entry for pmap/va to be shot down. 3555 * 3556 * NOTE: The pmap must be locked here. 3557 */ 3558void 3559pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) 3560{ 3561 struct pmap_tlb_shootdown_q *pq; 3562 struct pmap_tlb_shootdown_job *pj; 3563 struct cpu_info *ci, *self = curcpu(); 3564 u_long cpumask; 3565 CPU_INFO_ITERATOR cii; 3566 3567 KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock)); 3568 3569 cpumask = 0; 3570 3571 for (CPU_INFO_FOREACH(cii, ci)) { 3572 if (ci == self) 3573 continue; 3574 3575 /* 3576 * The pmap must be locked (unless its the kernel 3577 * pmap, in which case it is okay for it to be 3578 * unlocked), which prevents it from becoming 3579 * active on any additional processors. This makes 3580 * it safe to check for activeness. If it's not 3581 * active on the processor in question, then just 3582 * mark it as needing a new ASN the next time it 3583 * does, saving the IPI. We always have to send 3584 * the IPI for the kernel pmap. 3585 * 3586 * Note if it's marked active now, and it becomes 3587 * inactive by the time the processor receives 3588 * the IPI, that's okay, because it does the right 3589 * thing with it later. 3590 */ 3591 if (pmap != pmap_kernel() && 3592 PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) { 3593 PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid); 3594 continue; 3595 } 3596 3597 cpumask |= 1UL << ci->ci_cpuid; 3598 3599 pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; 3600 mutex_spin_enter(&pq->pq_lock); 3601 3602 /* 3603 * Allocate a job. 3604 */ 3605 if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) { 3606 pj = pool_cache_get(&pmap_tlb_shootdown_job_cache, 3607 PR_NOWAIT); 3608 } else { 3609 pj = NULL; 3610 } 3611 3612 /* 3613 * If a global flush is already pending, we 3614 * don't really have to do anything else. 
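 *	We still OR the PTE bits into pq_pte, though, since
 *	pmap_do_tlb_shootdown() looks at PG_ASM in the accumulated bits
 *	to decide between ALPHA_TBIA() and ALPHA_TBIAP().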
3615 */ 3616 pq->pq_pte |= pte; 3617 if (pq->pq_tbia) { 3618 mutex_spin_exit(&pq->pq_lock); 3619 if (pj != NULL) { 3620 pool_cache_put(&pmap_tlb_shootdown_job_cache, 3621 pj); 3622 } 3623 continue; 3624 } 3625 if (pj == NULL) { 3626 /* 3627 * Couldn't allocate a job entry. Just 3628 * tell the processor to kill everything. 3629 */ 3630 pq->pq_tbia = 1; 3631 } else { 3632 pj->pj_pmap = pmap; 3633 pj->pj_va = va; 3634 pj->pj_pte = pte; 3635 pq->pq_count++; 3636 TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list); 3637 } 3638 mutex_spin_exit(&pq->pq_lock); 3639 } 3640 3641 *cpumaskp |= cpumask; 3642} 3643 3644/* 3645 * pmap_tlb_shootnow: 3646 * 3647 * Process the TLB shootdowns that we have been accumulating 3648 * for the specified processor set. 3649 */ 3650void 3651pmap_tlb_shootnow(u_long cpumask) 3652{ 3653 3654 alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN); 3655} 3656 3657/* 3658 * pmap_do_tlb_shootdown: 3659 * 3660 * Process pending TLB shootdown operations for this processor. 3661 */ 3662void 3663pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) 3664{ 3665 u_long cpu_id = ci->ci_cpuid; 3666 u_long cpu_mask = (1UL << cpu_id); 3667 struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; 3668 struct pmap_tlb_shootdown_job *pj, *next; 3669 TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs; 3670 3671 TAILQ_INIT(&jobs); 3672 3673 mutex_spin_enter(&pq->pq_lock); 3674 TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list); 3675 if (pq->pq_tbia) { 3676 if (pq->pq_pte & PG_ASM) 3677 ALPHA_TBIA(); 3678 else 3679 ALPHA_TBIAP(); 3680 pq->pq_tbia = 0; 3681 pq->pq_pte = 0; 3682 } else { 3683 TAILQ_FOREACH(pj, &jobs, pj_list) { 3684 PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va, 3685 pj->pj_pte & PG_ASM, 3686 pj->pj_pmap->pm_cpus & cpu_mask, cpu_id); 3687 } 3688 pq->pq_pte = 0; 3689 } 3690 pq->pq_count = 0; 3691 mutex_spin_exit(&pq->pq_lock); 3692 3693 /* Free jobs back to the cache. */ 3694 for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) { 3695 next = TAILQ_NEXT(pj, pj_list); 3696 pool_cache_put(&pmap_tlb_shootdown_job_cache, pj); 3697 } 3698} 3699#endif /* MULTIPROCESSOR */ 3700