pmap.c revision 305879
1/*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * Copyright (c) 2003 Peter Wemm 9 * All rights reserved. 10 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> 11 * All rights reserved. 12 * Copyright (c) 2014 Andrew Turner 13 * All rights reserved. 14 * Copyright (c) 2014-2016 The FreeBSD Foundation 15 * All rights reserved. 16 * 17 * This code is derived from software contributed to Berkeley by 18 * the Systems Programming Group of the University of Utah Computer 19 * Science Department and William Jolitz of UUNET Technologies Inc. 20 * 21 * This software was developed by Andrew Turner under sponsorship from 22 * the FreeBSD Foundation. 23 * 24 * Redistribution and use in source and binary forms, with or without 25 * modification, are permitted provided that the following conditions 26 * are met: 27 * 1. Redistributions of source code must retain the above copyright 28 * notice, this list of conditions and the following disclaimer. 29 * 2. Redistributions in binary form must reproduce the above copyright 30 * notice, this list of conditions and the following disclaimer in the 31 * documentation and/or other materials provided with the distribution. 32 * 3. All advertising materials mentioning features or use of this software 33 * must display the following acknowledgement: 34 * This product includes software developed by the University of 35 * California, Berkeley and its contributors. 36 * 4. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 53 */ 54/*- 55 * Copyright (c) 2003 Networks Associates Technology, Inc. 56 * All rights reserved. 57 * 58 * This software was developed for the FreeBSD Project by Jake Burkholder, 59 * Safeport Network Services, and Network Associates Laboratories, the 60 * Security Research Division of Network Associates, Inc. under 61 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 62 * CHATS research program. 63 * 64 * Redistribution and use in source and binary forms, with or without 65 * modification, are permitted provided that the following conditions 66 * are met: 67 * 1. Redistributions of source code must retain the above copyright 68 * notice, this list of conditions and the following disclaimer. 69 * 2. 
Redistributions in binary form must reproduce the above copyright 70 * notice, this list of conditions and the following disclaimer in the 71 * documentation and/or other materials provided with the distribution. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 84 */ 85 86#include <sys/cdefs.h> 87__FBSDID("$FreeBSD: stable/11/sys/arm64/arm64/pmap.c 305879 2016-09-16 12:17:01Z andrew $"); 88 89/* 90 * Manages physical address maps. 91 * 92 * Since the information managed by this module is 93 * also stored by the logical address mapping module, 94 * this module may throw away valid virtual-to-physical 95 * mappings at almost any time. However, invalidations 96 * of virtual-to-physical mappings must be done as 97 * requested. 98 * 99 * In order to cope with hardware architectures which 100 * make virtual-to-physical map invalidates expensive, 101 * this module may delay invalidate or reduced protection 102 * operations until such time as they are actually 103 * necessary. This module is given full information as 104 * to which processors are currently using which maps, 105 * and to when physical maps must be made correct. 106 */ 107 108#include <sys/param.h> 109#include <sys/bus.h> 110#include <sys/systm.h> 111#include <sys/kernel.h> 112#include <sys/ktr.h> 113#include <sys/lock.h> 114#include <sys/malloc.h> 115#include <sys/mman.h> 116#include <sys/msgbuf.h> 117#include <sys/mutex.h> 118#include <sys/proc.h> 119#include <sys/rwlock.h> 120#include <sys/sx.h> 121#include <sys/vmem.h> 122#include <sys/vmmeter.h> 123#include <sys/sched.h> 124#include <sys/sysctl.h> 125#include <sys/_unrhdr.h> 126#include <sys/smp.h> 127 128#include <vm/vm.h> 129#include <vm/vm_param.h> 130#include <vm/vm_kern.h> 131#include <vm/vm_page.h> 132#include <vm/vm_map.h> 133#include <vm/vm_object.h> 134#include <vm/vm_extern.h> 135#include <vm/vm_pageout.h> 136#include <vm/vm_pager.h> 137#include <vm/vm_radix.h> 138#include <vm/vm_reserv.h> 139#include <vm/uma.h> 140 141#include <machine/machdep.h> 142#include <machine/md_var.h> 143#include <machine/pcb.h> 144 145#define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) 146#define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) 147#define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) 148#define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) 149 150#define NUL0E L0_ENTRIES 151#define NUL1E (NUL0E * NL1PG) 152#define NUL2E (NUL1E * NL2PG) 153 154#if !defined(DIAGNOSTIC) 155#ifdef __GNUC_GNU_INLINE__ 156#define PMAP_INLINE __attribute__((__gnu_inline__)) inline 157#else 158#define PMAP_INLINE extern inline 159#endif 160#else 161#define PMAP_INLINE 162#endif 163 164/* 165 * These are configured by the mair_el1 register. 
This is set up in locore.S 166 */ 167#define DEVICE_MEMORY 0 168#define UNCACHED_MEMORY 1 169#define CACHED_MEMORY 2 170 171 172#ifdef PV_STATS 173#define PV_STAT(x) do { x ; } while (0) 174#else 175#define PV_STAT(x) do { } while (0) 176#endif 177 178#define pmap_l2_pindex(v) ((v) >> L2_SHIFT) 179 180#define NPV_LIST_LOCKS MAXCPU 181 182#define PHYS_TO_PV_LIST_LOCK(pa) \ 183 (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) 184 185#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ 186 struct rwlock **_lockp = (lockp); \ 187 struct rwlock *_new_lock; \ 188 \ 189 _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ 190 if (_new_lock != *_lockp) { \ 191 if (*_lockp != NULL) \ 192 rw_wunlock(*_lockp); \ 193 *_lockp = _new_lock; \ 194 rw_wlock(*_lockp); \ 195 } \ 196} while (0) 197 198#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ 199 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) 200 201#define RELEASE_PV_LIST_LOCK(lockp) do { \ 202 struct rwlock **_lockp = (lockp); \ 203 \ 204 if (*_lockp != NULL) { \ 205 rw_wunlock(*_lockp); \ 206 *_lockp = NULL; \ 207 } \ 208} while (0) 209 210#define VM_PAGE_TO_PV_LIST_LOCK(m) \ 211 PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) 212 213struct pmap kernel_pmap_store; 214 215vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 216vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 217vm_offset_t kernel_vm_end = 0; 218 219struct msgbuf *msgbufp = NULL; 220 221vm_paddr_t dmap_phys_base; /* The start of the dmap region */ 222vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ 223vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ 224 225/* This code assumes all L1 DMAP entries will be used */ 226CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS); 227CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS); 228 229#define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) 230extern pt_entry_t pagetable_dmap[]; 231 232/* 233 * Data for the pv entry allocation mechanism 234 */ 235static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 236static struct mtx pv_chunks_mutex; 237static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; 238 239static void free_pv_chunk(struct pv_chunk *pc); 240static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 241static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); 242static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); 243static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 244static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 245 vm_offset_t va); 246static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 247 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); 248static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, 249 pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); 250static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 251 vm_page_t m, struct rwlock **lockp); 252 253static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, 254 struct rwlock **lockp); 255 256static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, 257 struct spglist *free); 258static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); 259 260/* 261 * These load the old table data and store the new value. 262 * They need to be atomic as the System MMU may write to the table at 263 * the same time as the CPU. 
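 *
 * For illustration only (not part of the original source): a typical
 * update of an L3 entry through these accessors swaps in the new PTE
 * atomically and receives the old contents back in one step, e.g.
 *
 *	pt_entry_t old_l3, new_l3;
 *
 *	new_l3 = (pa & ~ATTR_MASK) | ATTR_DEFAULT | L3_PAGE;
 *	old_l3 = pmap_load_store(l3, new_l3);
 *	if ((old_l3 & ATTR_SW_WIRED) != 0)
 *		pmap->pm_stats.wired_count--;
 *
 * Returning the previous entry lets callers account for the software
 * bits (wired, managed) without a separate, non-atomic read.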
264 */ 265#define pmap_load_store(table, entry) atomic_swap_64(table, entry) 266#define pmap_set(table, mask) atomic_set_64(table, mask) 267#define pmap_load_clear(table) atomic_swap_64(table, 0) 268#define pmap_load(table) (*table) 269 270/********************/ 271/* Inline functions */ 272/********************/ 273 274static __inline void 275pagecopy(void *s, void *d) 276{ 277 278 memcpy(d, s, PAGE_SIZE); 279} 280 281#define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK) 282#define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) 283#define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) 284#define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) 285 286static __inline pd_entry_t * 287pmap_l0(pmap_t pmap, vm_offset_t va) 288{ 289 290 return (&pmap->pm_l0[pmap_l0_index(va)]); 291} 292 293static __inline pd_entry_t * 294pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) 295{ 296 pd_entry_t *l1; 297 298 l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); 299 return (&l1[pmap_l1_index(va)]); 300} 301 302static __inline pd_entry_t * 303pmap_l1(pmap_t pmap, vm_offset_t va) 304{ 305 pd_entry_t *l0; 306 307 l0 = pmap_l0(pmap, va); 308 if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) 309 return (NULL); 310 311 return (pmap_l0_to_l1(l0, va)); 312} 313 314static __inline pd_entry_t * 315pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) 316{ 317 pd_entry_t *l2; 318 319 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 320 return (&l2[pmap_l2_index(va)]); 321} 322 323static __inline pd_entry_t * 324pmap_l2(pmap_t pmap, vm_offset_t va) 325{ 326 pd_entry_t *l1; 327 328 l1 = pmap_l1(pmap, va); 329 if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE) 330 return (NULL); 331 332 return (pmap_l1_to_l2(l1, va)); 333} 334 335static __inline pt_entry_t * 336pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) 337{ 338 pt_entry_t *l3; 339 340 l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); 341 return (&l3[pmap_l3_index(va)]); 342} 343 344/* 345 * Returns the lowest valid pde for a given virtual address. 346 * The next level may or may not point to a valid page or block. 347 */ 348static __inline pd_entry_t * 349pmap_pde(pmap_t pmap, vm_offset_t va, int *level) 350{ 351 pd_entry_t *l0, *l1, *l2, desc; 352 353 l0 = pmap_l0(pmap, va); 354 desc = pmap_load(l0) & ATTR_DESCR_MASK; 355 if (desc != L0_TABLE) { 356 *level = -1; 357 return (NULL); 358 } 359 360 l1 = pmap_l0_to_l1(l0, va); 361 desc = pmap_load(l1) & ATTR_DESCR_MASK; 362 if (desc != L1_TABLE) { 363 *level = 0; 364 return (l0); 365 } 366 367 l2 = pmap_l1_to_l2(l1, va); 368 desc = pmap_load(l2) & ATTR_DESCR_MASK; 369 if (desc != L2_TABLE) { 370 *level = 1; 371 return (l1); 372 } 373 374 *level = 2; 375 return (l2); 376} 377 378/* 379 * Returns the lowest valid pte block or table entry for a given virtual 380 * address. If there are no valid entries return NULL and set the level to 381 * the first invalid level. 
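 *
 * A sketch of a typical caller (modelled on pmap_extract() below, not a
 * new interface): the returned level tells how much of the virtual
 * address is an offset within the block or page, e.g.
 *
 *	pte = pmap_pte(pmap, va, &lvl);
 *	if (pte != NULL) {
 *		tpte = pmap_load(pte);
 *		pa = tpte & ~ATTR_MASK;
 *		if (lvl == 1)
 *			pa |= (va & L1_OFFSET);
 *		else if (lvl == 2)
 *			pa |= (va & L2_OFFSET);
 *		else
 *			pa |= (va & L3_OFFSET);
 *	}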
382 */ 383static __inline pt_entry_t * 384pmap_pte(pmap_t pmap, vm_offset_t va, int *level) 385{ 386 pd_entry_t *l1, *l2, desc; 387 pt_entry_t *l3; 388 389 l1 = pmap_l1(pmap, va); 390 if (l1 == NULL) { 391 *level = 0; 392 return (NULL); 393 } 394 desc = pmap_load(l1) & ATTR_DESCR_MASK; 395 if (desc == L1_BLOCK) { 396 *level = 1; 397 return (l1); 398 } 399 400 if (desc != L1_TABLE) { 401 *level = 1; 402 return (NULL); 403 } 404 405 l2 = pmap_l1_to_l2(l1, va); 406 desc = pmap_load(l2) & ATTR_DESCR_MASK; 407 if (desc == L2_BLOCK) { 408 *level = 2; 409 return (l2); 410 } 411 412 if (desc != L2_TABLE) { 413 *level = 2; 414 return (NULL); 415 } 416 417 *level = 3; 418 l3 = pmap_l2_to_l3(l2, va); 419 if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE) 420 return (NULL); 421 422 return (l3); 423} 424 425bool 426pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, 427 pd_entry_t **l2, pt_entry_t **l3) 428{ 429 pd_entry_t *l0p, *l1p, *l2p; 430 431 if (pmap->pm_l0 == NULL) 432 return (false); 433 434 l0p = pmap_l0(pmap, va); 435 *l0 = l0p; 436 437 if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) 438 return (false); 439 440 l1p = pmap_l0_to_l1(l0p, va); 441 *l1 = l1p; 442 443 if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { 444 *l2 = NULL; 445 *l3 = NULL; 446 return (true); 447 } 448 449 if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE) 450 return (false); 451 452 l2p = pmap_l1_to_l2(l1p, va); 453 *l2 = l2p; 454 455 if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) { 456 *l3 = NULL; 457 return (true); 458 } 459 460 *l3 = pmap_l2_to_l3(l2p, va); 461 462 return (true); 463} 464 465static __inline int 466pmap_is_current(pmap_t pmap) 467{ 468 469 return ((pmap == pmap_kernel()) || 470 (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); 471} 472 473static __inline int 474pmap_l3_valid(pt_entry_t l3) 475{ 476 477 return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); 478} 479 480static __inline int 481pmap_l3_valid_cacheable(pt_entry_t l3) 482{ 483 484 return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) && 485 ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); 486} 487 488#define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) 489 490/* 491 * Checks if the page is dirty. We currently lack proper tracking of this on 492 * arm64 so for now assume is a page mapped as rw was accessed it is. 
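 *
 * Concretely, the test below treats an entry as dirty when it has both
 * the access flag and read/write permission, i.e. when
 * (pte & (ATTR_AF | ATTR_AP_RW_BIT)) == (ATTR_AF | ATTR_AP(ATTR_AP_RW)).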
493 */ 494static inline int 495pmap_page_dirty(pt_entry_t pte) 496{ 497 498 return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) == 499 (ATTR_AF | ATTR_AP(ATTR_AP_RW))); 500} 501 502static __inline void 503pmap_resident_count_inc(pmap_t pmap, int count) 504{ 505 506 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 507 pmap->pm_stats.resident_count += count; 508} 509 510static __inline void 511pmap_resident_count_dec(pmap_t pmap, int count) 512{ 513 514 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 515 KASSERT(pmap->pm_stats.resident_count >= count, 516 ("pmap %p resident count underflow %ld %d", pmap, 517 pmap->pm_stats.resident_count, count)); 518 pmap->pm_stats.resident_count -= count; 519} 520 521static pt_entry_t * 522pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, 523 u_int *l2_slot) 524{ 525 pt_entry_t *l2; 526 pd_entry_t *l1; 527 528 l1 = (pd_entry_t *)l1pt; 529 *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; 530 531 /* Check locore has used a table L1 map */ 532 KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, 533 ("Invalid bootstrap L1 table")); 534 /* Find the address of the L2 table */ 535 l2 = (pt_entry_t *)init_pt_va; 536 *l2_slot = pmap_l2_index(va); 537 538 return (l2); 539} 540 541static vm_paddr_t 542pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) 543{ 544 u_int l1_slot, l2_slot; 545 pt_entry_t *l2; 546 547 l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); 548 549 return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); 550} 551 552static void 553pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) 554{ 555 vm_offset_t va; 556 vm_paddr_t pa; 557 u_int l1_slot; 558 559 pa = dmap_phys_base = min_pa & ~L1_OFFSET; 560 va = DMAP_MIN_ADDRESS; 561 for (; va < DMAP_MAX_ADDRESS && pa < max_pa; 562 pa += L1_SIZE, va += L1_SIZE, l1_slot++) { 563 l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); 564 565 pmap_load_store(&pagetable_dmap[l1_slot], 566 (pa & ~L1_OFFSET) | ATTR_DEFAULT | 567 ATTR_IDX(CACHED_MEMORY) | L1_BLOCK); 568 } 569 570 /* Set the upper limit of the DMAP region */ 571 dmap_phys_max = pa; 572 dmap_max_addr = va; 573 574 cpu_dcache_wb_range((vm_offset_t)pagetable_dmap, 575 PAGE_SIZE * DMAP_TABLES); 576 cpu_tlb_flushID(); 577} 578 579static vm_offset_t 580pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) 581{ 582 vm_offset_t l2pt; 583 vm_paddr_t pa; 584 pd_entry_t *l1; 585 u_int l1_slot; 586 587 KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); 588 589 l1 = (pd_entry_t *)l1pt; 590 l1_slot = pmap_l1_index(va); 591 l2pt = l2_start; 592 593 for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { 594 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); 595 596 pa = pmap_early_vtophys(l1pt, l2pt); 597 pmap_load_store(&l1[l1_slot], 598 (pa & ~Ln_TABLE_MASK) | L1_TABLE); 599 l2pt += PAGE_SIZE; 600 } 601 602 /* Clean the L2 page table */ 603 memset((void *)l2_start, 0, l2pt - l2_start); 604 cpu_dcache_wb_range(l2_start, l2pt - l2_start); 605 606 /* Flush the l1 table to ram */ 607 cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); 608 609 return l2pt; 610} 611 612static vm_offset_t 613pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) 614{ 615 vm_offset_t l2pt, l3pt; 616 vm_paddr_t pa; 617 pd_entry_t *l2; 618 u_int l2_slot; 619 620 KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); 621 622 l2 = pmap_l2(kernel_pmap, va); 623 l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE); 624 l2pt = (vm_offset_t)l2; 625 l2_slot = pmap_l2_index(va); 626 l3pt = l3_start; 627 628 for (; va < 
VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { 629 KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); 630 631 pa = pmap_early_vtophys(l1pt, l3pt); 632 pmap_load_store(&l2[l2_slot], 633 (pa & ~Ln_TABLE_MASK) | L2_TABLE); 634 l3pt += PAGE_SIZE; 635 } 636 637 /* Clean the L2 page table */ 638 memset((void *)l3_start, 0, l3pt - l3_start); 639 cpu_dcache_wb_range(l3_start, l3pt - l3_start); 640 641 cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); 642 643 return l3pt; 644} 645 646/* 647 * Bootstrap the system enough to run with virtual memory. 648 */ 649void 650pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, 651 vm_size_t kernlen) 652{ 653 u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; 654 uint64_t kern_delta; 655 pt_entry_t *l2; 656 vm_offset_t va, freemempos; 657 vm_offset_t dpcpu, msgbufpv; 658 vm_paddr_t pa, max_pa, min_pa; 659 int i; 660 661 kern_delta = KERNBASE - kernstart; 662 physmem = 0; 663 664 printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); 665 printf("%lx\n", l1pt); 666 printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); 667 668 /* Set this early so we can use the pagetable walking functions */ 669 kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; 670 PMAP_LOCK_INIT(kernel_pmap); 671 672 /* Assume the address we were loaded to is a valid physical address */ 673 min_pa = max_pa = KERNBASE - kern_delta; 674 675 /* 676 * Find the minimum physical address. physmap is sorted, 677 * but may contain empty ranges. 678 */ 679 for (i = 0; i < (physmap_idx * 2); i += 2) { 680 if (physmap[i] == physmap[i + 1]) 681 continue; 682 if (physmap[i] <= min_pa) 683 min_pa = physmap[i]; 684 if (physmap[i + 1] > max_pa) 685 max_pa = physmap[i + 1]; 686 } 687 688 /* Create a direct map region early so we can use it for pa -> va */ 689 pmap_bootstrap_dmap(l1pt, min_pa, max_pa); 690 691 va = KERNBASE; 692 pa = KERNBASE - kern_delta; 693 694 /* 695 * Start to initialise phys_avail by copying from physmap 696 * up to the physical address KERNBASE points at. 697 */ 698 map_slot = avail_slot = 0; 699 for (; map_slot < (physmap_idx * 2) && 700 avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) { 701 if (physmap[map_slot] == physmap[map_slot + 1]) 702 continue; 703 704 if (physmap[map_slot] <= pa && 705 physmap[map_slot + 1] > pa) 706 break; 707 708 phys_avail[avail_slot] = physmap[map_slot]; 709 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 710 physmem += (phys_avail[avail_slot + 1] - 711 phys_avail[avail_slot]) >> PAGE_SHIFT; 712 avail_slot += 2; 713 } 714 715 /* Add the memory before the kernel */ 716 if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) { 717 phys_avail[avail_slot] = physmap[map_slot]; 718 phys_avail[avail_slot + 1] = pa; 719 physmem += (phys_avail[avail_slot + 1] - 720 phys_avail[avail_slot]) >> PAGE_SHIFT; 721 avail_slot += 2; 722 } 723 used_map_slot = map_slot; 724 725 /* 726 * Read the page table to find out what is already mapped. 727 * This assumes we have mapped a block of memory from KERNBASE 728 * using a single L1 entry. 
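 * (That is, locore installed L2 block mappings for the kernel image
 * under a single L1 table entry; the loop below walks those blocks and
 * stops at the first empty slot to find how far the mapping extends.)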
729 */ 730 l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); 731 732 /* Sanity check the index, KERNBASE should be the first VA */ 733 KASSERT(l2_slot == 0, ("The L2 index is non-zero")); 734 735 /* Find how many pages we have mapped */ 736 for (; l2_slot < Ln_ENTRIES; l2_slot++) { 737 if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) 738 break; 739 740 /* Check locore used L2 blocks */ 741 KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, 742 ("Invalid bootstrap L2 table")); 743 KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, 744 ("Incorrect PA in L2 table")); 745 746 va += L2_SIZE; 747 pa += L2_SIZE; 748 } 749 750 va = roundup2(va, L1_SIZE); 751 752 freemempos = KERNBASE + kernlen; 753 freemempos = roundup2(freemempos, PAGE_SIZE); 754 /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ 755 freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); 756 /* And the l3 tables for the early devmap */ 757 freemempos = pmap_bootstrap_l3(l1pt, 758 VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); 759 760 cpu_tlb_flushID(); 761 762#define alloc_pages(var, np) \ 763 (var) = freemempos; \ 764 freemempos += (np * PAGE_SIZE); \ 765 memset((char *)(var), 0, ((np) * PAGE_SIZE)); 766 767 /* Allocate dynamic per-cpu area. */ 768 alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); 769 dpcpu_init((void *)dpcpu, 0); 770 771 /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ 772 alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); 773 msgbufp = (void *)msgbufpv; 774 775 virtual_avail = roundup2(freemempos, L1_SIZE); 776 virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; 777 kernel_vm_end = virtual_avail; 778 779 pa = pmap_early_vtophys(l1pt, freemempos); 780 781 /* Finish initialising physmap */ 782 map_slot = used_map_slot; 783 for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && 784 map_slot < (physmap_idx * 2); map_slot += 2) { 785 if (physmap[map_slot] == physmap[map_slot + 1]) 786 continue; 787 788 /* Have we used the current range? */ 789 if (physmap[map_slot + 1] <= pa) 790 continue; 791 792 /* Do we need to split the entry? */ 793 if (physmap[map_slot] < pa) { 794 phys_avail[avail_slot] = pa; 795 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 796 } else { 797 phys_avail[avail_slot] = physmap[map_slot]; 798 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 799 } 800 physmem += (phys_avail[avail_slot + 1] - 801 phys_avail[avail_slot]) >> PAGE_SHIFT; 802 803 avail_slot += 2; 804 } 805 phys_avail[avail_slot] = 0; 806 phys_avail[avail_slot + 1] = 0; 807 808 /* 809 * Maxmem isn't the "maximum memory", it's one larger than the 810 * highest page of the physical address space. It should be 811 * called something like "Maxphyspage". 812 */ 813 Maxmem = atop(phys_avail[avail_slot - 1]); 814 815 cpu_tlb_flushID(); 816} 817 818/* 819 * Initialize a vm_page's machine-dependent fields. 820 */ 821void 822pmap_page_init(vm_page_t m) 823{ 824 825 TAILQ_INIT(&m->md.pv_list); 826 m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; 827} 828 829/* 830 * Initialize the pmap module. 831 * Called by vm_init, to initialize any structures that the pmap 832 * system needs to map virtual memory. 833 */ 834void 835pmap_init(void) 836{ 837 int i; 838 839 /* 840 * Initialize the pv chunk list mutex. 841 */ 842 mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); 843 844 /* 845 * Initialize the pool of pv list locks. 846 */ 847 for (i = 0; i < NPV_LIST_LOCKS; i++) 848 rw_init(&pv_list_locks[i], "pmap pv list"); 849} 850 851/* 852 * Invalidate a single TLB entry. 
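 *
 * The sequence used below is the usual ARMv8 broadcast invalidate:
 * "dsb ishst" makes the preceding page table store visible before the
 * TLBI, "tlbi vaae1is" invalidates the VA (passed right-shifted by
 * PAGE_SHIFT, as the operand carries the address in page units) for all
 * ASIDs on every CPU in the inner shareable domain, and "dsb ish"/"isb"
 * wait for the invalidation to complete before returning.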
853 */ 854PMAP_INLINE void 855pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 856{ 857 858 sched_pin(); 859 __asm __volatile( 860 "dsb ishst \n" 861 "tlbi vaae1is, %0 \n" 862 "dsb ish \n" 863 "isb \n" 864 : : "r"(va >> PAGE_SHIFT)); 865 sched_unpin(); 866} 867 868PMAP_INLINE void 869pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 870{ 871 vm_offset_t addr; 872 873 sched_pin(); 874 dsb(ishst); 875 for (addr = sva; addr < eva; addr += PAGE_SIZE) { 876 __asm __volatile( 877 "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT)); 878 } 879 __asm __volatile( 880 "dsb ish \n" 881 "isb \n"); 882 sched_unpin(); 883} 884 885PMAP_INLINE void 886pmap_invalidate_all(pmap_t pmap) 887{ 888 889 sched_pin(); 890 __asm __volatile( 891 "dsb ishst \n" 892 "tlbi vmalle1is \n" 893 "dsb ish \n" 894 "isb \n"); 895 sched_unpin(); 896} 897 898/* 899 * Routine: pmap_extract 900 * Function: 901 * Extract the physical page address associated 902 * with the given map/virtual_address pair. 903 */ 904vm_paddr_t 905pmap_extract(pmap_t pmap, vm_offset_t va) 906{ 907 pt_entry_t *pte, tpte; 908 vm_paddr_t pa; 909 int lvl; 910 911 pa = 0; 912 PMAP_LOCK(pmap); 913 /* 914 * Find the block or page map for this virtual address. pmap_pte 915 * will return either a valid block/page entry, or NULL. 916 */ 917 pte = pmap_pte(pmap, va, &lvl); 918 if (pte != NULL) { 919 tpte = pmap_load(pte); 920 pa = tpte & ~ATTR_MASK; 921 switch(lvl) { 922 case 1: 923 KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, 924 ("pmap_extract: Invalid L1 pte found: %lx", 925 tpte & ATTR_DESCR_MASK)); 926 pa |= (va & L1_OFFSET); 927 break; 928 case 2: 929 KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, 930 ("pmap_extract: Invalid L2 pte found: %lx", 931 tpte & ATTR_DESCR_MASK)); 932 pa |= (va & L2_OFFSET); 933 break; 934 case 3: 935 KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, 936 ("pmap_extract: Invalid L3 pte found: %lx", 937 tpte & ATTR_DESCR_MASK)); 938 pa |= (va & L3_OFFSET); 939 break; 940 } 941 } 942 PMAP_UNLOCK(pmap); 943 return (pa); 944} 945 946/* 947 * Routine: pmap_extract_and_hold 948 * Function: 949 * Atomically extract and hold the physical page 950 * with the given pmap and virtual address pair 951 * if that mapping permits the given protection. 
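 *
 * A hypothetical caller (sketch only; not taken from this file) would
 * release the hold once it is done with the page:
 *
 *	m = pmap_extract_and_hold(pmap, va, VM_PROT_WRITE);
 *	if (m != NULL) {
 *		... access the page contents via the direct map ...
 *		vm_page_unhold(m);
 *	}
 *
 * The hold keeps the page from being freed, but it does not prevent the
 * mapping at "va" from being changed or removed afterwards.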
952 */ 953vm_page_t 954pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 955{ 956 pt_entry_t *pte, tpte; 957 vm_paddr_t pa; 958 vm_page_t m; 959 int lvl; 960 961 pa = 0; 962 m = NULL; 963 PMAP_LOCK(pmap); 964retry: 965 pte = pmap_pte(pmap, va, &lvl); 966 if (pte != NULL) { 967 tpte = pmap_load(pte); 968 969 KASSERT(lvl > 0 && lvl <= 3, 970 ("pmap_extract_and_hold: Invalid level %d", lvl)); 971 CTASSERT(L1_BLOCK == L2_BLOCK); 972 KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || 973 (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), 974 ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, 975 tpte & ATTR_DESCR_MASK)); 976 if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || 977 ((prot & VM_PROT_WRITE) == 0)) { 978 if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa)) 979 goto retry; 980 m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); 981 vm_page_hold(m); 982 } 983 } 984 PA_UNLOCK_COND(pa); 985 PMAP_UNLOCK(pmap); 986 return (m); 987} 988 989vm_paddr_t 990pmap_kextract(vm_offset_t va) 991{ 992 pt_entry_t *pte, tpte; 993 vm_paddr_t pa; 994 int lvl; 995 996 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 997 pa = DMAP_TO_PHYS(va); 998 } else { 999 pa = 0; 1000 pte = pmap_pte(kernel_pmap, va, &lvl); 1001 if (pte != NULL) { 1002 tpte = pmap_load(pte); 1003 pa = tpte & ~ATTR_MASK; 1004 switch(lvl) { 1005 case 1: 1006 KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, 1007 ("pmap_kextract: Invalid L1 pte found: %lx", 1008 tpte & ATTR_DESCR_MASK)); 1009 pa |= (va & L1_OFFSET); 1010 break; 1011 case 2: 1012 KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, 1013 ("pmap_kextract: Invalid L2 pte found: %lx", 1014 tpte & ATTR_DESCR_MASK)); 1015 pa |= (va & L2_OFFSET); 1016 break; 1017 case 3: 1018 KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, 1019 ("pmap_kextract: Invalid L3 pte found: %lx", 1020 tpte & ATTR_DESCR_MASK)); 1021 pa |= (va & L3_OFFSET); 1022 break; 1023 } 1024 } 1025 } 1026 return (pa); 1027} 1028 1029/*************************************************** 1030 * Low level mapping routines..... 1031 ***************************************************/ 1032 1033static void 1034pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode) 1035{ 1036 pd_entry_t *pde; 1037 pt_entry_t *pte; 1038 vm_offset_t va; 1039 int lvl; 1040 1041 KASSERT((pa & L3_OFFSET) == 0, 1042 ("pmap_kenter: Invalid physical address")); 1043 KASSERT((sva & L3_OFFSET) == 0, 1044 ("pmap_kenter: Invalid virtual address")); 1045 KASSERT((size & PAGE_MASK) == 0, 1046 ("pmap_kenter: Mapping is not page-sized")); 1047 1048 va = sva; 1049 while (size != 0) { 1050 pde = pmap_pde(kernel_pmap, va, &lvl); 1051 KASSERT(pde != NULL, 1052 ("pmap_kenter: Invalid page entry, va: 0x%lx", va)); 1053 KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl)); 1054 1055 pte = pmap_l2_to_l3(pde, va); 1056 pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT | 1057 ATTR_IDX(mode) | L3_PAGE); 1058 PTE_SYNC(pte); 1059 1060 va += PAGE_SIZE; 1061 pa += PAGE_SIZE; 1062 size -= PAGE_SIZE; 1063 } 1064 pmap_invalidate_range(kernel_pmap, sva, va); 1065} 1066 1067void 1068pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) 1069{ 1070 1071 pmap_kenter(sva, size, pa, DEVICE_MEMORY); 1072} 1073 1074/* 1075 * Remove a page from the kernel pagetables. 
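 * This undoes one page of a kernel mapping such as those created by
 * pmap_kenter(); the address must be backed by an L3 (page) entry
 * rather than an L1 or L2 block.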
1076 */ 1077PMAP_INLINE void 1078pmap_kremove(vm_offset_t va) 1079{ 1080 pt_entry_t *pte; 1081 int lvl; 1082 1083 pte = pmap_pte(kernel_pmap, va, &lvl); 1084 KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); 1085 KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); 1086 1087 if (pmap_l3_valid_cacheable(pmap_load(pte))) 1088 cpu_dcache_wb_range(va, L3_SIZE); 1089 pmap_load_clear(pte); 1090 PTE_SYNC(pte); 1091 pmap_invalidate_page(kernel_pmap, va); 1092} 1093 1094void 1095pmap_kremove_device(vm_offset_t sva, vm_size_t size) 1096{ 1097 pt_entry_t *pte; 1098 vm_offset_t va; 1099 int lvl; 1100 1101 KASSERT((sva & L3_OFFSET) == 0, 1102 ("pmap_kremove_device: Invalid virtual address")); 1103 KASSERT((size & PAGE_MASK) == 0, 1104 ("pmap_kremove_device: Mapping is not page-sized")); 1105 1106 va = sva; 1107 while (size != 0) { 1108 pte = pmap_pte(kernel_pmap, va, &lvl); 1109 KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); 1110 KASSERT(lvl == 3, 1111 ("Invalid device pagetable level: %d != 3", lvl)); 1112 pmap_load_clear(pte); 1113 PTE_SYNC(pte); 1114 1115 va += PAGE_SIZE; 1116 size -= PAGE_SIZE; 1117 } 1118 pmap_invalidate_range(kernel_pmap, sva, va); 1119} 1120 1121/* 1122 * Used to map a range of physical addresses into kernel 1123 * virtual address space. 1124 * 1125 * The value passed in '*virt' is a suggested virtual address for 1126 * the mapping. Architectures which can support a direct-mapped 1127 * physical to virtual region can return the appropriate address 1128 * within that region, leaving '*virt' unchanged. Other 1129 * architectures should map the pages starting at '*virt' and 1130 * update '*virt' with the first usable address after the mapped 1131 * region. 1132 */ 1133vm_offset_t 1134pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 1135{ 1136 return PHYS_TO_DMAP(start); 1137} 1138 1139 1140/* 1141 * Add a list of wired pages to the kva 1142 * this routine is only used for temporary 1143 * kernel mappings that do not need to have 1144 * page modification or references recorded. 1145 * Note that old mappings are simply written 1146 * over. The page *must* be wired. 1147 * Note: SMP coherent. Uses a ranged shootdown IPI. 1148 */ 1149void 1150pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1151{ 1152 pd_entry_t *pde; 1153 pt_entry_t *pte, pa; 1154 vm_offset_t va; 1155 vm_page_t m; 1156 int i, lvl; 1157 1158 va = sva; 1159 for (i = 0; i < count; i++) { 1160 pde = pmap_pde(kernel_pmap, va, &lvl); 1161 KASSERT(pde != NULL, 1162 ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); 1163 KASSERT(lvl == 2, 1164 ("pmap_qenter: Invalid level %d", lvl)); 1165 1166 m = ma[i]; 1167 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | 1168 ATTR_IDX(m->md.pv_memattr) | L3_PAGE; 1169 pte = pmap_l2_to_l3(pde, va); 1170 pmap_load_store(pte, pa); 1171 PTE_SYNC(pte); 1172 1173 va += L3_SIZE; 1174 } 1175 pmap_invalidate_range(kernel_pmap, sva, va); 1176} 1177 1178/* 1179 * This routine tears out page mappings from the 1180 * kernel -- it is meant only for temporary mappings. 
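 *
 * It is normally paired with pmap_qenter() above, e.g. (sketch only):
 *
 *	pmap_qenter(va, ma, count);
 *	... use the temporary kernel mapping of the pages in ma ...
 *	pmap_qremove(va, count);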
1181 */ 1182void 1183pmap_qremove(vm_offset_t sva, int count) 1184{ 1185 pt_entry_t *pte; 1186 vm_offset_t va; 1187 int lvl; 1188 1189 KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); 1190 1191 va = sva; 1192 while (count-- > 0) { 1193 pte = pmap_pte(kernel_pmap, va, &lvl); 1194 KASSERT(lvl == 3, 1195 ("Invalid device pagetable level: %d != 3", lvl)); 1196 if (pte != NULL) { 1197 if (pmap_l3_valid_cacheable(pmap_load(pte))) 1198 cpu_dcache_wb_range(va, L3_SIZE); 1199 pmap_load_clear(pte); 1200 PTE_SYNC(pte); 1201 } 1202 1203 va += PAGE_SIZE; 1204 } 1205 pmap_invalidate_range(kernel_pmap, sva, va); 1206} 1207 1208/*************************************************** 1209 * Page table page management routines..... 1210 ***************************************************/ 1211static __inline void 1212pmap_free_zero_pages(struct spglist *free) 1213{ 1214 vm_page_t m; 1215 1216 while ((m = SLIST_FIRST(free)) != NULL) { 1217 SLIST_REMOVE_HEAD(free, plinks.s.ss); 1218 /* Preserve the page's PG_ZERO setting. */ 1219 vm_page_free_toq(m); 1220 } 1221} 1222 1223/* 1224 * Schedule the specified unused page table page to be freed. Specifically, 1225 * add the page to the specified list of pages that will be released to the 1226 * physical memory manager after the TLB has been updated. 1227 */ 1228static __inline void 1229pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, 1230 boolean_t set_PG_ZERO) 1231{ 1232 1233 if (set_PG_ZERO) 1234 m->flags |= PG_ZERO; 1235 else 1236 m->flags &= ~PG_ZERO; 1237 SLIST_INSERT_HEAD(free, m, plinks.s.ss); 1238} 1239 1240/* 1241 * Decrements a page table page's wire count, which is used to record the 1242 * number of valid page table entries within the page. If the wire count 1243 * drops to zero, then the page table page is unmapped. Returns TRUE if the 1244 * page table page was unmapped and FALSE otherwise. 
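 * The unmapped page table page is not freed immediately; it is queued
 * on "free" and only returned to the physical memory allocator (see
 * pmap_free_zero_pages()) once the TLB shoot-down has completed.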
1245 */ 1246static inline boolean_t 1247pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1248{ 1249 1250 --m->wire_count; 1251 if (m->wire_count == 0) { 1252 _pmap_unwire_l3(pmap, va, m, free); 1253 return (TRUE); 1254 } else 1255 return (FALSE); 1256} 1257 1258static void 1259_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1260{ 1261 1262 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1263 /* 1264 * unmap the page table page 1265 */ 1266 if (m->pindex >= (NUL2E + NUL1E)) { 1267 /* l1 page */ 1268 pd_entry_t *l0; 1269 1270 l0 = pmap_l0(pmap, va); 1271 pmap_load_clear(l0); 1272 PTE_SYNC(l0); 1273 } else if (m->pindex >= NUL2E) { 1274 /* l2 page */ 1275 pd_entry_t *l1; 1276 1277 l1 = pmap_l1(pmap, va); 1278 pmap_load_clear(l1); 1279 PTE_SYNC(l1); 1280 } else { 1281 /* l3 page */ 1282 pd_entry_t *l2; 1283 1284 l2 = pmap_l2(pmap, va); 1285 pmap_load_clear(l2); 1286 PTE_SYNC(l2); 1287 } 1288 pmap_resident_count_dec(pmap, 1); 1289 if (m->pindex < NUL2E) { 1290 /* We just released an l3, unhold the matching l2 */ 1291 pd_entry_t *l1, tl1; 1292 vm_page_t l2pg; 1293 1294 l1 = pmap_l1(pmap, va); 1295 tl1 = pmap_load(l1); 1296 l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); 1297 pmap_unwire_l3(pmap, va, l2pg, free); 1298 } else if (m->pindex < (NUL2E + NUL1E)) { 1299 /* We just released an l2, unhold the matching l1 */ 1300 pd_entry_t *l0, tl0; 1301 vm_page_t l1pg; 1302 1303 l0 = pmap_l0(pmap, va); 1304 tl0 = pmap_load(l0); 1305 l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); 1306 pmap_unwire_l3(pmap, va, l1pg, free); 1307 } 1308 pmap_invalidate_page(pmap, va); 1309 1310 /* 1311 * This is a release store so that the ordinary store unmapping 1312 * the page table page is globally performed before TLB shoot- 1313 * down is begun. 1314 */ 1315 atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); 1316 1317 /* 1318 * Put page on a list so that it is released after 1319 * *ALL* TLB shootdown is done 1320 */ 1321 pmap_add_delayed_free_list(m, free, TRUE); 1322} 1323 1324/* 1325 * After removing an l3 entry, this routine is used to 1326 * conditionally free the page, and manage the hold/wire counts. 1327 */ 1328static int 1329pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, 1330 struct spglist *free) 1331{ 1332 vm_page_t mpte; 1333 1334 if (va >= VM_MAXUSER_ADDRESS) 1335 return (0); 1336 KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); 1337 mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); 1338 return (pmap_unwire_l3(pmap, va, mpte, free)); 1339} 1340 1341void 1342pmap_pinit0(pmap_t pmap) 1343{ 1344 1345 PMAP_LOCK_INIT(pmap); 1346 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1347 pmap->pm_l0 = kernel_pmap->pm_l0; 1348} 1349 1350int 1351pmap_pinit(pmap_t pmap) 1352{ 1353 vm_paddr_t l0phys; 1354 vm_page_t l0pt; 1355 1356 /* 1357 * allocate the l0 page 1358 */ 1359 while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 1360 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1361 VM_WAIT; 1362 1363 l0phys = VM_PAGE_TO_PHYS(l0pt); 1364 pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); 1365 1366 if ((l0pt->flags & PG_ZERO) == 0) 1367 pagezero(pmap->pm_l0); 1368 1369 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1370 1371 return (1); 1372} 1373 1374/* 1375 * This routine is called if the desired page table page does not exist. 1376 * 1377 * If page table page allocation fails, this routine may sleep before 1378 * returning NULL. It sleeps only if a lock pointer was given. 
1379 * 1380 * Note: If a page allocation fails at page table level two or three, 1381 * one or two pages may be held during the wait, only to be released 1382 * afterwards. This conservative approach is easily argued to avoid 1383 * race conditions. 1384 */ 1385static vm_page_t 1386_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) 1387{ 1388 vm_page_t m, l1pg, l2pg; 1389 1390 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1391 1392 /* 1393 * Allocate a page table page. 1394 */ 1395 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1396 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1397 if (lockp != NULL) { 1398 RELEASE_PV_LIST_LOCK(lockp); 1399 PMAP_UNLOCK(pmap); 1400 VM_WAIT; 1401 PMAP_LOCK(pmap); 1402 } 1403 1404 /* 1405 * Indicate the need to retry. While waiting, the page table 1406 * page may have been allocated. 1407 */ 1408 return (NULL); 1409 } 1410 if ((m->flags & PG_ZERO) == 0) 1411 pmap_zero_page(m); 1412 1413 /* 1414 * Map the pagetable page into the process address space, if 1415 * it isn't already there. 1416 */ 1417 1418 if (ptepindex >= (NUL2E + NUL1E)) { 1419 pd_entry_t *l0; 1420 vm_pindex_t l0index; 1421 1422 l0index = ptepindex - (NUL2E + NUL1E); 1423 l0 = &pmap->pm_l0[l0index]; 1424 pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); 1425 PTE_SYNC(l0); 1426 } else if (ptepindex >= NUL2E) { 1427 vm_pindex_t l0index, l1index; 1428 pd_entry_t *l0, *l1; 1429 pd_entry_t tl0; 1430 1431 l1index = ptepindex - NUL2E; 1432 l0index = l1index >> L0_ENTRIES_SHIFT; 1433 1434 l0 = &pmap->pm_l0[l0index]; 1435 tl0 = pmap_load(l0); 1436 if (tl0 == 0) { 1437 /* recurse for allocating page dir */ 1438 if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, 1439 lockp) == NULL) { 1440 --m->wire_count; 1441 /* XXX: release mem barrier? */ 1442 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1443 vm_page_free_zero(m); 1444 return (NULL); 1445 } 1446 } else { 1447 l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); 1448 l1pg->wire_count++; 1449 } 1450 1451 l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); 1452 l1 = &l1[ptepindex & Ln_ADDR_MASK]; 1453 pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); 1454 PTE_SYNC(l1); 1455 } else { 1456 vm_pindex_t l0index, l1index; 1457 pd_entry_t *l0, *l1, *l2; 1458 pd_entry_t tl0, tl1; 1459 1460 l1index = ptepindex >> Ln_ENTRIES_SHIFT; 1461 l0index = l1index >> L0_ENTRIES_SHIFT; 1462 1463 l0 = &pmap->pm_l0[l0index]; 1464 tl0 = pmap_load(l0); 1465 if (tl0 == 0) { 1466 /* recurse for allocating page dir */ 1467 if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1468 lockp) == NULL) { 1469 --m->wire_count; 1470 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1471 vm_page_free_zero(m); 1472 return (NULL); 1473 } 1474 tl0 = pmap_load(l0); 1475 l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1476 l1 = &l1[l1index & Ln_ADDR_MASK]; 1477 } else { 1478 l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1479 l1 = &l1[l1index & Ln_ADDR_MASK]; 1480 tl1 = pmap_load(l1); 1481 if (tl1 == 0) { 1482 /* recurse for allocating page dir */ 1483 if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1484 lockp) == NULL) { 1485 --m->wire_count; 1486 /* XXX: release mem barrier? 
*/ 1487 atomic_subtract_int( 1488 &vm_cnt.v_wire_count, 1); 1489 vm_page_free_zero(m); 1490 return (NULL); 1491 } 1492 } else { 1493 l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); 1494 l2pg->wire_count++; 1495 } 1496 } 1497 1498 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 1499 l2 = &l2[ptepindex & Ln_ADDR_MASK]; 1500 pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); 1501 PTE_SYNC(l2); 1502 } 1503 1504 pmap_resident_count_inc(pmap, 1); 1505 1506 return (m); 1507} 1508 1509static vm_page_t 1510pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1511{ 1512 vm_pindex_t ptepindex; 1513 pd_entry_t *pde, tpde; 1514 vm_page_t m; 1515 int lvl; 1516 1517 /* 1518 * Calculate pagetable page index 1519 */ 1520 ptepindex = pmap_l2_pindex(va); 1521retry: 1522 /* 1523 * Get the page directory entry 1524 */ 1525 pde = pmap_pde(pmap, va, &lvl); 1526 1527 /* 1528 * If the page table page is mapped, we just increment the hold count, 1529 * and activate it. If we get a level 2 pde it will point to a level 3 1530 * table. 1531 */ 1532 if (lvl == 2) { 1533 tpde = pmap_load(pde); 1534 if (tpde != 0) { 1535 m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); 1536 m->wire_count++; 1537 return (m); 1538 } 1539 } 1540 1541 /* 1542 * Here if the pte page isn't mapped, or if it has been deallocated. 1543 */ 1544 m = _pmap_alloc_l3(pmap, ptepindex, lockp); 1545 if (m == NULL && lockp != NULL) 1546 goto retry; 1547 1548 return (m); 1549} 1550 1551 1552/*************************************************** 1553 * Pmap allocation/deallocation routines. 1554 ***************************************************/ 1555 1556/* 1557 * Release any resources held by the given physical map. 1558 * Called when a pmap initialized by pmap_pinit is being released. 1559 * Should only be called if the map contains no valid mappings. 
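 * At this point the only page still held by the pmap is the L0 table
 * allocated in pmap_pinit(); it is unwired and freed below.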
1560 */ 1561void 1562pmap_release(pmap_t pmap) 1563{ 1564 vm_page_t m; 1565 1566 KASSERT(pmap->pm_stats.resident_count == 0, 1567 ("pmap_release: pmap resident count %ld != 0", 1568 pmap->pm_stats.resident_count)); 1569 1570 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); 1571 1572 m->wire_count--; 1573 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1574 vm_page_free_zero(m); 1575} 1576 1577#if 0 1578static int 1579kvm_size(SYSCTL_HANDLER_ARGS) 1580{ 1581 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; 1582 1583 return sysctl_handle_long(oidp, &ksize, 0, req); 1584} 1585SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1586 0, 0, kvm_size, "LU", "Size of KVM"); 1587 1588static int 1589kvm_free(SYSCTL_HANDLER_ARGS) 1590{ 1591 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1592 1593 return sysctl_handle_long(oidp, &kfree, 0, req); 1594} 1595SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1596 0, 0, kvm_free, "LU", "Amount of KVM free"); 1597#endif /* 0 */ 1598 1599/* 1600 * grow the number of kernel page table entries, if needed 1601 */ 1602void 1603pmap_growkernel(vm_offset_t addr) 1604{ 1605 vm_paddr_t paddr; 1606 vm_page_t nkpg; 1607 pd_entry_t *l0, *l1, *l2; 1608 1609 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1610 1611 addr = roundup2(addr, L2_SIZE); 1612 if (addr - 1 >= kernel_map->max_offset) 1613 addr = kernel_map->max_offset; 1614 while (kernel_vm_end < addr) { 1615 l0 = pmap_l0(kernel_pmap, kernel_vm_end); 1616 KASSERT(pmap_load(l0) != 0, 1617 ("pmap_growkernel: No level 0 kernel entry")); 1618 1619 l1 = pmap_l0_to_l1(l0, kernel_vm_end); 1620 if (pmap_load(l1) == 0) { 1621 /* We need a new PDP entry */ 1622 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, 1623 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | 1624 VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1625 if (nkpg == NULL) 1626 panic("pmap_growkernel: no memory to grow kernel"); 1627 if ((nkpg->flags & PG_ZERO) == 0) 1628 pmap_zero_page(nkpg); 1629 paddr = VM_PAGE_TO_PHYS(nkpg); 1630 pmap_load_store(l1, paddr | L1_TABLE); 1631 PTE_SYNC(l1); 1632 continue; /* try again */ 1633 } 1634 l2 = pmap_l1_to_l2(l1, kernel_vm_end); 1635 if ((pmap_load(l2) & ATTR_AF) != 0) { 1636 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1637 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1638 kernel_vm_end = kernel_map->max_offset; 1639 break; 1640 } 1641 continue; 1642 } 1643 1644 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, 1645 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1646 VM_ALLOC_ZERO); 1647 if (nkpg == NULL) 1648 panic("pmap_growkernel: no memory to grow kernel"); 1649 if ((nkpg->flags & PG_ZERO) == 0) 1650 pmap_zero_page(nkpg); 1651 paddr = VM_PAGE_TO_PHYS(nkpg); 1652 pmap_load_store(l2, paddr | L2_TABLE); 1653 PTE_SYNC(l2); 1654 pmap_invalidate_page(kernel_pmap, kernel_vm_end); 1655 1656 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1657 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1658 kernel_vm_end = kernel_map->max_offset; 1659 break; 1660 } 1661 } 1662} 1663 1664 1665/*************************************************** 1666 * page management routines. 
1667 ***************************************************/ 1668 1669CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1670CTASSERT(_NPCM == 3); 1671CTASSERT(_NPCPV == 168); 1672 1673static __inline struct pv_chunk * 1674pv_to_chunk(pv_entry_t pv) 1675{ 1676 1677 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1678} 1679 1680#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1681 1682#define PC_FREE0 0xfffffffffffffffful 1683#define PC_FREE1 0xfffffffffffffffful 1684#define PC_FREE2 0x000000fffffffffful 1685 1686static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; 1687 1688#if 0 1689#ifdef PV_STATS 1690static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1691 1692SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1693 "Current number of pv entry chunks"); 1694SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1695 "Current number of pv entry chunks allocated"); 1696SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1697 "Current number of pv entry chunks frees"); 1698SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1699 "Number of times tried to get a chunk page but failed."); 1700 1701static long pv_entry_frees, pv_entry_allocs, pv_entry_count; 1702static int pv_entry_spare; 1703 1704SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1705 "Current number of pv entry frees"); 1706SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1707 "Current number of pv entry allocs"); 1708SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1709 "Current number of pv entries"); 1710SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1711 "Current number of spare pv entries"); 1712#endif 1713#endif /* 0 */ 1714 1715/* 1716 * We are in a serious low memory condition. Resort to 1717 * drastic measures to free some pages so we can allocate 1718 * another pv entry chunk. 1719 * 1720 * Returns NULL if PV entries were reclaimed from the specified pmap. 1721 * 1722 * We do not, however, unmap 2mpages because subsequent accesses will 1723 * allocate per-page pv entries until repromotion occurs, thereby 1724 * exacerbating the shortage of free pv entries. 1725 */ 1726static vm_page_t 1727reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) 1728{ 1729 1730 panic("ARM64TODO: reclaim_pv_chunk"); 1731} 1732 1733/* 1734 * free the pv_entry back to the free list 1735 */ 1736static void 1737free_pv_entry(pmap_t pmap, pv_entry_t pv) 1738{ 1739 struct pv_chunk *pc; 1740 int idx, field, bit; 1741 1742 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1743 PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 1744 PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 1745 PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 1746 pc = pv_to_chunk(pv); 1747 idx = pv - &pc->pc_pventry[0]; 1748 field = idx / 64; 1749 bit = idx % 64; 1750 pc->pc_map[field] |= 1ul << bit; 1751 if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 1752 pc->pc_map[2] != PC_FREE2) { 1753 /* 98% of the time, pc is already at the head of the list. 
*/ 1754 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 1755 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1756 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1757 } 1758 return; 1759 } 1760 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1761 free_pv_chunk(pc); 1762} 1763 1764static void 1765free_pv_chunk(struct pv_chunk *pc) 1766{ 1767 vm_page_t m; 1768 1769 mtx_lock(&pv_chunks_mutex); 1770 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1771 mtx_unlock(&pv_chunks_mutex); 1772 PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1773 PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1774 PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1775 /* entire chunk is free, return it */ 1776 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1777 dump_drop_page(m->phys_addr); 1778 vm_page_unwire(m, PQ_NONE); 1779 vm_page_free(m); 1780} 1781 1782/* 1783 * Returns a new PV entry, allocating a new PV chunk from the system when 1784 * needed. If this PV chunk allocation fails and a PV list lock pointer was 1785 * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 1786 * returned. 1787 * 1788 * The given PV list lock may be released. 1789 */ 1790static pv_entry_t 1791get_pv_entry(pmap_t pmap, struct rwlock **lockp) 1792{ 1793 int bit, field; 1794 pv_entry_t pv; 1795 struct pv_chunk *pc; 1796 vm_page_t m; 1797 1798 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1799 PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 1800retry: 1801 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 1802 if (pc != NULL) { 1803 for (field = 0; field < _NPCM; field++) { 1804 if (pc->pc_map[field]) { 1805 bit = ffsl(pc->pc_map[field]) - 1; 1806 break; 1807 } 1808 } 1809 if (field < _NPCM) { 1810 pv = &pc->pc_pventry[field * 64 + bit]; 1811 pc->pc_map[field] &= ~(1ul << bit); 1812 /* If this was the last item, move it to tail */ 1813 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 1814 pc->pc_map[2] == 0) { 1815 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1816 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 1817 pc_list); 1818 } 1819 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1820 PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 1821 return (pv); 1822 } 1823 } 1824 /* No free items, allocate another chunk */ 1825 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 1826 VM_ALLOC_WIRED); 1827 if (m == NULL) { 1828 if (lockp == NULL) { 1829 PV_STAT(pc_chunk_tryfail++); 1830 return (NULL); 1831 } 1832 m = reclaim_pv_chunk(pmap, lockp); 1833 if (m == NULL) 1834 goto retry; 1835 } 1836 PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 1837 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 1838 dump_add_page(m->phys_addr); 1839 pc = (void *)PHYS_TO_DMAP(m->phys_addr); 1840 pc->pc_pmap = pmap; 1841 pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 1842 pc->pc_map[1] = PC_FREE1; 1843 pc->pc_map[2] = PC_FREE2; 1844 mtx_lock(&pv_chunks_mutex); 1845 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1846 mtx_unlock(&pv_chunks_mutex); 1847 pv = &pc->pc_pventry[0]; 1848 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1849 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1850 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 1851 return (pv); 1852} 1853 1854/* 1855 * First find and then remove the pv entry for the specified pmap and virtual 1856 * address from the specified pv list. Returns the pv entry if found and NULL 1857 * otherwise. This operation can be performed on pv lists for either 4KB or 1858 * 2MB page mappings. 
1859 */ 1860static __inline pv_entry_t 1861pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1862{ 1863 pv_entry_t pv; 1864 1865 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 1866 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 1867 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 1868 pvh->pv_gen++; 1869 break; 1870 } 1871 } 1872 return (pv); 1873} 1874 1875/* 1876 * First find and then destroy the pv entry for the specified pmap and virtual 1877 * address. This operation can be performed on pv lists for either 4KB or 2MB 1878 * page mappings. 1879 */ 1880static void 1881pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1882{ 1883 pv_entry_t pv; 1884 1885 pv = pmap_pvh_remove(pvh, pmap, va); 1886 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 1887 free_pv_entry(pmap, pv); 1888} 1889 1890/* 1891 * Conditionally create the PV entry for a 4KB page mapping if the required 1892 * memory can be allocated without resorting to reclamation. 1893 */ 1894static boolean_t 1895pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 1896 struct rwlock **lockp) 1897{ 1898 pv_entry_t pv; 1899 1900 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1901 /* Pass NULL instead of the lock pointer to disable reclamation. */ 1902 if ((pv = get_pv_entry(pmap, NULL)) != NULL) { 1903 pv->pv_va = va; 1904 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1905 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 1906 m->md.pv_gen++; 1907 return (TRUE); 1908 } else 1909 return (FALSE); 1910} 1911 1912/* 1913 * pmap_remove_l3: do the things to unmap a page in a process 1914 */ 1915static int 1916pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 1917 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) 1918{ 1919 pt_entry_t old_l3; 1920 vm_page_t m; 1921 1922 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1923 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) 1924 cpu_dcache_wb_range(va, L3_SIZE); 1925 old_l3 = pmap_load_clear(l3); 1926 PTE_SYNC(l3); 1927 pmap_invalidate_page(pmap, va); 1928 if (old_l3 & ATTR_SW_WIRED) 1929 pmap->pm_stats.wired_count -= 1; 1930 pmap_resident_count_dec(pmap, 1); 1931 if (old_l3 & ATTR_SW_MANAGED) { 1932 m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); 1933 if (pmap_page_dirty(old_l3)) 1934 vm_page_dirty(m); 1935 if (old_l3 & ATTR_AF) 1936 vm_page_aflag_set(m, PGA_REFERENCED); 1937 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1938 pmap_pvh_free(&m->md, pmap, va); 1939 } 1940 return (pmap_unuse_l3(pmap, va, l2e, free)); 1941} 1942 1943/* 1944 * Remove the given range of addresses from the specified map. 1945 * 1946 * It is assumed that the start and end are properly 1947 * rounded to the page size. 1948 */ 1949void 1950pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1951{ 1952 struct rwlock *lock; 1953 vm_offset_t va, va_next; 1954 pd_entry_t *l0, *l1, *l2; 1955 pt_entry_t l3_paddr, *l3; 1956 struct spglist free; 1957 int anyvalid; 1958 1959 /* 1960 * Perform an unsynchronized read. This is, however, safe. 
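 * (A stale value only costs extra work: a racy zero means the pmap has
 * no mappings this call is responsible for removing, since in practice
 * callers serialize changes to the range through the VM map.)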
1961 */ 1962 if (pmap->pm_stats.resident_count == 0) 1963 return; 1964 1965 anyvalid = 0; 1966 SLIST_INIT(&free); 1967 1968 PMAP_LOCK(pmap); 1969 1970 lock = NULL; 1971 for (; sva < eva; sva = va_next) { 1972 1973 if (pmap->pm_stats.resident_count == 0) 1974 break; 1975 1976 l0 = pmap_l0(pmap, sva); 1977 if (pmap_load(l0) == 0) { 1978 va_next = (sva + L0_SIZE) & ~L0_OFFSET; 1979 if (va_next < sva) 1980 va_next = eva; 1981 continue; 1982 } 1983 1984 l1 = pmap_l0_to_l1(l0, sva); 1985 if (pmap_load(l1) == 0) { 1986 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1987 if (va_next < sva) 1988 va_next = eva; 1989 continue; 1990 } 1991 1992 /* 1993 * Calculate index for next page table. 1994 */ 1995 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1996 if (va_next < sva) 1997 va_next = eva; 1998 1999 l2 = pmap_l1_to_l2(l1, sva); 2000 if (l2 == NULL) 2001 continue; 2002 2003 l3_paddr = pmap_load(l2); 2004 2005 /* 2006 * Weed out invalid mappings. 2007 */ 2008 if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) 2009 continue; 2010 2011 /* 2012 * Limit our scan to either the end of the va represented 2013 * by the current page table page, or to the end of the 2014 * range being removed. 2015 */ 2016 if (va_next > eva) 2017 va_next = eva; 2018 2019 va = va_next; 2020 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2021 sva += L3_SIZE) { 2022 if (l3 == NULL) 2023 panic("l3 == NULL"); 2024 if (pmap_load(l3) == 0) { 2025 if (va != va_next) { 2026 pmap_invalidate_range(pmap, va, sva); 2027 va = va_next; 2028 } 2029 continue; 2030 } 2031 if (va == va_next) 2032 va = sva; 2033 if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, 2034 &lock)) { 2035 sva += L3_SIZE; 2036 break; 2037 } 2038 } 2039 if (va != va_next) 2040 pmap_invalidate_range(pmap, va, sva); 2041 } 2042 if (lock != NULL) 2043 rw_wunlock(lock); 2044 if (anyvalid) 2045 pmap_invalidate_all(pmap); 2046 PMAP_UNLOCK(pmap); 2047 pmap_free_zero_pages(&free); 2048} 2049 2050/* 2051 * Routine: pmap_remove_all 2052 * Function: 2053 * Removes this physical page from 2054 * all physical maps in which it resides. 2055 * Reflects back modify bits to the pager. 2056 * 2057 * Notes: 2058 * Original versions of this routine were very 2059 * inefficient because they iteratively called 2060 * pmap_remove (slow...) 
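 * This version instead walks the page's PV list directly, try-locking
 * each owning pmap and restarting (using the md.pv_gen generation
 * count) if the list changes while the locks are being reordered.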
2061 */ 2062 2063void 2064pmap_remove_all(vm_page_t m) 2065{ 2066 pv_entry_t pv; 2067 pmap_t pmap; 2068 struct rwlock *lock; 2069 pd_entry_t *pde, tpde; 2070 pt_entry_t *pte, tpte; 2071 struct spglist free; 2072 int lvl, md_gen; 2073 2074 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2075 ("pmap_remove_all: page %p is not managed", m)); 2076 SLIST_INIT(&free); 2077 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2078retry: 2079 rw_wlock(lock); 2080 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2081 pmap = PV_PMAP(pv); 2082 if (!PMAP_TRYLOCK(pmap)) { 2083 md_gen = m->md.pv_gen; 2084 rw_wunlock(lock); 2085 PMAP_LOCK(pmap); 2086 rw_wlock(lock); 2087 if (md_gen != m->md.pv_gen) { 2088 rw_wunlock(lock); 2089 PMAP_UNLOCK(pmap); 2090 goto retry; 2091 } 2092 } 2093 pmap_resident_count_dec(pmap, 1); 2094 2095 pde = pmap_pde(pmap, pv->pv_va, &lvl); 2096 KASSERT(pde != NULL, 2097 ("pmap_remove_all: no page directory entry found")); 2098 KASSERT(lvl == 2, 2099 ("pmap_remove_all: invalid pde level %d", lvl)); 2100 tpde = pmap_load(pde); 2101 2102 pte = pmap_l2_to_l3(pde, pv->pv_va); 2103 tpte = pmap_load(pte); 2104 if (pmap_is_current(pmap) && 2105 pmap_l3_valid_cacheable(tpte)) 2106 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 2107 pmap_load_clear(pte); 2108 PTE_SYNC(pte); 2109 pmap_invalidate_page(pmap, pv->pv_va); 2110 if (tpte & ATTR_SW_WIRED) 2111 pmap->pm_stats.wired_count--; 2112 if ((tpte & ATTR_AF) != 0) 2113 vm_page_aflag_set(m, PGA_REFERENCED); 2114 2115 /* 2116 * Update the vm_page_t clean and reference bits. 2117 */ 2118 if (pmap_page_dirty(tpte)) 2119 vm_page_dirty(m); 2120 pmap_unuse_l3(pmap, pv->pv_va, tpde, &free); 2121 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2122 m->md.pv_gen++; 2123 free_pv_entry(pmap, pv); 2124 PMAP_UNLOCK(pmap); 2125 } 2126 vm_page_aflag_clear(m, PGA_WRITEABLE); 2127 rw_wunlock(lock); 2128 pmap_free_zero_pages(&free); 2129} 2130 2131/* 2132 * Set the physical protection on the 2133 * specified range of this map as requested. 
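 * Write permission is never added here; if the new protection includes
 * VM_PROT_WRITE the routine returns without doing anything, and if it lacks
 * VM_PROT_READ the range is removed outright. Otherwise each valid 4KB
 * entry in the range is downgraded to read-only.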
2134 */ 2135void 2136pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2137{ 2138 vm_offset_t va, va_next; 2139 pd_entry_t *l0, *l1, *l2; 2140 pt_entry_t *l3p, l3; 2141 2142 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2143 pmap_remove(pmap, sva, eva); 2144 return; 2145 } 2146 2147 if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) 2148 return; 2149 2150 PMAP_LOCK(pmap); 2151 for (; sva < eva; sva = va_next) { 2152 2153 l0 = pmap_l0(pmap, sva); 2154 if (pmap_load(l0) == 0) { 2155 va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2156 if (va_next < sva) 2157 va_next = eva; 2158 continue; 2159 } 2160 2161 l1 = pmap_l0_to_l1(l0, sva); 2162 if (pmap_load(l1) == 0) { 2163 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2164 if (va_next < sva) 2165 va_next = eva; 2166 continue; 2167 } 2168 2169 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2170 if (va_next < sva) 2171 va_next = eva; 2172 2173 l2 = pmap_l1_to_l2(l1, sva); 2174 if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) 2175 continue; 2176 2177 if (va_next > eva) 2178 va_next = eva; 2179 2180 va = va_next; 2181 for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, 2182 sva += L3_SIZE) { 2183 l3 = pmap_load(l3p); 2184 if (pmap_l3_valid(l3)) { 2185 pmap_set(l3p, ATTR_AP(ATTR_AP_RO)); 2186 PTE_SYNC(l3p); 2187 /* XXX: Use pmap_invalidate_range */ 2188 pmap_invalidate_page(pmap, va); 2189 } 2190 } 2191 } 2192 PMAP_UNLOCK(pmap); 2193 2194 /* TODO: Only invalidate entries we are touching */ 2195 pmap_invalidate_all(pmap); 2196} 2197 2198/* 2199 * Insert the given physical page (p) at 2200 * the specified virtual address (v) in the 2201 * target physical map with the protection requested. 2202 * 2203 * If specified, the page will be wired down, meaning 2204 * that the related pte can not be reclaimed. 2205 * 2206 * NB: This is the only routine which MAY NOT lazy-evaluate 2207 * or lose information. That is, this routine must actually 2208 * insert this page into the given map NOW. 2209 */ 2210int 2211pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2212 u_int flags, int8_t psind __unused) 2213{ 2214 struct rwlock *lock; 2215 pd_entry_t *pde; 2216 pt_entry_t new_l3, orig_l3; 2217 pt_entry_t *l3; 2218 pv_entry_t pv; 2219 vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; 2220 vm_page_t mpte, om, l1_m, l2_m, l3_m; 2221 boolean_t nosleep; 2222 int lvl; 2223 2224 va = trunc_page(va); 2225 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2226 VM_OBJECT_ASSERT_LOCKED(m->object); 2227 pa = VM_PAGE_TO_PHYS(m); 2228 new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 2229 L3_PAGE); 2230 if ((prot & VM_PROT_WRITE) == 0) 2231 new_l3 |= ATTR_AP(ATTR_AP_RO); 2232 if ((flags & PMAP_ENTER_WIRED) != 0) 2233 new_l3 |= ATTR_SW_WIRED; 2234 if ((va >> 63) == 0) 2235 new_l3 |= ATTR_AP(ATTR_AP_USER); 2236 2237 CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 2238 2239 mpte = NULL; 2240 2241 lock = NULL; 2242 PMAP_LOCK(pmap); 2243 2244 if (va < VM_MAXUSER_ADDRESS) { 2245 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 2246 mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); 2247 if (mpte == NULL && nosleep) { 2248 CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 2249 if (lock != NULL) 2250 rw_wunlock(lock); 2251 PMAP_UNLOCK(pmap); 2252 return (KERN_RESOURCE_SHORTAGE); 2253 } 2254 pde = pmap_pde(pmap, va, &lvl); 2255 KASSERT(pde != NULL, 2256 ("pmap_enter: Invalid page entry, va: 0x%lx", va)); 2257 KASSERT(lvl == 2, 2258 ("pmap_enter: Invalid level %d", lvl)); 2259 2260 l3 = pmap_l2_to_l3(pde, va); 2261 } else { 2262 pde = pmap_pde(pmap, va, &lvl); 2263 /* 2264 * If we get a level 2 pde it must point to a level 3 entry 2265 * otherwise we will need to create the intermediate tables 2266 */ 2267 if (lvl < 2) { 2268 switch(lvl) { 2269 default: 2270 case -1: 2271 /* Get the l0 pde to update */ 2272 pde = pmap_l0(pmap, va); 2273 KASSERT(pde != NULL, ("...")); 2274 2275 l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2276 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2277 VM_ALLOC_ZERO); 2278 if (l1_m == NULL) 2279 panic("pmap_enter: l1 pte_m == NULL"); 2280 if ((l1_m->flags & PG_ZERO) == 0) 2281 pmap_zero_page(l1_m); 2282 2283 l1_pa = VM_PAGE_TO_PHYS(l1_m); 2284 pmap_load_store(pde, l1_pa | L0_TABLE); 2285 PTE_SYNC(pde); 2286 /* FALLTHROUGH */ 2287 case 0: 2288 /* Get the l1 pde to update */ 2289 pde = pmap_l1_to_l2(pde, va); 2290 KASSERT(pde != NULL, ("...")); 2291 2292 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2293 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2294 VM_ALLOC_ZERO); 2295 if (l2_m == NULL) 2296 panic("pmap_enter: l2 pte_m == NULL"); 2297 if ((l2_m->flags & PG_ZERO) == 0) 2298 pmap_zero_page(l2_m); 2299 2300 l2_pa = VM_PAGE_TO_PHYS(l2_m); 2301 pmap_load_store(pde, l2_pa | L1_TABLE); 2302 PTE_SYNC(pde); 2303 /* FALLTHROUGH */ 2304 case 1: 2305 /* Get the l2 pde to update */ 2306 pde = pmap_l1_to_l2(pde, va); 2307 2308 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2309 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2310 VM_ALLOC_ZERO); 2311 if (l3_m == NULL) 2312 panic("pmap_enter: l3 pte_m == NULL"); 2313 if ((l3_m->flags & PG_ZERO) == 0) 2314 pmap_zero_page(l3_m); 2315 2316 l3_pa = VM_PAGE_TO_PHYS(l3_m); 2317 pmap_load_store(pde, l3_pa | L2_TABLE); 2318 PTE_SYNC(pde); 2319 break; 2320 } 2321 } 2322 l3 = pmap_l2_to_l3(pde, va); 2323 pmap_invalidate_page(pmap, va); 2324 } 2325 2326 om = NULL; 2327 orig_l3 = pmap_load(l3); 2328 opa = orig_l3 & ~ATTR_MASK; 2329 2330 /* 2331 * Is the specified virtual address already mapped? 2332 */ 2333 if (pmap_l3_valid(orig_l3)) { 2334 /* 2335 * Wiring change, just update stats. We don't worry about 2336 * wiring PT pages as they remain resident as long as there 2337 * are valid mappings in them. Hence, if a user page is wired, 2338 * the PT page will be also. 2339 */ 2340 if ((flags & PMAP_ENTER_WIRED) != 0 && 2341 (orig_l3 & ATTR_SW_WIRED) == 0) 2342 pmap->pm_stats.wired_count++; 2343 else if ((flags & PMAP_ENTER_WIRED) == 0 && 2344 (orig_l3 & ATTR_SW_WIRED) != 0) 2345 pmap->pm_stats.wired_count--; 2346 2347 /* 2348 * Remove the extra PT page reference. 2349 */ 2350 if (mpte != NULL) { 2351 mpte->wire_count--; 2352 KASSERT(mpte->wire_count > 0, 2353 ("pmap_enter: missing reference to page table page," 2354 " va: 0x%lx", va)); 2355 } 2356 2357 /* 2358 * Has the physical page changed? 2359 */ 2360 if (opa == pa) { 2361 /* 2362 * No, might be a protection or wiring change. 
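 * The existing PV entry is reused; only the software MANAGED flag
 * (and PGA_WRITEABLE for a writable mapping) is carried over before
 * jumping ahead to rewrite the L3 entry.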
2363 */ 2364 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2365 new_l3 |= ATTR_SW_MANAGED; 2366 if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == 2367 ATTR_AP(ATTR_AP_RW)) { 2368 vm_page_aflag_set(m, PGA_WRITEABLE); 2369 } 2370 } 2371 goto validate; 2372 } 2373 2374 /* Flush the cache, there might be uncommitted data in it */ 2375 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) 2376 cpu_dcache_wb_range(va, L3_SIZE); 2377 } else { 2378 /* 2379 * Increment the counters. 2380 */ 2381 if ((new_l3 & ATTR_SW_WIRED) != 0) 2382 pmap->pm_stats.wired_count++; 2383 pmap_resident_count_inc(pmap, 1); 2384 } 2385 /* 2386 * Enter on the PV list if part of our managed memory. 2387 */ 2388 if ((m->oflags & VPO_UNMANAGED) == 0) { 2389 new_l3 |= ATTR_SW_MANAGED; 2390 pv = get_pv_entry(pmap, &lock); 2391 pv->pv_va = va; 2392 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 2393 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2394 m->md.pv_gen++; 2395 if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 2396 vm_page_aflag_set(m, PGA_WRITEABLE); 2397 } 2398 2399 /* 2400 * Update the L3 entry. 2401 */ 2402 if (orig_l3 != 0) { 2403validate: 2404 orig_l3 = pmap_load_store(l3, new_l3); 2405 PTE_SYNC(l3); 2406 opa = orig_l3 & ~ATTR_MASK; 2407 2408 if (opa != pa) { 2409 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2410 om = PHYS_TO_VM_PAGE(opa); 2411 if (pmap_page_dirty(orig_l3)) 2412 vm_page_dirty(om); 2413 if ((orig_l3 & ATTR_AF) != 0) 2414 vm_page_aflag_set(om, PGA_REFERENCED); 2415 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 2416 pmap_pvh_free(&om->md, pmap, va); 2417 } 2418 } else if (pmap_page_dirty(orig_l3)) { 2419 if ((orig_l3 & ATTR_SW_MANAGED) != 0) 2420 vm_page_dirty(m); 2421 } 2422 } else { 2423 pmap_load_store(l3, new_l3); 2424 PTE_SYNC(l3); 2425 } 2426 pmap_invalidate_page(pmap, va); 2427 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 2428 cpu_icache_sync_range(va, PAGE_SIZE); 2429 2430 if (lock != NULL) 2431 rw_wunlock(lock); 2432 PMAP_UNLOCK(pmap); 2433 return (KERN_SUCCESS); 2434} 2435 2436/* 2437 * Maps a sequence of resident pages belonging to the same object. 2438 * The sequence begins with the given page m_start. This page is 2439 * mapped at the given virtual address start. Each subsequent page is 2440 * mapped at a virtual address that is offset from start by the same 2441 * amount as the page is offset from m_start within the object. The 2442 * last page in the sequence is the page with the largest offset from 2443 * m_start that can be mapped at a virtual address less than the given 2444 * virtual address end. Not every virtual page between start and end 2445 * is mapped; only those for which a resident page exists with the 2446 * corresponding offset from m_start are mapped. 2447 */ 2448void 2449pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2450 vm_page_t m_start, vm_prot_t prot) 2451{ 2452 struct rwlock *lock; 2453 vm_offset_t va; 2454 vm_page_t m, mpte; 2455 vm_pindex_t diff, psize; 2456 2457 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2458 2459 psize = atop(end - start); 2460 mpte = NULL; 2461 m = m_start; 2462 lock = NULL; 2463 PMAP_LOCK(pmap); 2464 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2465 va = start + ptoa(diff); 2466 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); 2467 m = TAILQ_NEXT(m, listq); 2468 } 2469 if (lock != NULL) 2470 rw_wunlock(lock); 2471 PMAP_UNLOCK(pmap); 2472} 2473 2474/* 2475 * this code makes some *MAJOR* assumptions: 2476 * 1. Current pmap & pmap exists. 2477 * 2. Not wired. 2478 * 3. 
Read access. 2479 * 4. No page table pages. 2480 * but is *MUCH* faster than pmap_enter... 2481 */ 2482 2483void 2484pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2485{ 2486 struct rwlock *lock; 2487 2488 lock = NULL; 2489 PMAP_LOCK(pmap); 2490 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); 2491 if (lock != NULL) 2492 rw_wunlock(lock); 2493 PMAP_UNLOCK(pmap); 2494} 2495 2496static vm_page_t 2497pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2498 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) 2499{ 2500 struct spglist free; 2501 pd_entry_t *pde; 2502 pt_entry_t *l3; 2503 vm_paddr_t pa; 2504 int lvl; 2505 2506 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2507 (m->oflags & VPO_UNMANAGED) != 0, 2508 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2509 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2510 2511 CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); 2512 /* 2513 * In the case that a page table page is not 2514 * resident, we are creating it here. 2515 */ 2516 if (va < VM_MAXUSER_ADDRESS) { 2517 vm_pindex_t l2pindex; 2518 2519 /* 2520 * Calculate pagetable page index 2521 */ 2522 l2pindex = pmap_l2_pindex(va); 2523 if (mpte && (mpte->pindex == l2pindex)) { 2524 mpte->wire_count++; 2525 } else { 2526 /* 2527 * Get the l2 entry 2528 */ 2529 pde = pmap_pde(pmap, va, &lvl); 2530 2531 /* 2532 * If the page table page is mapped, we just increment 2533 * the hold count, and activate it. Otherwise, we 2534 * attempt to allocate a page table page. If this 2535 * attempt fails, we don't retry. Instead, we give up. 2536 */ 2537 if (lvl == 2 && pmap_load(pde) != 0) { 2538 mpte = 2539 PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); 2540 mpte->wire_count++; 2541 } else { 2542 /* 2543 * Pass NULL instead of the PV list lock 2544 * pointer, because we don't intend to sleep. 2545 */ 2546 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 2547 if (mpte == NULL) 2548 return (mpte); 2549 } 2550 } 2551 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 2552 l3 = &l3[pmap_l3_index(va)]; 2553 } else { 2554 mpte = NULL; 2555 pde = pmap_pde(kernel_pmap, va, &lvl); 2556 KASSERT(pde != NULL, 2557 ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", 2558 va)); 2559 KASSERT(lvl == 2, 2560 ("pmap_enter_quick_locked: Invalid level %d", lvl)); 2561 l3 = pmap_l2_to_l3(pde, va); 2562 } 2563 2564 if (pmap_load(l3) != 0) { 2565 if (mpte != NULL) { 2566 mpte->wire_count--; 2567 mpte = NULL; 2568 } 2569 return (mpte); 2570 } 2571 2572 /* 2573 * Enter on the PV list if part of our managed memory. 2574 */ 2575 if ((m->oflags & VPO_UNMANAGED) == 0 && 2576 !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 2577 if (mpte != NULL) { 2578 SLIST_INIT(&free); 2579 if (pmap_unwire_l3(pmap, va, mpte, &free)) { 2580 pmap_invalidate_page(pmap, va); 2581 pmap_free_zero_pages(&free); 2582 } 2583 mpte = NULL; 2584 } 2585 return (mpte); 2586 } 2587 2588 /* 2589 * Increment counters 2590 */ 2591 pmap_resident_count_inc(pmap, 1); 2592 2593 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 2594 ATTR_AP(ATTR_AP_RW) | L3_PAGE; 2595 2596 /* 2597 * Now validate mapping with RO protection 2598 */ 2599 if ((m->oflags & VPO_UNMANAGED) == 0) 2600 pa |= ATTR_SW_MANAGED; 2601 pmap_load_store(l3, pa); 2602 PTE_SYNC(l3); 2603 pmap_invalidate_page(pmap, va); 2604 return (mpte); 2605} 2606 2607/* 2608 * This code maps large physical mmap regions into the 2609 * processor address space. 
Note that some shortcuts 2610 * are taken, but the code works. 2611 */ 2612void 2613pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 2614 vm_pindex_t pindex, vm_size_t size) 2615{ 2616 2617 VM_OBJECT_ASSERT_WLOCKED(object); 2618 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2619 ("pmap_object_init_pt: non-device object")); 2620} 2621 2622/* 2623 * Clear the wired attribute from the mappings for the specified range of 2624 * addresses in the given pmap. Every valid mapping within that range 2625 * must have the wired attribute set. In contrast, invalid mappings 2626 * cannot have the wired attribute set, so they are ignored. 2627 * 2628 * The wired attribute of the page table entry is not a hardware feature, 2629 * so there is no need to invalidate any TLB entries. 2630 */ 2631void 2632pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2633{ 2634 vm_offset_t va_next; 2635 pd_entry_t *l0, *l1, *l2; 2636 pt_entry_t *l3; 2637 2638 PMAP_LOCK(pmap); 2639 for (; sva < eva; sva = va_next) { 2640 l0 = pmap_l0(pmap, sva); 2641 if (pmap_load(l0) == 0) { 2642 va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2643 if (va_next < sva) 2644 va_next = eva; 2645 continue; 2646 } 2647 2648 l1 = pmap_l0_to_l1(l0, sva); 2649 if (pmap_load(l1) == 0) { 2650 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2651 if (va_next < sva) 2652 va_next = eva; 2653 continue; 2654 } 2655 2656 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2657 if (va_next < sva) 2658 va_next = eva; 2659 2660 l2 = pmap_l1_to_l2(l1, sva); 2661 if (pmap_load(l2) == 0) 2662 continue; 2663 2664 if (va_next > eva) 2665 va_next = eva; 2666 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2667 sva += L3_SIZE) { 2668 if (pmap_load(l3) == 0) 2669 continue; 2670 if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) 2671 panic("pmap_unwire: l3 %#jx is missing " 2672 "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); 2673 2674 /* 2675 * PG_W must be cleared atomically. Although the pmap 2676 * lock synchronizes access to PG_W, another processor 2677 * could be setting PG_M and/or PG_A concurrently. 2678 */ 2679 atomic_clear_long(l3, ATTR_SW_WIRED); 2680 pmap->pm_stats.wired_count--; 2681 } 2682 } 2683 PMAP_UNLOCK(pmap); 2684} 2685 2686/* 2687 * Copy the range specified by src_addr/len 2688 * from the source map to the range dst_addr/len 2689 * in the destination map. 2690 * 2691 * This routine is only advisory and need not do anything. 2692 */ 2693 2694void 2695pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2696 vm_offset_t src_addr) 2697{ 2698} 2699 2700/* 2701 * pmap_zero_page zeros the specified hardware page by mapping 2702 * the page into KVM and using bzero to clear its contents. 2703 */ 2704void 2705pmap_zero_page(vm_page_t m) 2706{ 2707 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2708 2709 pagezero((void *)va); 2710} 2711 2712/* 2713 * pmap_zero_page_area zeros the specified hardware page by mapping 2714 * the page into KVM and using bzero to clear its contents. 2715 * 2716 * off and size may not cover an area beyond a single hardware page. 2717 */ 2718void 2719pmap_zero_page_area(vm_page_t m, int off, int size) 2720{ 2721 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2722 2723 if (off == 0 && size == PAGE_SIZE) 2724 pagezero((void *)va); 2725 else 2726 bzero((char *)va + off, size); 2727} 2728 2729/* 2730 * pmap_zero_page_idle zeros the specified hardware page by mapping 2731 * the page into KVM and using bzero to clear its contents. 
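 * (On arm64 the page is addressed through the direct map, so no temporary
 * kernel mapping is actually created.)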
This 2732 * is intended to be called from the vm_pagezero process only and 2733 * outside of Giant. 2734 */ 2735void 2736pmap_zero_page_idle(vm_page_t m) 2737{ 2738 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2739 2740 pagezero((void *)va); 2741} 2742 2743/* 2744 * pmap_copy_page copies the specified (machine independent) 2745 * page by mapping the page into virtual memory and using 2746 * bcopy to copy the page, one machine dependent page at a 2747 * time. 2748 */ 2749void 2750pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2751{ 2752 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2753 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2754 2755 pagecopy((void *)src, (void *)dst); 2756} 2757 2758int unmapped_buf_allowed = 1; 2759 2760void 2761pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2762 vm_offset_t b_offset, int xfersize) 2763{ 2764 void *a_cp, *b_cp; 2765 vm_page_t m_a, m_b; 2766 vm_paddr_t p_a, p_b; 2767 vm_offset_t a_pg_offset, b_pg_offset; 2768 int cnt; 2769 2770 while (xfersize > 0) { 2771 a_pg_offset = a_offset & PAGE_MASK; 2772 m_a = ma[a_offset >> PAGE_SHIFT]; 2773 p_a = m_a->phys_addr; 2774 b_pg_offset = b_offset & PAGE_MASK; 2775 m_b = mb[b_offset >> PAGE_SHIFT]; 2776 p_b = m_b->phys_addr; 2777 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2778 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2779 if (__predict_false(!PHYS_IN_DMAP(p_a))) { 2780 panic("!DMAP a %lx", p_a); 2781 } else { 2782 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 2783 } 2784 if (__predict_false(!PHYS_IN_DMAP(p_b))) { 2785 panic("!DMAP b %lx", p_b); 2786 } else { 2787 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 2788 } 2789 bcopy(a_cp, b_cp, cnt); 2790 a_offset += cnt; 2791 b_offset += cnt; 2792 xfersize -= cnt; 2793 } 2794} 2795 2796vm_offset_t 2797pmap_quick_enter_page(vm_page_t m) 2798{ 2799 2800 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 2801} 2802 2803void 2804pmap_quick_remove_page(vm_offset_t addr) 2805{ 2806} 2807 2808/* 2809 * Returns true if the pmap's pv is one of the first 2810 * 16 pvs linked to from this page. This count may 2811 * be changed upwards or downwards in the future; it 2812 * is only necessary that true be returned for a small 2813 * subset of pmaps for proper page aging. 2814 */ 2815boolean_t 2816pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2817{ 2818 struct rwlock *lock; 2819 pv_entry_t pv; 2820 int loops = 0; 2821 boolean_t rv; 2822 2823 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2824 ("pmap_page_exists_quick: page %p is not managed", m)); 2825 rv = FALSE; 2826 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2827 rw_rlock(lock); 2828 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2829 if (PV_PMAP(pv) == pmap) { 2830 rv = TRUE; 2831 break; 2832 } 2833 loops++; 2834 if (loops >= 16) 2835 break; 2836 } 2837 rw_runlock(lock); 2838 return (rv); 2839} 2840 2841/* 2842 * pmap_page_wired_mappings: 2843 * 2844 * Return the number of managed mappings to the given physical page 2845 * that are wired. 
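 * The page's PV list is scanned with the PV list lock held for reading;
 * if an owning pmap cannot be try-locked, the locks are reordered and the
 * scan restarts when the md.pv_gen generation count shows the list changed
 * in the meantime.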
2846 */ 2847int 2848pmap_page_wired_mappings(vm_page_t m) 2849{ 2850 struct rwlock *lock; 2851 pmap_t pmap; 2852 pt_entry_t *pte; 2853 pv_entry_t pv; 2854 int count, lvl, md_gen; 2855 2856 if ((m->oflags & VPO_UNMANAGED) != 0) 2857 return (0); 2858 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2859 rw_rlock(lock); 2860restart: 2861 count = 0; 2862 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2863 pmap = PV_PMAP(pv); 2864 if (!PMAP_TRYLOCK(pmap)) { 2865 md_gen = m->md.pv_gen; 2866 rw_runlock(lock); 2867 PMAP_LOCK(pmap); 2868 rw_rlock(lock); 2869 if (md_gen != m->md.pv_gen) { 2870 PMAP_UNLOCK(pmap); 2871 goto restart; 2872 } 2873 } 2874 pte = pmap_pte(pmap, pv->pv_va, &lvl); 2875 if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) 2876 count++; 2877 PMAP_UNLOCK(pmap); 2878 } 2879 rw_runlock(lock); 2880 return (count); 2881} 2882 2883/* 2884 * Destroy all managed, non-wired mappings in the given user-space 2885 * pmap. This pmap cannot be active on any processor besides the 2886 * caller. 2887 * 2888 * This function cannot be applied to the kernel pmap. Moreover, it 2889 * is not intended for general use. It is only to be used during 2890 * process termination. Consequently, it can be implemented in ways 2891 * that make it faster than pmap_remove(). First, it can more quickly 2892 * destroy mappings by iterating over the pmap's collection of PV 2893 * entries, rather than searching the page table. Second, it doesn't 2894 * have to test and clear the page table entries atomically, because 2895 * no processor is currently accessing the user address space. In 2896 * particular, a page table entry's dirty bit won't change state once 2897 * this function starts. 2898 */ 2899void 2900pmap_remove_pages(pmap_t pmap) 2901{ 2902 pd_entry_t *pde; 2903 pt_entry_t *pte, tpte; 2904 struct spglist free; 2905 vm_page_t m; 2906 pv_entry_t pv; 2907 struct pv_chunk *pc, *npc; 2908 struct rwlock *lock; 2909 int64_t bit; 2910 uint64_t inuse, bitmask; 2911 int allfree, field, freed, idx, lvl; 2912 vm_paddr_t pa; 2913 2914 lock = NULL; 2915 2916 SLIST_INIT(&free); 2917 PMAP_LOCK(pmap); 2918 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2919 allfree = 1; 2920 freed = 0; 2921 for (field = 0; field < _NPCM; field++) { 2922 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2923 while (inuse != 0) { 2924 bit = ffsl(inuse) - 1; 2925 bitmask = 1UL << bit; 2926 idx = field * 64 + bit; 2927 pv = &pc->pc_pventry[idx]; 2928 inuse &= ~bitmask; 2929 2930 pde = pmap_pde(pmap, pv->pv_va, &lvl); 2931 KASSERT(pde != NULL, 2932 ("Attempting to remove an unmapped page")); 2933 KASSERT(lvl == 2, 2934 ("Invalid page directory level: %d", lvl)); 2935 2936 pte = pmap_l2_to_l3(pde, pv->pv_va); 2937 KASSERT(pte != NULL, 2938 ("Attempting to remove an unmapped page")); 2939 2940 tpte = pmap_load(pte); 2941 2942/* 2943 * We cannot remove wired pages from a process' mapping at this time 2944 */ 2945 if (tpte & ATTR_SW_WIRED) { 2946 allfree = 0; 2947 continue; 2948 } 2949 2950 pa = tpte & ~ATTR_MASK; 2951 2952 m = PHYS_TO_VM_PAGE(pa); 2953 KASSERT(m->phys_addr == pa, 2954 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 2955 m, (uintmax_t)m->phys_addr, 2956 (uintmax_t)tpte)); 2957 2958 KASSERT((m->flags & PG_FICTITIOUS) != 0 || 2959 m < &vm_page_array[vm_page_array_size], 2960 ("pmap_remove_pages: bad pte %#jx", 2961 (uintmax_t)tpte)); 2962 2963 /* XXX: assumes tpte is level 3 */ 2964 if (pmap_is_current(pmap) && 2965 pmap_l3_valid_cacheable(tpte)) 2966 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 2967 pmap_load_clear(pte); 2968 
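 /* Write the cleared entry back for the hardware table walker and discard any stale TLB entry for this address. */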
PTE_SYNC(pte); 2969 pmap_invalidate_page(pmap, pv->pv_va); 2970 2971 /* 2972 * Update the vm_page_t clean/reference bits. 2973 */ 2974 if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 2975 vm_page_dirty(m); 2976 2977 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); 2978 2979 /* Mark free */ 2980 pc->pc_map[field] |= bitmask; 2981 2982 pmap_resident_count_dec(pmap, 1); 2983 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2984 m->md.pv_gen++; 2985 2986 pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde), 2987 &free); 2988 freed++; 2989 } 2990 } 2991 PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 2992 PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 2993 PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 2994 if (allfree) { 2995 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2996 free_pv_chunk(pc); 2997 } 2998 } 2999 pmap_invalidate_all(pmap); 3000 if (lock != NULL) 3001 rw_wunlock(lock); 3002 PMAP_UNLOCK(pmap); 3003 pmap_free_zero_pages(&free); 3004} 3005 3006/* 3007 * This is used to check if a page has been accessed or modified. As we 3008 * don't have a bit to see if it has been modified we have to assume it 3009 * has been if the page is read/write. 3010 */ 3011static boolean_t 3012pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) 3013{ 3014 struct rwlock *lock; 3015 pv_entry_t pv; 3016 pt_entry_t *pte, mask, value; 3017 pmap_t pmap; 3018 int lvl, md_gen; 3019 boolean_t rv; 3020 3021 rv = FALSE; 3022 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3023 rw_rlock(lock); 3024restart: 3025 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3026 pmap = PV_PMAP(pv); 3027 if (!PMAP_TRYLOCK(pmap)) { 3028 md_gen = m->md.pv_gen; 3029 rw_runlock(lock); 3030 PMAP_LOCK(pmap); 3031 rw_rlock(lock); 3032 if (md_gen != m->md.pv_gen) { 3033 PMAP_UNLOCK(pmap); 3034 goto restart; 3035 } 3036 } 3037 pte = pmap_pte(pmap, pv->pv_va, &lvl); 3038 KASSERT(lvl == 3, 3039 ("pmap_page_test_mappings: Invalid level %d", lvl)); 3040 mask = 0; 3041 value = 0; 3042 if (modified) { 3043 mask |= ATTR_AP_RW_BIT; 3044 value |= ATTR_AP(ATTR_AP_RW); 3045 } 3046 if (accessed) { 3047 mask |= ATTR_AF | ATTR_DESCR_MASK; 3048 value |= ATTR_AF | L3_PAGE; 3049 } 3050 rv = (pmap_load(pte) & mask) == value; 3051 PMAP_UNLOCK(pmap); 3052 if (rv) 3053 goto out; 3054 } 3055out: 3056 rw_runlock(lock); 3057 return (rv); 3058} 3059 3060/* 3061 * pmap_is_modified: 3062 * 3063 * Return whether or not the specified physical page was modified 3064 * in any physical maps. 3065 */ 3066boolean_t 3067pmap_is_modified(vm_page_t m) 3068{ 3069 3070 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3071 ("pmap_is_modified: page %p is not managed", m)); 3072 3073 /* 3074 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3075 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 3076 * is clear, no PTEs can have PG_M set. 3077 */ 3078 VM_OBJECT_ASSERT_WLOCKED(m->object); 3079 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3080 return (FALSE); 3081 return (pmap_page_test_mappings(m, FALSE, TRUE)); 3082} 3083 3084/* 3085 * pmap_is_prefaultable: 3086 * 3087 * Return whether or not the specified virtual address is eligible 3088 * for prefault. 
3089 */ 3090boolean_t 3091pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 3092{ 3093 pt_entry_t *pte; 3094 boolean_t rv; 3095 int lvl; 3096 3097 rv = FALSE; 3098 PMAP_LOCK(pmap); 3099 pte = pmap_pte(pmap, addr, &lvl); 3100 if (pte != NULL && pmap_load(pte) != 0) { 3101 rv = TRUE; 3102 } 3103 PMAP_UNLOCK(pmap); 3104 return (rv); 3105} 3106 3107/* 3108 * pmap_is_referenced: 3109 * 3110 * Return whether or not the specified physical page was referenced 3111 * in any physical maps. 3112 */ 3113boolean_t 3114pmap_is_referenced(vm_page_t m) 3115{ 3116 3117 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3118 ("pmap_is_referenced: page %p is not managed", m)); 3119 return (pmap_page_test_mappings(m, TRUE, FALSE)); 3120} 3121 3122/* 3123 * Clear the write and modified bits in each of the given page's mappings. 3124 */ 3125void 3126pmap_remove_write(vm_page_t m) 3127{ 3128 pmap_t pmap; 3129 struct rwlock *lock; 3130 pv_entry_t pv; 3131 pt_entry_t oldpte, *pte; 3132 int lvl, md_gen; 3133 3134 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3135 ("pmap_remove_write: page %p is not managed", m)); 3136 3137 /* 3138 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3139 * set by another thread while the object is locked. Thus, 3140 * if PGA_WRITEABLE is clear, no page table entries need updating. 3141 */ 3142 VM_OBJECT_ASSERT_WLOCKED(m->object); 3143 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3144 return; 3145 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3146retry_pv_loop: 3147 rw_wlock(lock); 3148 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3149 pmap = PV_PMAP(pv); 3150 if (!PMAP_TRYLOCK(pmap)) { 3151 md_gen = m->md.pv_gen; 3152 rw_wunlock(lock); 3153 PMAP_LOCK(pmap); 3154 rw_wlock(lock); 3155 if (md_gen != m->md.pv_gen) { 3156 PMAP_UNLOCK(pmap); 3157 rw_wunlock(lock); 3158 goto retry_pv_loop; 3159 } 3160 } 3161 pte = pmap_pte(pmap, pv->pv_va, &lvl); 3162retry: 3163 oldpte = pmap_load(pte); 3164 if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { 3165 if (!atomic_cmpset_long(pte, oldpte, 3166 oldpte | ATTR_AP(ATTR_AP_RO))) 3167 goto retry; 3168 if ((oldpte & ATTR_AF) != 0) 3169 vm_page_dirty(m); 3170 pmap_invalidate_page(pmap, pv->pv_va); 3171 } 3172 PMAP_UNLOCK(pmap); 3173 } 3174 rw_wunlock(lock); 3175 vm_page_aflag_clear(m, PGA_WRITEABLE); 3176} 3177 3178static __inline boolean_t 3179safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) 3180{ 3181 3182 return (FALSE); 3183} 3184 3185#define PMAP_TS_REFERENCED_MAX 5 3186 3187/* 3188 * pmap_ts_referenced: 3189 * 3190 * Return a count of reference bits for a page, clearing those bits. 3191 * It is not necessary for every reference bit to be cleared, but it 3192 * is necessary that 0 only be returned when there are truly no 3193 * reference bits set. 3194 * 3195 * XXX: The exact number of bits to check and clear is a matter that 3196 * should be tested and standardized at some point in the future for 3197 * optimal aging of shared pages. 
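 * On arm64 the hardware access flag is not yet managed in software, so a
 * referenced, unwired mapping is destroyed (to be recreated on the next
 * access) instead of having ATTR_AF cleared in place; the scan stops once
 * PMAP_TS_REFERENCED_MAX referenced mappings have been handled.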
3198 */ 3199int 3200pmap_ts_referenced(vm_page_t m) 3201{ 3202 pv_entry_t pv, pvf; 3203 pmap_t pmap; 3204 struct rwlock *lock; 3205 pd_entry_t *pde, tpde; 3206 pt_entry_t *pte, tpte; 3207 vm_paddr_t pa; 3208 int cleared, md_gen, not_cleared, lvl; 3209 struct spglist free; 3210 3211 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3212 ("pmap_ts_referenced: page %p is not managed", m)); 3213 SLIST_INIT(&free); 3214 cleared = 0; 3215 pa = VM_PAGE_TO_PHYS(m); 3216 lock = PHYS_TO_PV_LIST_LOCK(pa); 3217 rw_wlock(lock); 3218retry: 3219 not_cleared = 0; 3220 if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 3221 goto out; 3222 pv = pvf; 3223 do { 3224 if (pvf == NULL) 3225 pvf = pv; 3226 pmap = PV_PMAP(pv); 3227 if (!PMAP_TRYLOCK(pmap)) { 3228 md_gen = m->md.pv_gen; 3229 rw_wunlock(lock); 3230 PMAP_LOCK(pmap); 3231 rw_wlock(lock); 3232 if (md_gen != m->md.pv_gen) { 3233 PMAP_UNLOCK(pmap); 3234 goto retry; 3235 } 3236 } 3237 pde = pmap_pde(pmap, pv->pv_va, &lvl); 3238 KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); 3239 KASSERT(lvl == 2, 3240 ("pmap_ts_referenced: invalid pde level %d", lvl)); 3241 tpde = pmap_load(pde); 3242 KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, 3243 ("pmap_ts_referenced: found an invalid l2 table")); 3244 pte = pmap_l2_to_l3(pde, pv->pv_va); 3245 tpte = pmap_load(pte); 3246 if ((tpte & ATTR_AF) != 0) { 3247 if (safe_to_clear_referenced(pmap, tpte)) { 3248 /* 3249 * TODO: We don't handle the access flag 3250 * at all. We need to be able to set it in 3251 * the exception handler. 3252 */ 3253 panic("ARM64TODO: safe_to_clear_referenced\n"); 3254 } else if ((tpte & ATTR_SW_WIRED) == 0) { 3255 /* 3256 * Wired pages cannot be paged out so 3257 * doing accessed bit emulation for 3258 * them is wasted effort. We do the 3259 * hard work for unwired pages only. 3260 */ 3261 pmap_remove_l3(pmap, pte, pv->pv_va, tpde, 3262 &free, &lock); 3263 pmap_invalidate_page(pmap, pv->pv_va); 3264 cleared++; 3265 if (pvf == pv) 3266 pvf = NULL; 3267 pv = NULL; 3268 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 3269 ("inconsistent pv lock %p %p for page %p", 3270 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 3271 } else 3272 not_cleared++; 3273 } 3274 PMAP_UNLOCK(pmap); 3275 /* Rotate the PV list if it has more than one entry. */ 3276 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 3277 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 3278 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3279 m->md.pv_gen++; 3280 } 3281 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 3282 not_cleared < PMAP_TS_REFERENCED_MAX); 3283out: 3284 rw_wunlock(lock); 3285 pmap_free_zero_pages(&free); 3286 return (cleared + not_cleared); 3287} 3288 3289/* 3290 * Apply the given advice to the specified range of addresses within the 3291 * given pmap. Depending on the advice, clear the referenced and/or 3292 * modified flags in each mapping and set the mapped page's dirty field. 3293 */ 3294void 3295pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 3296{ 3297} 3298 3299/* 3300 * Clear the modify bits on the specified physical page. 3301 */ 3302void 3303pmap_clear_modify(vm_page_t m) 3304{ 3305 3306 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3307 ("pmap_clear_modify: page %p is not managed", m)); 3308 VM_OBJECT_ASSERT_WLOCKED(m->object); 3309 KASSERT(!vm_page_xbusied(m), 3310 ("pmap_clear_modify: page %p is exclusive busied", m)); 3311 3312 /* 3313 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 
* If the object containing the page is locked and the page is not 3315 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 3316 */ 3317 if ((m->aflags & PGA_WRITEABLE) == 0) 3318 return; 3319 3320 /* ARM64TODO: We lack support for tracking if a page is modified */ 3321} 3322 3323void * 3324pmap_mapbios(vm_paddr_t pa, vm_size_t size) 3325{ 3326 3327 return ((void *)PHYS_TO_DMAP(pa)); 3328} 3329 3330void 3331pmap_unmapbios(vm_paddr_t pa, vm_size_t size) 3332{ 3333} 3334 3335/* 3336 * Sets the memory attribute for the specified page. 3337 */ 3338void 3339pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3340{ 3341 3342 m->md.pv_memattr = ma; 3343 3344 /* 3345 * ARM64TODO: Implement the below (from the amd64 pmap) 3346 * If "m" is a normal page, update its direct mapping. This update 3347 * can be relied upon to perform any cache operations that are 3348 * required for data coherence. 3349 */ 3350 if ((m->flags & PG_FICTITIOUS) == 0 && 3351 PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) 3352 panic("ARM64TODO: pmap_page_set_memattr"); 3353} 3354 3355/* 3356 * perform the pmap work for mincore 3357 */ 3358int 3359pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 3360{ 3361 pd_entry_t *l1p, l1; 3362 pd_entry_t *l2p, l2; 3363 pt_entry_t *l3p, l3; 3364 vm_paddr_t pa; 3365 bool managed; 3366 int val; 3367 3368 PMAP_LOCK(pmap); 3369retry: 3370 pa = 0; 3371 val = 0; 3372 managed = false; 3373 3374 l1p = pmap_l1(pmap, addr); 3375 if (l1p == NULL) /* No l1 */ 3376 goto done; 3377 3378 l1 = pmap_load(l1p); 3379 if ((l1 & ATTR_DESCR_MASK) == L1_INVAL) 3380 goto done; 3381 3382 if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) { 3383 pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET); 3384 managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 3385 val = MINCORE_SUPER | MINCORE_INCORE; 3386 if (pmap_page_dirty(l1)) 3387 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 3388 if ((l1 & ATTR_AF) == ATTR_AF) 3389 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 3390 goto done; 3391 } 3392 3393 l2p = pmap_l1_to_l2(l1p, addr); 3394 if (l2p == NULL) /* No l2 */ 3395 goto done; 3396 3397 l2 = pmap_load(l2p); 3398 if ((l2 & ATTR_DESCR_MASK) == L2_INVAL) 3399 goto done; 3400 3401 if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) { 3402 pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET); 3403 managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 3404 val = MINCORE_SUPER | MINCORE_INCORE; 3405 if (pmap_page_dirty(l2)) 3406 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 3407 if ((l2 & ATTR_AF) == ATTR_AF) 3408 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 3409 goto done; 3410 } 3411 3412 l3p = pmap_l2_to_l3(l2p, addr); 3413 if (l3p == NULL) /* No l3 */ 3414 goto done; 3415 3416 l3 = pmap_load(l3p); 3417 if ((l3 & ATTR_DESCR_MASK) == L3_INVAL) 3418 goto done; 3419 3420 if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) { 3421 pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET); 3422 managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 3423 val = MINCORE_INCORE; 3424 if (pmap_page_dirty(l3)) 3425 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 3426 if ((l3 & ATTR_AF) == ATTR_AF) 3427 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 3428 } 3429 3430done: 3431 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 3432 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { 3433 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change.
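 * If vm_page_pa_tryrelock() had to drop the pmap lock to switch page
 * locks, the page tables may have changed, so the walk is retried from
 * the top.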
*/ 3434 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 3435 goto retry; 3436 } else 3437 PA_UNLOCK_COND(*locked_pa); 3438 PMAP_UNLOCK(pmap); 3439 3440 return (val); 3441} 3442 3443void 3444pmap_activate(struct thread *td) 3445{ 3446 pmap_t pmap; 3447 3448 critical_enter(); 3449 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3450 td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0); 3451 __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr)); 3452 pmap_invalidate_all(pmap); 3453 critical_exit(); 3454} 3455 3456void 3457pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) 3458{ 3459 3460 if (va >= VM_MIN_KERNEL_ADDRESS) { 3461 cpu_icache_sync_range(va, sz); 3462 } else { 3463 u_int len, offset; 3464 vm_paddr_t pa; 3465 3466 /* Find the length of data in this page to flush */ 3467 offset = va & PAGE_MASK; 3468 len = imin(PAGE_SIZE - offset, sz); 3469 3470 while (sz != 0) { 3471 /* Extract the physical address & find it in the DMAP */ 3472 pa = pmap_extract(pmap, va); 3473 if (pa != 0) 3474 cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); 3475 3476 /* Move to the next page */ 3477 sz -= len; 3478 va += len; 3479 /* Set the length for the next iteration */ 3480 len = imin(PAGE_SIZE, sz); 3481 } 3482 } 3483} 3484 3485/* 3486 * Increase the starting virtual address of the given mapping if a 3487 * different alignment might result in more superpage mappings. 3488 */ 3489void 3490pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 3491 vm_offset_t *addr, vm_size_t size) 3492{ 3493} 3494 3495/** 3496 * Get the kernel virtual address of a set of physical pages. If there are 3497 * physical addresses not covered by the DMAP perform a transient mapping 3498 * that will be removed when calling pmap_unmap_io_transient. 3499 * 3500 * \param page The pages the caller wishes to obtain the virtual 3501 * address on the kernel memory map. 3502 * \param vaddr On return contains the kernel virtual memory address 3503 * of the pages passed in the page parameter. 3504 * \param count Number of pages passed in. 3505 * \param can_fault TRUE if the thread using the mapped pages can take 3506 * page faults, FALSE otherwise. 3507 * 3508 * \returns TRUE if the caller must call pmap_unmap_io_transient when 3509 * finished or FALSE otherwise. 3510 * 3511 */ 3512boolean_t 3513pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3514 boolean_t can_fault) 3515{ 3516 vm_paddr_t paddr; 3517 boolean_t needs_mapping; 3518 int error, i; 3519 3520 /* 3521 * Allocate any KVA space that we need, this is done in a separate 3522 * loop to prevent calling vmem_alloc while pinned. 
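 * Pages that already fall within the DMAP need no KVA; space is reserved
 * only for pages outside the DMAP, and actually mapping those is still a
 * TODO below.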
3523 */ 3524 needs_mapping = FALSE; 3525 for (i = 0; i < count; i++) { 3526 paddr = VM_PAGE_TO_PHYS(page[i]); 3527 if (__predict_false(!PHYS_IN_DMAP(paddr))) { 3528 error = vmem_alloc(kernel_arena, PAGE_SIZE, 3529 M_BESTFIT | M_WAITOK, &vaddr[i]); 3530 KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 3531 needs_mapping = TRUE; 3532 } else { 3533 vaddr[i] = PHYS_TO_DMAP(paddr); 3534 } 3535 } 3536 3537 /* Exit early if everything is covered by the DMAP */ 3538 if (!needs_mapping) 3539 return (FALSE); 3540 3541 if (!can_fault) 3542 sched_pin(); 3543 for (i = 0; i < count; i++) { 3544 paddr = VM_PAGE_TO_PHYS(page[i]); 3545 if (!PHYS_IN_DMAP(paddr)) { 3546 panic( 3547 "pmap_map_io_transient: TODO: Map out of DMAP data"); 3548 } 3549 } 3550 3551 return (needs_mapping); 3552} 3553 3554void 3555pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3556 boolean_t can_fault) 3557{ 3558 vm_paddr_t paddr; 3559 int i; 3560 3561 if (!can_fault) 3562 sched_unpin(); 3563 for (i = 0; i < count; i++) { 3564 paddr = VM_PAGE_TO_PHYS(page[i]); 3565 if (!PHYS_IN_DMAP(paddr)) { 3566 panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); 3567 } 3568 } 3569} 3570