/* pmap-v6.c revision 318742 */
1/*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * Copyright (c) 1994 John S. Dyson 4 * Copyright (c) 1994 David Greenman 5 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> 6 * Copyright (c) 2014-2016 Svatopluk Kraus <skra@FreeBSD.org> 7 * Copyright (c) 2014-2016 Michal Meloun <mmel@FreeBSD.org> 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * the Systems Programming Group of the University of Utah Computer 12 * Science Department and William Jolitz of UUNET Technologies Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 39 */ 40/*- 41 * Copyright (c) 2003 Networks Associates Technology, Inc. 42 * All rights reserved. 43 * 44 * This software was developed for the FreeBSD Project by Jake Burkholder, 45 * Safeport Network Services, and Network Associates Laboratories, the 46 * Security Research Division of Network Associates, Inc. under 47 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 48 * CHATS research program. 49 * 50 * Redistribution and use in source and binary forms, with or without 51 * modification, are permitted provided that the following conditions 52 * are met: 53 * 1. Redistributions of source code must retain the above copyright 54 * notice, this list of conditions and the following disclaimer. 55 * 2. Redistributions in binary form must reproduce the above copyright 56 * notice, this list of conditions and the following disclaimer in the 57 * documentation and/or other materials provided with the distribution. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 */ 71 72#include <sys/cdefs.h> 73__FBSDID("$FreeBSD: stable/11/sys/arm/arm/pmap-v6.c 318742 2017-05-23 12:03:59Z mmel $"); 74 75/* 76 * Manages physical address maps. 77 * 78 * Since the information managed by this module is 79 * also stored by the logical address mapping module, 80 * this module may throw away valid virtual-to-physical 81 * mappings at almost any time. However, invalidations 82 * of virtual-to-physical mappings must be done as 83 * requested. 84 * 85 * In order to cope with hardware architectures which 86 * make virtual-to-physical map invalidates expensive, 87 * this module may delay invalidate or reduced protection 88 * operations until such time as they are actually 89 * necessary. This module is given full information as 90 * to which processors are currently using which maps, 91 * and to when physical maps must be made correct. 
92 */ 93 94#include "opt_vm.h" 95#include "opt_pmap.h" 96#include "opt_ddb.h" 97 98#include <sys/param.h> 99#include <sys/systm.h> 100#include <sys/kernel.h> 101#include <sys/ktr.h> 102#include <sys/lock.h> 103#include <sys/proc.h> 104#include <sys/rwlock.h> 105#include <sys/malloc.h> 106#include <sys/vmmeter.h> 107#include <sys/malloc.h> 108#include <sys/mman.h> 109#include <sys/sf_buf.h> 110#include <sys/smp.h> 111#include <sys/sched.h> 112#include <sys/sysctl.h> 113 114#ifdef DDB 115#include <ddb/ddb.h> 116#endif 117 118#include <machine/physmem.h> 119 120#include <vm/vm.h> 121#include <vm/uma.h> 122#include <vm/pmap.h> 123#include <vm/vm_param.h> 124#include <vm/vm_kern.h> 125#include <vm/vm_object.h> 126#include <vm/vm_map.h> 127#include <vm/vm_page.h> 128#include <vm/vm_pageout.h> 129#include <vm/vm_phys.h> 130#include <vm/vm_extern.h> 131#include <vm/vm_reserv.h> 132#include <sys/lock.h> 133#include <sys/mutex.h> 134 135#include <machine/md_var.h> 136#include <machine/pmap_var.h> 137#include <machine/cpu.h> 138#include <machine/pcb.h> 139#include <machine/sf_buf.h> 140#ifdef SMP 141#include <machine/smp.h> 142#endif 143 144#ifndef PMAP_SHPGPERPROC 145#define PMAP_SHPGPERPROC 200 146#endif 147 148#ifndef DIAGNOSTIC 149#define PMAP_INLINE __inline 150#else 151#define PMAP_INLINE 152#endif 153 154#ifdef PMAP_DEBUG 155static void pmap_zero_page_check(vm_page_t m); 156void pmap_debug(int level); 157int pmap_pid_dump(int pid); 158 159#define PDEBUG(_lev_,_stat_) \ 160 if (pmap_debug_level >= (_lev_)) \ 161 ((_stat_)) 162#define dprintf printf 163int pmap_debug_level = 1; 164#else /* PMAP_DEBUG */ 165#define PDEBUG(_lev_,_stat_) /* Nothing */ 166#define dprintf(x, arg...) 167#endif /* PMAP_DEBUG */ 168 169/* 170 * Level 2 page tables map definion ('max' is excluded). 
 */

/* Bounds of the region of KVA through which all L2 page tables are mapped. */
#define PT2V_MIN_ADDRESS	((vm_offset_t)PT2MAP)
#define PT2V_MAX_ADDRESS	((vm_offset_t)PT2MAP + PT2MAP_SIZE)

/* Same, restricted to L2 page tables covering user space (below KERNBASE). */
#define UPT2V_MIN_ADDRESS	((vm_offset_t)PT2MAP)
#define UPT2V_MAX_ADDRESS \
    ((vm_offset_t)(PT2MAP + (KERNBASE >> PT2MAP_SHIFT)))

/*
 * Promotion to a 1MB (PTE1) page mapping requires that the corresponding
 * 4KB (PTE2) page mappings have identical settings for the following fields:
 */
#define PTE2_PROMOTE	(PTE2_V | PTE2_A | PTE2_NM | PTE2_S | PTE2_NG | \
			 PTE2_NX | PTE2_RO | PTE2_U | PTE2_W | \
			 PTE2_ATTR_MASK)

/* Fields which must match across PTE1s for demotion/promotion bookkeeping. */
#define PTE1_PROMOTE	(PTE1_V | PTE1_A | PTE1_NM | PTE1_S | PTE1_NG | \
			 PTE1_NX | PTE1_RO | PTE1_U | PTE1_W | \
			 PTE1_ATTR_MASK)

/*
 * Translate L2 (small page) attribute/permission bits to the equivalent
 * L1 (section) bits, bit by bit.  Used when promoting 4KB mappings to 1MB.
 */
#define ATTR_TO_L1(l2_attr)	((((l2_attr) & L2_TEX0) ? L1_S_TEX0 : 0) | \
				(((l2_attr) & L2_C) ? L1_S_C : 0) | \
				(((l2_attr) & L2_B) ? L1_S_B : 0) | \
				(((l2_attr) & PTE2_A) ? PTE1_A : 0) | \
				(((l2_attr) & PTE2_NM) ? PTE1_NM : 0) | \
				(((l2_attr) & PTE2_S) ? PTE1_S : 0) | \
				(((l2_attr) & PTE2_NG) ? PTE1_NG : 0) | \
				(((l2_attr) & PTE2_NX) ? PTE1_NX : 0) | \
				(((l2_attr) & PTE2_RO) ? PTE1_RO : 0) | \
				(((l2_attr) & PTE2_U) ? PTE1_U : 0) | \
				(((l2_attr) & PTE2_W) ? PTE1_W : 0))

/* Inverse of ATTR_TO_L1: L1 (section) bits to L2 (small page) bits. */
#define ATTR_TO_L2(l1_attr)	((((l1_attr) & L1_S_TEX0) ? L2_TEX0 : 0) | \
				(((l1_attr) & L1_S_C) ? L2_C : 0) | \
				(((l1_attr) & L1_S_B) ? L2_B : 0) | \
				(((l1_attr) & PTE1_A) ? PTE2_A : 0) | \
				(((l1_attr) & PTE1_NM) ? PTE2_NM : 0) | \
				(((l1_attr) & PTE1_S) ? PTE2_S : 0) | \
				(((l1_attr) & PTE1_NG) ? PTE2_NG : 0) | \
				(((l1_attr) & PTE1_NX) ? PTE2_NX : 0) | \
				(((l1_attr) & PTE1_RO) ? PTE2_RO : 0) | \
				(((l1_attr) & PTE1_U) ? PTE2_U : 0) | \
				(((l1_attr) & PTE1_W) ? PTE2_W : 0))

/*
 * PTE2 descriptors creation macros.
 */
#define PTE2_ATTR_DEFAULT	vm_memattr_to_pte2(VM_MEMATTR_DEFAULT)
#define PTE2_ATTR_PT		vm_memattr_to_pte2(pt_memattr)

/* Kernel page-table page mappings (PTE2_KPT_NG is the not-global variant). */
#define PTE2_KPT(pa)	PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_PT)
#define PTE2_KPT_NG(pa)	PTE2_KERN_NG(pa, PTE2_AP_KRW, PTE2_ATTR_PT)

/* Ordinary kernel read-write / read-only mappings with default attributes. */
#define PTE2_KRW(pa)	PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT)
#define PTE2_KRO(pa)	PTE2_KERN(pa, PTE2_AP_KR, PTE2_ATTR_DEFAULT)

/* PV statistics are unconditionally enabled in this file. */
#define PV_STATS
#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

/*
 * The boot_pt1 is used temporarily in very early boot stage as L1 page table.
 * We can init many things with no memory allocation thanks to its static
 * allocation and this brings two main advantages:
 * (1) other cores can be started very simply,
 * (2) various boot loaders can be supported as its arguments can be processed
 *     in virtual address space and can be moved to safe location before
 *     first allocation happened.
 * Only disadvantage is that boot_pt1 is used only in very early boot stage.
 * However, the table is uninitialized and so lays in bss.  Therefore kernel
 * image size is not influenced.
 *
 * QQQ: In the future, maybe, boot_pt1 can be used for soft reset and
 *      CPU suspend/resume game.
 */
extern pt1_entry_t boot_pt1[];

/* Physical and virtual addresses of the real kernel L1 table and PT2TAB. */
vm_paddr_t base_pt1;
pt1_entry_t *kern_pt1;
pt2_entry_t *kern_pt2tab;
pt2_entry_t *PT2MAP;

static uint32_t ttb_flags;		/* cacheability flags OR-ed into TTBR */
static vm_memattr_t pt_memattr;		/* memory attribute for page tables */
ttb_entry_t pmap_kern_ttb;		/* TTBR value for the kernel pmap */

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;	/* list of all pmaps in the system */
static struct mtx allpmaps_lock;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

static vm_offset_t kernel_vm_end_new;
vm_offset_t kernel_vm_end = KERNBASE + NKPT2PG * NPT2_IN_PG * PTE1_SIZE;
vm_offset_t vm_max_kernel_address;
vm_paddr_t kernel_l1pa;

static struct rwlock __aligned(CACHE_LINE_SIZE) pvh_global_lock;

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static struct md_page *pv_table; /* XXX: Is it used only the list in md_page? */
static int shpgperproc = PMAP_SHPGPERPROC;

struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
int pv_maxchunks;			/* How many chunks we have KVA for */
vm_offset_t pv_vafree;			/* freelist stored in the PTE */

/* pv_table[] covers managed memory only; it starts at first_managed_pa. */
vm_paddr_t first_managed_pa;
#define	pa_to_pvh(pa)	(&pv_table[pte1_index(pa - first_managed_pa)])

/*
 * All those kernel PT submaps that BSD is so fond of
 */
static pt2_entry_t *CMAP3;
static caddr_t CADDR3;
caddr_t _tmppt = 0;

struct msgbuf *msgbufp = NULL; /* XXX move it to machdep.c */

/*
 * Crashdump maps.
 */
static caddr_t crashdumpmap;

/* Per-CPU-pair scratch PTE slots used by pmap_pte2_quick()/pmap_pte2(). */
static pt2_entry_t *PMAP1 = NULL, *PMAP2;
static pt2_entry_t *PADDR1 = NULL, *PADDR2;
#ifdef DDB
static pt2_entry_t *PMAP3;
static pt2_entry_t *PADDR3;
static int PMAP3cpu __unused; /* for SMP only */
#endif
#ifdef SMP
static int PMAP1cpu;
static int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
    &PMAP1changedcpu, 0,
    "Number of times pmap_pte2_quick changed CPU with same PMAP1");
#endif
static int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
    &PMAP1changed, 0,
    "Number of times pmap_pte2_quick changed PMAP1");
static int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
    &PMAP1unchanged, 0,
    "Number of times pmap_pte2_quick didn't change PMAP1");
static struct mtx PMAP2mutex;

/* Forward declarations for functions defined later in this file. */
static __inline void pt2_wirecount_init(vm_page_t m);
static boolean_t pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p,
    vm_offset_t va);
void cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size);

/*
 * Function to set the debug level of the pmap code.
 */
#ifdef PMAP_DEBUG
void
pmap_debug(int level)
{

	pmap_debug_level = level;
	dprintf("pmap_debug: level=%d\n", pmap_debug_level);
}
#endif /* PMAP_DEBUG */

/*
 * This table must correspond with memory attribute configuration in vm.h.
 * First entry is used for normal system mapping.
 *
 * Device memory is always marked as shared.
 * Normal memory is shared only in SMP.
 * Not outer shareable bits are not used yet.
 * Class 6 cannot be used on ARM11.
355 */ 356#define TEXDEF_TYPE_SHIFT 0 357#define TEXDEF_TYPE_MASK 0x3 358#define TEXDEF_INNER_SHIFT 2 359#define TEXDEF_INNER_MASK 0x3 360#define TEXDEF_OUTER_SHIFT 4 361#define TEXDEF_OUTER_MASK 0x3 362#define TEXDEF_NOS_SHIFT 6 363#define TEXDEF_NOS_MASK 0x1 364 365#define TEX(t, i, o, s) \ 366 ((t) << TEXDEF_TYPE_SHIFT) | \ 367 ((i) << TEXDEF_INNER_SHIFT) | \ 368 ((o) << TEXDEF_OUTER_SHIFT | \ 369 ((s) << TEXDEF_NOS_SHIFT)) 370 371static uint32_t tex_class[8] = { 372/* type inner cache outer cache */ 373 TEX(PRRR_MEM, NMRR_WB_WA, NMRR_WB_WA, 0), /* 0 - ATTR_WB_WA */ 374 TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 1 - ATTR_NOCACHE */ 375 TEX(PRRR_DEV, NMRR_NC, NMRR_NC, 0), /* 2 - ATTR_DEVICE */ 376 TEX(PRRR_SO, NMRR_NC, NMRR_NC, 0), /* 3 - ATTR_SO */ 377 TEX(PRRR_MEM, NMRR_WT, NMRR_WT, 0), /* 4 - ATTR_WT */ 378 TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 5 - NOT USED YET */ 379 TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 6 - NOT USED YET */ 380 TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 7 - NOT USED YET */ 381}; 382#undef TEX 383 384static uint32_t pte2_attr_tab[8] = { 385 PTE2_ATTR_WB_WA, /* 0 - VM_MEMATTR_WB_WA */ 386 PTE2_ATTR_NOCACHE, /* 1 - VM_MEMATTR_NOCACHE */ 387 PTE2_ATTR_DEVICE, /* 2 - VM_MEMATTR_DEVICE */ 388 PTE2_ATTR_SO, /* 3 - VM_MEMATTR_SO */ 389 PTE2_ATTR_WT, /* 4 - VM_MEMATTR_WRITE_THROUGH */ 390 0, /* 5 - NOT USED YET */ 391 0, /* 6 - NOT USED YET */ 392 0 /* 7 - NOT USED YET */ 393}; 394CTASSERT(VM_MEMATTR_WB_WA == 0); 395CTASSERT(VM_MEMATTR_NOCACHE == 1); 396CTASSERT(VM_MEMATTR_DEVICE == 2); 397CTASSERT(VM_MEMATTR_SO == 3); 398CTASSERT(VM_MEMATTR_WRITE_THROUGH == 4); 399 400static inline uint32_t 401vm_memattr_to_pte2(vm_memattr_t ma) 402{ 403 404 KASSERT((u_int)ma < 5, ("%s: bad vm_memattr_t %d", __func__, ma)); 405 return (pte2_attr_tab[(u_int)ma]); 406} 407 408static inline uint32_t 409vm_page_pte2_attr(vm_page_t m) 410{ 411 412 return (vm_memattr_to_pte2(m->md.pat_mode)); 413} 414 415/* 416 * Convert TEX definition entry to TTB flags. 
 */
static uint32_t
encode_ttb_flags(int idx)
{
	uint32_t inner, outer, nos, reg;

	/* Pull the cacheability/shareability fields out of the TEX class. */
	inner = (tex_class[idx] >> TEXDEF_INNER_SHIFT) &
	    TEXDEF_INNER_MASK;
	outer = (tex_class[idx] >> TEXDEF_OUTER_SHIFT) &
	    TEXDEF_OUTER_MASK;
	nos = (tex_class[idx] >> TEXDEF_NOS_SHIFT) &
	    TEXDEF_NOS_MASK;

	/* NOS at bit 5, outer cacheability (RGN) at bits 4:3. */
	reg = nos << 5;
	reg |= outer << 3;
	/*
	 * Inner cacheability is split across bits 6 and 0; bit 6 is only
	 * set when hardware coherent table walks are available.
	 * NOTE(review): exact IRGN bit meaning per TTBR format -- confirm
	 * against the ARMv7 ARM for the targeted cores.
	 */
	if (cpuinfo.coherent_walk)
		reg |= (inner & 0x1) << 6;
	reg |= (inner & 0x2) >> 1;
#ifdef SMP
	/* Shareable table walks on SMP. */
	reg |= 1 << 1;
#endif
	return reg;
}

/*
 * Set TEX remapping registers in current CPU.
 */
void
pmap_set_tex(void)
{
	uint32_t prrr, nmrr;
	uint32_t type, inner, outer, nos;
	int i;

	/*
	 * Choose the memory attribute and TTB flags used for page tables:
	 * write-back when table walks are cache coherent, uncached otherwise.
	 */
#ifdef PMAP_PTE_NOCACHE
	/* XXX fixme */
	if (cpuinfo.coherent_walk) {
		pt_memattr = VM_MEMATTR_WB_WA;
		ttb_flags = encode_ttb_flags(0);
	}
	else {
		pt_memattr = VM_MEMATTR_NOCACHE;
		ttb_flags = encode_ttb_flags(1);
	}
#else
	pt_memattr = VM_MEMATTR_WB_WA;
	ttb_flags = encode_ttb_flags(0);
#endif

	prrr = 0;
	nmrr = 0;

	/*
	 * Build remapping register from TEX classes.
	 * Per class i: 2 type bits in PRRR at 2*i, NOS bit at 24+i,
	 * 2 inner-cache bits in NMRR at 2*i, 2 outer-cache bits at 16+2*i.
	 */
	for (i = 0; i < 8; i++) {
		type = (tex_class[i] >> TEXDEF_TYPE_SHIFT) &
		    TEXDEF_TYPE_MASK;
		inner = (tex_class[i] >> TEXDEF_INNER_SHIFT) &
		    TEXDEF_INNER_MASK;
		outer = (tex_class[i] >> TEXDEF_OUTER_SHIFT) &
		    TEXDEF_OUTER_MASK;
		nos = (tex_class[i] >> TEXDEF_NOS_SHIFT) &
		    TEXDEF_NOS_MASK;

		prrr |= type << (i * 2);
		prrr |= nos << (i + 24);
		nmrr |= inner << (i * 2);
		nmrr |= outer << (i * 2 + 16);
	}
	/* Add shareable bits for device memory. */
	prrr |= PRRR_DS0 | PRRR_DS1;

	/* Add shareable bits for normal memory in SMP case. */
#ifdef SMP
	prrr |= PRRR_NS1;
#endif
	cp15_prrr_set(prrr);
	cp15_nmrr_set(nmrr);

	/* Caches are disabled, so full TLB flush should be enough. */
	tlb_flush_all_local();
}

/*
 * Remap one vm_memattr class to another one.
 * This can be useful as
 * workaround for SOC errata, e.g. if devices must be accessed using
 * SO memory class.
 *
 * !!! Please note that this function is absolutely last resort thing.
 * It should not be used under normal circumstances. !!!
 *
 * Usage rules:
 * - it shall be called after pmap_bootstrap_prepare() and before
 *   cpu_mp_start() (thus only on boot CPU).  In practice, it's expected
 *   to be called from platform_attach() or platform_late_init().
 *
 * - if remapping doesn't change caching mode, or until uncached class
 *   is remapped to any kind of cached one, then no other restriction exists.
 *
 * - if pmap_remap_vm_attr() changes caching mode, but both (original and
 *   remapped) remain cached, then caller is responsible for calling
 *   of dcache_wbinv_poc_all().
 *
 * - remapping of any kind of cached class to uncached is not permitted.
 */
void
pmap_remap_vm_attr(vm_memattr_t old_attr, vm_memattr_t new_attr)
{
	int old_idx, new_idx;

	/*
	 * Map VM memattrs to indexes to tex_class table.
	 *
	 * NOTE(review): pte2_attr_tab[] holds PTE2_ATTR_* bit encodings,
	 * not small 0..7 tex_class indexes.  Unless those encodings happen
	 * to equal the indexes, the lookups below index tex_class[8] out
	 * of range.  Later FreeBSD revisions convert the attribute bits to
	 * an index (PTE2_ATTR2IDX) here -- verify against pte-v6.h for
	 * this revision before relying on this function.
	 */
	old_idx = pte2_attr_tab[(int)old_attr];
	new_idx = pte2_attr_tab[(int)new_attr];

	/* Replace TEX attribute and apply it. */
	tex_class[old_idx] = tex_class[new_idx];
	pmap_set_tex();
}

/*
 * KERNBASE must be multiple of NPT2_IN_PG * PTE1_SIZE.  In other words,
 * KERNBASE is mapped by first L2 page table in L2 page table page.  It
 * meets same constraint due to PT2MAP being placed just under KERNBASE.
 */
CTASSERT((KERNBASE & (NPT2_IN_PG * PTE1_SIZE - 1)) == 0);
CTASSERT((KERNBASE - VM_MAXUSER_ADDRESS) >= PT2MAP_SIZE);

/*
 * In crazy dreams, PAGE_SIZE could be a multiple of PTE2_SIZE in general.
 * For now, anyhow, the following check must be fulfilled.
 */
CTASSERT(PAGE_SIZE == PTE2_SIZE);
/*
 * We don't want to mess up MI code with all MMU and PMAP definitions,
 * so some things, which depend on other ones, are defined independently.
 * Now, it is time to check that we don't screw up something.
 */
CTASSERT(PDRSHIFT == PTE1_SHIFT);
/*
 * Check L1 and L2 page table entries definitions consistency.
 */
CTASSERT(NB_IN_PT1 == (sizeof(pt1_entry_t) * NPTE1_IN_PT1));
CTASSERT(NB_IN_PT2 == (sizeof(pt2_entry_t) * NPTE2_IN_PT2));
/*
 * Check L2 page tables page consistency.
 */
CTASSERT(PAGE_SIZE == (NPT2_IN_PG * NB_IN_PT2));
CTASSERT((1 << PT2PG_SHIFT) == NPT2_IN_PG);
/*
 * Check PT2TAB consistency.
 * PT2TAB_ENTRIES is defined as a division of NPTE1_IN_PT1 by NPT2_IN_PG.
 * This should be done without remainder.
 */
CTASSERT(NPTE1_IN_PT1 == (PT2TAB_ENTRIES * NPT2_IN_PG));

/*
 * A PT2MAP magic.
 *
 * All level 2 page tables (PT2s) are mapped continuously and accordingly
 * into PT2MAP address space.  As PT2 size is less than PAGE_SIZE, this can
 * be done only if PAGE_SIZE is a multiple of PT2 size.  All PT2s in one page
 * must be used together, but not necessary at once.  The first PT2 in a page
 * must map things on correctly aligned address and the others must follow
 * in right order.
 */
#define NB_IN_PT2TAB	(PT2TAB_ENTRIES * sizeof(pt2_entry_t))
#define NPT2_IN_PT2TAB	(NB_IN_PT2TAB / NB_IN_PT2)
#define NPG_IN_PT2TAB	(NB_IN_PT2TAB / PAGE_SIZE)

/*
 * Check PT2TAB consistency.
 * NPT2_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by NB_IN_PT2.
 * NPG_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by PAGE_SIZE.
 * The both should be done without remainder.
 */
CTASSERT(NB_IN_PT2TAB == (NPT2_IN_PT2TAB * NB_IN_PT2));
CTASSERT(NB_IN_PT2TAB == (NPG_IN_PT2TAB * PAGE_SIZE));
/*
 * The implementation was made general, however, with the assumption
 * below in mind.  In case of another value of NPG_IN_PT2TAB,
 * the code should be once more rechecked.
 */
CTASSERT(NPG_IN_PT2TAB == 1);

/*
 * Get offset of PT2 in a page
 * associated with given PT1 index.
 */
static __inline u_int
page_pt2off(u_int pt1_idx)
{

	return ((pt1_idx & PT2PG_MASK) * NB_IN_PT2);
}

/*
 * Get physical address of PT2
 * associated with given PT2s page and PT1 index.
 */
static __inline vm_paddr_t
page_pt2pa(vm_paddr_t pgpa, u_int pt1_idx)
{

	return (pgpa + page_pt2off(pt1_idx));
}

/*
 * Get first entry of PT2
 * associated with given PT2s page and PT1 index.
 */
static __inline pt2_entry_t *
page_pt2(vm_offset_t pgva, u_int pt1_idx)
{

	return ((pt2_entry_t *)(pgva + page_pt2off(pt1_idx)));
}

/*
 * Get virtual address of PT2s page (mapped in PT2MAP)
 * which holds PT2 which holds entry which maps given virtual address.
 */
static __inline vm_offset_t
pt2map_pt2pg(vm_offset_t va)
{

	/* Round va down to the start of the region one PT2s page covers. */
	va &= ~(NPT2_IN_PG * PTE1_SIZE - 1);
	return ((vm_offset_t)pt2map_entry(va));
}

/*****************************************************************************
 *
 * THREE pmap initialization milestones exist:
 *
 *  locore.S
 *     -> fundamental init (including MMU) in ASM
 *
 *  initarm()
 *     -> fundamental init continues in C
 *     -> first available physical address is known
 *
 *  pmap_bootstrap_prepare() -> FIRST PMAP MILESTONE (first epoch begins)
 *     -> basic (safe) interface for physical address allocation is made
 *     -> basic (safe) interface for virtual mapping is made
 *     -> limited not SMP coherent work is possible
 *
 *  -> more fundamental init continues in C
 *     -> locks and some more things are available
 *     -> all fundamental allocations and mappings are done
 *
 *  pmap_bootstrap() -> SECOND PMAP MILESTONE (second epoch begins)
 *     -> phys_avail[] and virtual_avail is set
 *     -> control is passed to vm subsystem
 *     -> physical and virtual address allocation are off limit
 *     -> low level mapping functions, some SMP coherent,
 *        are available, which cannot be used before vm subsystem
 *        is being inited
 *
 *  mi_startup()
 *     -> vm subsystem is being inited
 *
 *  pmap_init() -> THIRD PMAP MILESTONE (third epoch begins)
 *     -> pmap is fully inited
 *
 *****************************************************************************/

/*****************************************************************************
 *
 * PMAP first stage initialization and utility functions
 * for pre-bootstrap epoch.
 *
 * After pmap_bootstrap_prepare() is called, the following functions
 * can be used:
 *
 * (1) strictly only for this stage functions for physical page allocations,
 *     virtual space allocations, and mappings:
 *
 * vm_paddr_t pmap_preboot_get_pages(u_int num);
 * void pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num);
 * vm_offset_t pmap_preboot_reserve_pages(u_int num);
 * vm_offset_t pmap_preboot_get_vpages(u_int num);
 * void pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size,
 *     vm_prot_t prot, vm_memattr_t attr);
 *
 * (2) for all stages:
 *
 * vm_paddr_t pmap_kextract(vm_offset_t va);
 *
 * NOTE: This is not SMP coherent stage.
 *
 *****************************************************************************/

/* Physical <-> virtual translation valid only within the kernel image. */
#define KERNEL_P2V(pa) \
    ((vm_offset_t)((pa) - arm_physmem_kernaddr + KERNVIRTADDR))
#define KERNEL_V2P(va) \
    ((vm_paddr_t)((va) - KERNVIRTADDR + arm_physmem_kernaddr))

/* Bump-allocator cursor for pmap_preboot_get_pages(). */
static vm_paddr_t last_paddr;

/*
 * Pre-bootstrap epoch page allocator.
716 */ 717vm_paddr_t 718pmap_preboot_get_pages(u_int num) 719{ 720 vm_paddr_t ret; 721 722 ret = last_paddr; 723 last_paddr += num * PAGE_SIZE; 724 725 return (ret); 726} 727 728/* 729 * The fundamental initialization of PMAP stuff. 730 * 731 * Some things already happened in locore.S and some things could happen 732 * before pmap_bootstrap_prepare() is called, so let's recall what is done: 733 * 1. Caches are disabled. 734 * 2. We are running on virtual addresses already with 'boot_pt1' 735 * as L1 page table. 736 * 3. So far, all virtual addresses can be converted to physical ones and 737 * vice versa by the following macros: 738 * KERNEL_P2V(pa) .... physical to virtual ones, 739 * KERNEL_V2P(va) .... virtual to physical ones. 740 * 741 * What is done herein: 742 * 1. The 'boot_pt1' is replaced by real kernel L1 page table 'kern_pt1'. 743 * 2. PT2MAP magic is brought to live. 744 * 3. Basic preboot functions for page allocations and mappings can be used. 745 * 4. Everything is prepared for L1 cache enabling. 746 * 747 * Variations: 748 * 1. To use second TTB register, so kernel and users page tables will be 749 * separated. This way process forking - pmap_pinit() - could be faster, 750 * it saves physical pages and KVA per a process, and it's simple change. 751 * However, it will lead, due to hardware matter, to the following: 752 * (a) 2G space for kernel and 2G space for users. 753 * (b) 1G space for kernel in low addresses and 3G for users above it. 754 * A question is: Is the case (b) really an option? Note that case (b) 755 * does save neither physical memory and KVA. 756 */ 757void 758pmap_bootstrap_prepare(vm_paddr_t last) 759{ 760 vm_paddr_t pt2pg_pa, pt2tab_pa, pa, size; 761 vm_offset_t pt2pg_va; 762 pt1_entry_t *pte1p; 763 pt2_entry_t *pte2p; 764 u_int i; 765 uint32_t actlr_mask, actlr_set, l1_attr; 766 767 /* 768 * Now, we are going to make real kernel mapping. 
Note that we are 769 * already running on some mapping made in locore.S and we expect 770 * that it's large enough to ensure nofault access to physical memory 771 * allocated herein before switch. 772 * 773 * As kernel image and everything needed before are and will be mapped 774 * by section mappings, we align last physical address to PTE1_SIZE. 775 */ 776 last_paddr = pte1_roundup(last); 777 778 /* 779 * Allocate and zero page(s) for kernel L1 page table. 780 * 781 * Note that it's first allocation on space which was PTE1_SIZE 782 * aligned and as such base_pt1 is aligned to NB_IN_PT1 too. 783 */ 784 base_pt1 = pmap_preboot_get_pages(NPG_IN_PT1); 785 kern_pt1 = (pt1_entry_t *)KERNEL_P2V(base_pt1); 786 bzero((void*)kern_pt1, NB_IN_PT1); 787 pte1_sync_range(kern_pt1, NB_IN_PT1); 788 789 /* Allocate and zero page(s) for kernel PT2TAB. */ 790 pt2tab_pa = pmap_preboot_get_pages(NPG_IN_PT2TAB); 791 kern_pt2tab = (pt2_entry_t *)KERNEL_P2V(pt2tab_pa); 792 bzero(kern_pt2tab, NB_IN_PT2TAB); 793 pte2_sync_range(kern_pt2tab, NB_IN_PT2TAB); 794 795 /* Allocate and zero page(s) for kernel L2 page tables. */ 796 pt2pg_pa = pmap_preboot_get_pages(NKPT2PG); 797 pt2pg_va = KERNEL_P2V(pt2pg_pa); 798 size = NKPT2PG * PAGE_SIZE; 799 bzero((void*)pt2pg_va, size); 800 pte2_sync_range((pt2_entry_t *)pt2pg_va, size); 801 802 /* 803 * Add a physical memory segment (vm_phys_seg) corresponding to the 804 * preallocated pages for kernel L2 page tables so that vm_page 805 * structures representing these pages will be created. The vm_page 806 * structures are required for promotion of the corresponding kernel 807 * virtual addresses to section mappings. 808 */ 809 vm_phys_add_seg(pt2tab_pa, pmap_preboot_get_pages(0)); 810 811 /* 812 * Insert allocated L2 page table pages to PT2TAB and make 813 * link to all PT2s in L1 page table. See how kernel_vm_end 814 * is initialized. 815 * 816 * We play simple and safe. 
So every KVA will have underlaying 817 * L2 page table, even kernel image mapped by sections. 818 */ 819 pte2p = kern_pt2tab_entry(KERNBASE); 820 for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += PTE2_SIZE) 821 pt2tab_store(pte2p++, PTE2_KPT(pa)); 822 823 pte1p = kern_pte1(KERNBASE); 824 for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += NB_IN_PT2) 825 pte1_store(pte1p++, PTE1_LINK(pa)); 826 827 /* Make section mappings for kernel. */ 828 l1_attr = ATTR_TO_L1(PTE2_ATTR_DEFAULT); 829 pte1p = kern_pte1(KERNBASE); 830 for (pa = KERNEL_V2P(KERNBASE); pa < last; pa += PTE1_SIZE) 831 pte1_store(pte1p++, PTE1_KERN(pa, PTE1_AP_KRW, l1_attr)); 832 833 /* 834 * Get free and aligned space for PT2MAP and make L1 page table links 835 * to L2 page tables held in PT2TAB. 836 * 837 * Note that pages holding PT2s are stored in PT2TAB as pt2_entry_t 838 * descriptors and PT2TAB page(s) itself is(are) used as PT2s. Thus 839 * each entry in PT2TAB maps all PT2s in a page. This implies that 840 * virtual address of PT2MAP must be aligned to NPT2_IN_PG * PTE1_SIZE. 841 */ 842 PT2MAP = (pt2_entry_t *)(KERNBASE - PT2MAP_SIZE); 843 pte1p = kern_pte1((vm_offset_t)PT2MAP); 844 for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) { 845 pte1_store(pte1p++, PTE1_LINK(pa)); 846 } 847 848 /* 849 * Store PT2TAB in PT2TAB itself, i.e. self reference mapping. 850 * Each pmap will hold own PT2TAB, so the mapping should be not global. 851 */ 852 pte2p = kern_pt2tab_entry((vm_offset_t)PT2MAP); 853 for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) { 854 pt2tab_store(pte2p++, PTE2_KPT_NG(pa)); 855 } 856 857 /* 858 * Choose correct L2 page table and make mappings for allocations 859 * made herein which replaces temporary locore.S mappings after a while. 860 * Note that PT2MAP cannot be used until we switch to kern_pt1. 861 * 862 * Note, that these allocations started aligned on 1M section and 863 * kernel PT1 was allocated first. 
Making of mappings must follow 864 * order of physical allocations as we've used KERNEL_P2V() macro 865 * for virtual addresses resolution. 866 */ 867 pte2p = kern_pt2tab_entry((vm_offset_t)kern_pt1); 868 pt2pg_va = KERNEL_P2V(pte2_pa(pte2_load(pte2p))); 869 870 pte2p = page_pt2(pt2pg_va, pte1_index((vm_offset_t)kern_pt1)); 871 872 /* Make mapping for kernel L1 page table. */ 873 for (pa = base_pt1, i = 0; i < NPG_IN_PT1; i++, pa += PTE2_SIZE) 874 pte2_store(pte2p++, PTE2_KPT(pa)); 875 876 /* Make mapping for kernel PT2TAB. */ 877 for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) 878 pte2_store(pte2p++, PTE2_KPT(pa)); 879 880 /* Finally, switch from 'boot_pt1' to 'kern_pt1'. */ 881 pmap_kern_ttb = base_pt1 | ttb_flags; 882 cpuinfo_get_actlr_modifier(&actlr_mask, &actlr_set); 883 reinit_mmu(pmap_kern_ttb, actlr_mask, actlr_set); 884 /* 885 * Initialize the first available KVA. As kernel image is mapped by 886 * sections, we are leaving some gap behind. 887 */ 888 virtual_avail = (vm_offset_t)kern_pt2tab + NPG_IN_PT2TAB * PAGE_SIZE; 889} 890 891/* 892 * Setup L2 page table page for given KVA. 893 * Used in pre-bootstrap epoch. 894 * 895 * Note that we have allocated NKPT2PG pages for L2 page tables in advance 896 * and used them for mapping KVA starting from KERNBASE. However, this is not 897 * enough. Vectors and devices need L2 page tables too. Note that they are 898 * even above VM_MAX_KERNEL_ADDRESS. 899 */ 900static __inline vm_paddr_t 901pmap_preboot_pt2pg_setup(vm_offset_t va) 902{ 903 pt2_entry_t *pte2p, pte2; 904 vm_paddr_t pt2pg_pa; 905 906 /* Get associated entry in PT2TAB. */ 907 pte2p = kern_pt2tab_entry(va); 908 909 /* Just return, if PT2s page exists already. */ 910 pte2 = pt2tab_load(pte2p); 911 if (pte2_is_valid(pte2)) 912 return (pte2_pa(pte2)); 913 914 KASSERT(va >= VM_MAX_KERNEL_ADDRESS, 915 ("%s: NKPT2PG too small", __func__)); 916 917 /* 918 * Allocate page for PT2s and insert it to PT2TAB. 
	 * In other words, map it into PT2MAP space.
	 */
	pt2pg_pa = pmap_preboot_get_pages(1);
	pt2tab_store(pte2p, PTE2_KPT(pt2pg_pa));

	/* Zero all PT2s in allocated page. */
	bzero((void*)pt2map_pt2pg(va), PAGE_SIZE);
	pte2_sync_range((pt2_entry_t *)pt2map_pt2pg(va), PAGE_SIZE);

	return (pt2pg_pa);
}

/*
 *  Setup L2 page table for given KVA.
 *  Used in pre-bootstrap epoch.
 */
static void
pmap_preboot_pt2_setup(vm_offset_t va)
{
	pt1_entry_t *pte1p;
	vm_paddr_t pt2pg_pa, pt2_pa;

	/* Setup PT2's page. */
	pt2pg_pa = pmap_preboot_pt2pg_setup(va);
	pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(va));

	/* Insert PT2 to PT1. */
	pte1p = kern_pte1(va);
	pte1_store(pte1p, PTE1_LINK(pt2_pa));
}

/*
 *  Get L2 page entry associated with given KVA.
 *  Used in pre-bootstrap epoch.
 */
static __inline pt2_entry_t*
pmap_preboot_vtopte2(vm_offset_t va)
{
	pt1_entry_t *pte1p;

	/* Setup PT2 if needed. */
	pte1p = kern_pte1(va);
	if (!pte1_is_valid(pte1_load(pte1p))) /* XXX - sections ?! */
		pmap_preboot_pt2_setup(va);

	return (pt2map_entry(va));
}

/*
 *  Pre-bootstrap epoch page(s) mapping(s).
 *  Map 'num' pages of physical memory starting at 'pa' to KVA
 *  starting at 'va' with default (kernel read-write) attributes.
 */
void
pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num)
{
	u_int i;
	pt2_entry_t *pte2p;

	/* Map all the pages. */
	for (i = 0; i < num; i++) {
		pte2p = pmap_preboot_vtopte2(va);
		pte2_store(pte2p, PTE2_KRW(pa));
		va += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
}

/*
 *  Pre-bootstrap epoch virtual space allocator.
 *  Reserves 'num' pages of KVA and invalidates their mappings.
 */
vm_offset_t
pmap_preboot_reserve_pages(u_int num)
{
	u_int i;
	vm_offset_t start, va;
	pt2_entry_t *pte2p;

	/* Allocate virtual space. */
	start = va = virtual_avail;
	virtual_avail += num * PAGE_SIZE;

	/* Zero the mapping. */
	for (i = 0; i < num; i++) {
		pte2p = pmap_preboot_vtopte2(va);
		pte2_store(pte2p, 0);
		va += PAGE_SIZE;
	}

	return (start);
}

/*
 *  Pre-bootstrap epoch page(s) allocation and mapping(s).
 *  Returns the KVA of 'num' freshly allocated, mapped and zeroed pages.
 */
vm_offset_t
pmap_preboot_get_vpages(u_int num)
{
	vm_paddr_t pa;
	vm_offset_t va;

	/* Allocate physical page(s). */
	pa = pmap_preboot_get_pages(num);

	/* Allocate virtual space. */
	va = virtual_avail;
	virtual_avail += num * PAGE_SIZE;

	/* Map and zero all. */
	pmap_preboot_map_pages(pa, va, num);
	bzero((void *)va, num * PAGE_SIZE);

	return (va);
}

/*
 *  Pre-bootstrap epoch page mapping(s) with attributes.
 *  Uses 1MB section mappings whenever 'va', 'pa' and the remaining
 *  size are all section aligned; otherwise falls back to page mappings.
 */
void
pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size,
    vm_prot_t prot, vm_memattr_t attr)
{
	u_int num;
	u_int l1_attr, l1_prot, l2_prot, l2_attr;
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p;

	l2_prot = prot & VM_PROT_WRITE ? PTE2_AP_KRW : PTE2_AP_KR;
	l2_prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX;
	l2_attr = vm_memattr_to_pte2(attr);
	l1_prot = ATTR_TO_L1(l2_prot);
	l1_attr = ATTR_TO_L1(l2_attr);

	/* Map all the pages. Note that 'num' counts remaining bytes. */
	num = round_page(size);
	while (num > 0) {
		if ((((va | pa) & PTE1_OFFSET) == 0) && (num >= PTE1_SIZE)) {
			pte1p = kern_pte1(va);
			pte1_store(pte1p, PTE1_KERN(pa, l1_prot, l1_attr));
			va += PTE1_SIZE;
			pa += PTE1_SIZE;
			num -= PTE1_SIZE;
		} else {
			pte2p = pmap_preboot_vtopte2(va);
			pte2_store(pte2p, PTE2_KERN(pa, l2_prot, l2_attr));
			va += PAGE_SIZE;
			pa += PAGE_SIZE;
			num -= PAGE_SIZE;
		}
	}
}

/*
 *  Extract from the kernel page table the physical address
 *  that is mapped by the given virtual address "va".
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	vm_paddr_t pa;
	pt1_entry_t pte1;
	pt2_entry_t pte2;

	pte1 = pte1_load(kern_pte1(va));
	if (pte1_is_section(pte1)) {
		/* 1MB section mapping: PA comes straight from the PTE1. */
		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
	} else if (pte1_is_link(pte1)) {
		/*
		 * We should beware of concurrent promotion that changes
		 * pte1 at this point. However, it's not a problem as PT2
		 * page is preserved by promotion in PT2TAB. So even if
		 * it happens, using of PT2MAP is still safe.
		 *
		 * QQQ: However, concurrent removing is a problem which
		 *      ends in abort on PT2MAP space. Locking must be used
		 *      to deal with this.
		 */
		pte2 = pte2_load(pt2map_entry(va));
		pa = pte2_pa(pte2) | (va & PTE2_OFFSET);
	}
	else {
		panic("%s: va %#x pte1 %#x", __func__, va, pte1);
	}
	return (pa);
}

/*
 *  Extract from the kernel page table the physical address
 *  that is mapped by the given virtual address "va". Also
 *  return L2 page table entry which maps the address.
 *
 *  This is only intended to be used for panic dumps.
 */
vm_paddr_t
pmap_dump_kextract(vm_offset_t va, pt2_entry_t *pte2p)
{
	vm_paddr_t pa;
	pt1_entry_t pte1;
	pt2_entry_t pte2;

	pte1 = pte1_load(kern_pte1(va));
	if (pte1_is_section(pte1)) {
		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
		/* Synthesize an equivalent L2 entry for the dump consumer. */
		pte2 = pa | ATTR_TO_L2(pte1) | PTE2_V;
	} else if (pte1_is_link(pte1)) {
		pte2 = pte2_load(pt2map_entry(va));
		pa = pte2_pa(pte2);
	} else {
		/* Unlike pmap_kextract(), report unmapped VA instead of panic. */
		pte2 = 0;
		pa = 0;
	}
	if (pte2p != NULL)
		*pte2p = pte2;
	return (pa);
}
 *
 *  After pmap_bootstrap() is called, the following functions for
 *  mappings can be used:
 *
 *  void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
 *  void pmap_kremove(vm_offset_t va);
 *  vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end,
 *      int prot);
 *
 *  NOTE: This is not SMP coherent stage. And physical page allocation is not
 *        allowed during this stage.
 *
 *****************************************************************************/

/*
 *  Initialize kernel PMAP locks and lists, kernel_pmap itself, and
 *  reserve various virtual spaces for temporary mappings.
 */
void
pmap_bootstrap(vm_offset_t firstaddr)
{
	pt2_entry_t *unused __unused;
	struct pcpu *pc;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	kernel_l1pa = (vm_paddr_t)kern_pt1;  /* for libkvm */
	kernel_pmap->pm_pt1 = kern_pt1;
	kernel_pmap->pm_pt2tab = kern_pt2tab;
	CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvchunk);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init(&pvh_global_lock, "pmap pv global");

	LIST_INIT(&allpmaps);

	/*
	 * Request a spin mutex so that changes to allpmaps cannot be
	 * preempted by smp_rendezvous_cpus().
	 */
	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)  do {		\
	v = (c)pmap_preboot_reserve_pages(n);	\
	p = pt2map_entry((vm_offset_t)v);	\
	} while (0)

	/*
	 * Local CMAP1/CMAP2 are used for zeroing and copying pages.
	 * Local CMAP2 is also used for data cache cleaning.
	 * Global CMAP3 is used for the idle process page zeroing.
	 */
	pc = get_pcpu();
	mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF);
	SYSMAP(caddr_t, pc->pc_cmap1_pte2p, pc->pc_cmap1_addr, 1);
	SYSMAP(caddr_t, pc->pc_cmap2_pte2p, pc->pc_cmap2_addr, 1);
	SYSMAP(vm_offset_t, pc->pc_qmap_pte2p, pc->pc_qmap_addr, 1);
	SYSMAP(caddr_t, CMAP3, CADDR3, 1);

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS);

	/*
	 * _tmppt is used for reading arbitrary physical pages via /dev/mem.
	 */
	SYSMAP(caddr_t, unused, _tmppt, 1);

	/*
	 * PADDR1 and PADDR2 are used by pmap_pte2_quick() and pmap_pte2(),
	 * respectively. PADDR3 is used by pmap_pte2_ddb().
	 */
	SYSMAP(pt2_entry_t *, PMAP1, PADDR1, 1);
	SYSMAP(pt2_entry_t *, PMAP2, PADDR2, 1);
#ifdef DDB
	SYSMAP(pt2_entry_t *, PMAP3, PADDR3, 1);
#endif
	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);

	/*
	 * Note that in very short time in initarm(), we are going to
	 * initialize phys_avail[] array and no further page allocation
	 * can happen after that until vm subsystem will be initialized.
	 */
	kernel_vm_end_new = kernel_vm_end;
	virtual_end = vm_max_kernel_address;
}

/*
 *  Allocate KVA and L2 entries for the per-CPU CMAP1/CMAP2/QMAP
 *  mappings of every AP; the BSP was set up in pmap_bootstrap().
 */
static void
pmap_init_reserved_pages(void)
{
	struct pcpu *pc;
	vm_offset_t pages;
	int i;

	CPU_FOREACH(i) {
		pc = pcpu_find(i);
		/*
		 * Skip if the mapping has already been initialized,
		 * i.e. this is the BSP.
		 */
		if (pc->pc_cmap1_addr != 0)
			continue;
		mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF);
		pages = kva_alloc(PAGE_SIZE * 3);
		if (pages == 0)
			panic("%s: unable to allocate KVA", __func__);
		pc->pc_cmap1_pte2p = pt2map_entry(pages);
		pc->pc_cmap2_pte2p = pt2map_entry(pages + PAGE_SIZE);
		pc->pc_qmap_pte2p = pt2map_entry(pages + (PAGE_SIZE * 2));
		pc->pc_cmap1_addr = (caddr_t)pages;
		pc->pc_cmap2_addr = (caddr_t)(pages + PAGE_SIZE);
		pc->pc_qmap_addr = pages + (PAGE_SIZE * 2);
	}
}
SYSINIT(rpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_reserved_pages, NULL);

/*
 *  The function can already be used in second initialization stage.
 *  As such, the function DOES NOT call pmap_growkernel() where PT2
 *  allocation can happen. So if used, be sure that PT2 for given
 *  virtual address is allocated already!
 *
 *  Add a wired page to the kva.
 *  Note: not SMP coherent.
 */
static __inline void
pmap_kenter_prot_attr(vm_offset_t va, vm_paddr_t pa, uint32_t prot,
    uint32_t attr)
{
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p;

	pte1p = kern_pte1(va);
	if (!pte1_is_valid(pte1_load(pte1p))) { /* XXX - sections ?! */
		/*
		 * This is a very low level function, so PT2 and particularly
		 * PT2PG associated with given virtual address must be already
		 * allocated. It's a pain mainly during pmap initialization
		 * stage. However, called after pmap initialization with
		 * virtual address not under kernel_vm_end will lead to
		 * the same misery.
		 */
		if (!pte2_is_valid(pte2_load(kern_pt2tab_entry(va))))
			panic("%s: kernel PT2 not allocated!", __func__);
	}

	pte2p = pt2map_entry(va);
	pte2_store(pte2p, PTE2_KERN(pa, prot, attr));
}

/*
 *  Add a wired kernel read-write page with default attributes to the kva.
 *  Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{

	pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT);
}

/*
 *  Remove a page from the kernel pagetables.
 *  Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt2_entry_t *pte2p;

	pte2p = pt2map_entry(va);
	pte2_clear(pte2p);
}

/*
 *  Share new kernel PT2PG with all pmaps.
 *  The caller is responsible for maintaining TLB consistency.
 */
static void
pmap_kenter_pt2tab(vm_offset_t va, pt2_entry_t npte2)
{
	pmap_t pmap;
	pt2_entry_t *pte2p;

	mtx_lock_spin(&allpmaps_lock);
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		pte2p = pmap_pt2tab_entry(pmap, va);
		pt2tab_store(pte2p, npte2);
	}
	mtx_unlock_spin(&allpmaps_lock);
}

/*
 *  Share new kernel PTE1 with all pmaps.
 *  The caller is responsible for maintaining TLB consistency.
 */
static void
pmap_kenter_pte1(vm_offset_t va, pt1_entry_t npte1)
{
	pmap_t pmap;
	pt1_entry_t *pte1p;

	mtx_lock_spin(&allpmaps_lock);
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		pte1p = pmap_pte1(pmap, va);
		pte1_store(pte1p, npte1);
	}
	mtx_unlock_spin(&allpmaps_lock);
}

/*
 *  Used to map a range of physical addresses into kernel
 *  virtual address space.
 *
 *  The value passed in '*virt' is a suggested virtual address for
 *  the mapping. Architectures which can support a direct-mapped
 *  physical to virtual region can return the appropriate address
 *  within that region, leaving '*virt' unchanged.
Other
 *  architectures should map the pages starting at '*virt' and
 *  update '*virt' with the first usable address after the mapped
 *  region.
 *
 *  NOTE: Read the comments above pmap_kenter_prot_attr() as
 *        the function is used herein!
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	vm_offset_t va, sva;
	vm_paddr_t pte1_offset;
	pt1_entry_t npte1;
	uint32_t l1prot, l2prot;
	uint32_t l1attr, l2attr;

	PDEBUG(1, printf("%s: virt = %#x, start = %#x, end = %#x (size = %#x),"
	    " prot = %d\n", __func__, *virt, start, end, end - start, prot));

	l2prot = (prot & VM_PROT_WRITE) ? PTE2_AP_KRW : PTE2_AP_KR;
	l2prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX;
	l1prot = ATTR_TO_L1(l2prot);

	l2attr = PTE2_ATTR_DEFAULT;
	l1attr = ATTR_TO_L1(l2attr);

	va = *virt;
	/*
	 * Does the physical address range's size and alignment permit at
	 * least one section mapping to be created?
	 */
	pte1_offset = start & PTE1_OFFSET;
	if ((end - start) - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) >=
	    PTE1_SIZE) {
		/*
		 * Increase the starting virtual address so that its alignment
		 * does not preclude the use of section mappings.
		 */
		if ((va & PTE1_OFFSET) < pte1_offset)
			va = pte1_trunc(va) + pte1_offset;
		else if ((va & PTE1_OFFSET) > pte1_offset)
			va = pte1_roundup(va) + pte1_offset;
	}
	sva = va;
	while (start < end) {
		if ((start & PTE1_OFFSET) == 0 && end - start >= PTE1_SIZE) {
			KASSERT((va & PTE1_OFFSET) == 0,
			    ("%s: misaligned va %#x", __func__, va));
			npte1 = PTE1_KERN(start, l1prot, l1attr);
			pmap_kenter_pte1(va, npte1);
			va += PTE1_SIZE;
			start += PTE1_SIZE;
		} else {
			pmap_kenter_prot_attr(va, start, l2prot, l2attr);
			va += PAGE_SIZE;
			start += PAGE_SIZE;
		}
	}
	tlb_flush_range(sva, va - sva);
	*virt = va;
	return (sva);
}

/*
 *  Make a temporary mapping for a physical address.
 *  This is only intended to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
	vm_offset_t va;

	/* QQQ: 'i' should be less or equal to MAXDUMPPGS. */

	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
	pmap_kenter(va, pa);
	tlb_flush_local(va);
	return ((void *)crashdumpmap);
}


/*************************************
 *
 *  TLB & cache maintenance routines.
 *
 *************************************/

/*
 *  We inline these within pmap.c for speed.
 */

/*
 *  Flush the TLB entry for 'va', but only if the pmap is the kernel
 *  pmap or is (or was) active on some CPU.
 */
PMAP_INLINE void
pmap_tlb_flush(pmap_t pmap, vm_offset_t va)
{

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		tlb_flush(va);
}

/*
 *  Flush TLB entries for the range [sva, sva + size), under the same
 *  condition as pmap_tlb_flush().
 */
PMAP_INLINE void
pmap_tlb_flush_range(pmap_t pmap, vm_offset_t sva, vm_size_t size)
{

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		tlb_flush_range(sva, size);
}

/*
 *  Abuse the pte2 nodes for unmapped kva to thread a kva freelist through.
 *  Requirements:
 *   - Must deal with pages in order to ensure that none of the PTE2_* bits
 *     are ever set, PTE2_V in particular.
1476 * - Assumes we can write to pte2s without pte2_store() atomic ops. 1477 * - Assumes nothing will ever test these addresses for 0 to indicate 1478 * no mapping instead of correctly checking PTE2_V. 1479 * - Assumes a vm_offset_t will fit in a pte2 (true for arm). 1480 * Because PTE2_V is never set, there can be no mappings to invalidate. 1481 */ 1482static vm_offset_t 1483pmap_pte2list_alloc(vm_offset_t *head) 1484{ 1485 pt2_entry_t *pte2p; 1486 vm_offset_t va; 1487 1488 va = *head; 1489 if (va == 0) 1490 panic("pmap_ptelist_alloc: exhausted ptelist KVA"); 1491 pte2p = pt2map_entry(va); 1492 *head = *pte2p; 1493 if (*head & PTE2_V) 1494 panic("%s: va with PTE2_V set!", __func__); 1495 *pte2p = 0; 1496 return (va); 1497} 1498 1499static void 1500pmap_pte2list_free(vm_offset_t *head, vm_offset_t va) 1501{ 1502 pt2_entry_t *pte2p; 1503 1504 if (va & PTE2_V) 1505 panic("%s: freeing va with PTE2_V set!", __func__); 1506 pte2p = pt2map_entry(va); 1507 *pte2p = *head; /* virtual! PTE2_V is 0 though */ 1508 *head = va; 1509} 1510 1511static void 1512pmap_pte2list_init(vm_offset_t *head, void *base, int npages) 1513{ 1514 int i; 1515 vm_offset_t va; 1516 1517 *head = 0; 1518 for (i = npages - 1; i >= 0; i--) { 1519 va = (vm_offset_t)base + i * PAGE_SIZE; 1520 pmap_pte2list_free(head, va); 1521 } 1522} 1523 1524/***************************************************************************** 1525 * 1526 * PMAP third and final stage initialization. 1527 * 1528 * After pmap_init() is called, PMAP subsystem is fully initialized. 
 *
 *****************************************************************************/

SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
    "Max number of PV entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
    "Page share factor per proc");

static u_long nkpt2pg = NKPT2PG;
SYSCTL_ULONG(_vm_pmap, OID_AUTO, nkpt2pg, CTLFLAG_RD,
    &nkpt2pg, 0, "Pre-allocated pages for kernel PT2s");

static int sp_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &sp_enabled, 0, "Are large page mappings enabled?");

static SYSCTL_NODE(_vm_pmap, OID_AUTO, pte1, CTLFLAG_RD, 0,
    "1MB page mapping counters");

static u_long pmap_pte1_demotions;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, demotions, CTLFLAG_RD,
    &pmap_pte1_demotions, 0, "1MB page demotions");

static u_long pmap_pte1_mappings;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, mappings, CTLFLAG_RD,
    &pmap_pte1_mappings, 0, "1MB page mappings");

static u_long pmap_pte1_p_failures;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, p_failures, CTLFLAG_RD,
    &pmap_pte1_p_failures, 0, "1MB page promotion failures");

static u_long pmap_pte1_promotions;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, promotions, CTLFLAG_RD,
    &pmap_pte1_promotions, 0, "1MB page promotions");

static u_long pmap_pte1_kern_demotions;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_demotions, CTLFLAG_RD,
    &pmap_pte1_kern_demotions, 0, "1MB page kernel demotions");

static u_long pmap_pte1_kern_promotions;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_promotions, CTLFLAG_RD,
    &pmap_pte1_kern_promotions, 0, "1MB page kernel promotions");

/*
 *  Compose the TTB register value (L1 table PA plus TTB flags) for 'pmap'.
 */
static __inline ttb_entry_t
pmap_ttb_get(pmap_t pmap)
{

	return (vtophys(pmap->pm_pt1) | ttb_flags);
}

/*
 *  Initialize a vm_page's machine-dependent fields.
 *
 *  Variations:
 *  1. Pages for L2 page tables are always not managed. So, pv_list and
 *     pt2_wirecount can share same physical space. However, proper
 *     initialization on a page alloc for page tables and reinitialization
 *     on the page free must be ensured.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	pt2_wirecount_init(m);
	m->md.pat_mode = VM_MEMATTR_DEFAULT;
}

/*
 *  Virtualization for faster way how to zero whole page.
 */
static __inline void
pagezero(void *page)
{

	bzero(page, PAGE_SIZE);
}

/*
 *  Zero L2 page table page.
 *  Use same KVA as in pmap_zero_page().
 */
static __inline vm_paddr_t
pmap_pt2pg_zero(vm_page_t m)
{
	pt2_entry_t *cmap2_pte2p;
	vm_paddr_t pa;
	struct pcpu *pc;

	pa = VM_PAGE_TO_PHYS(m);

	/*
	 * XXX: For now, we map whole page even if it's already zero,
	 *      to sync it even if the sync is only DSB.
	 */
	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	/* Even VM_ALLOC_ZERO request is only advisory. */
	if ((m->flags & PG_ZERO) == 0)
		pagezero(pc->pc_cmap2_addr);
	pte2_sync_range((pt2_entry_t *)pc->pc_cmap2_addr, PAGE_SIZE);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);

	/*
	 * Unpin the thread before releasing the lock. Otherwise the thread
	 * could be rescheduled while still bound to the current CPU, only
	 * to unpin itself immediately upon resuming execution.
	 */
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);

	return (pa);
}

/*
 *  Init just allocated page as L2 page table(s) holder
 *  and return its physical address.
 */
static __inline vm_paddr_t
pmap_pt2pg_init(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	vm_paddr_t pa;
	pt2_entry_t *pte2p;

	/* Check page attributes. */
	if (m->md.pat_mode != pt_memattr)
		pmap_page_set_memattr(m, pt_memattr);

	/* Zero page and init wire counts. */
	pa = pmap_pt2pg_zero(m);
	pt2_wirecount_init(m);

	/*
	 * Map page to PT2MAP address space for given pmap.
	 * Note that PT2MAP space is shared with all pmaps.
	 */
	if (pmap == kernel_pmap)
		pmap_kenter_pt2tab(va, PTE2_KPT(pa));
	else {
		pte2p = pmap_pt2tab_entry(pmap, va);
		pt2tab_store(pte2p, PTE2_KPT_NG(pa));
	}

	return (pa);
}

/*
 *  Initialize the pmap module.
 *  Called by vm_init, to initialize any structures that the pmap
 *  system needs to map virtual memory.
 */
void
pmap_init(void)
{
	vm_size_t s;
	pt2_entry_t *pte2p, pte2;
	u_int i, pte1_idx, pv_npg;

	PDEBUG(1, printf("%s: phys_start = %#x\n", __func__, PHYSADDR));

	/*
	 * Initialize the vm page array entries for kernel pmap's
	 * L2 page table pages allocated in advance.
	 */
	pte1_idx = pte1_index(KERNBASE - PT2MAP_SIZE);
	pte2p = kern_pt2tab_entry(KERNBASE - PT2MAP_SIZE);
	for (i = 0; i < nkpt2pg + NPG_IN_PT2TAB; i++, pte2p++) {
		vm_paddr_t pa;
		vm_page_t m;

		pte2 = pte2_load(pte2p);
		KASSERT(pte2_is_valid(pte2), ("%s: no valid entry", __func__));

		pa = pte2_pa(pte2);
		m = PHYS_TO_VM_PAGE(pa);
		KASSERT(m >= vm_page_array &&
		    m < &vm_page_array[vm_page_array_size],
		    ("%s: L2 page table page is out of range", __func__));

		m->pindex = pte1_idx;
		m->phys_addr = pa;
		pte1_idx += NPT2_IN_PG;
	}

	/*
	 * Initialize the address space (zone) for the pv entries. Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_max = roundup(pv_entry_max, _NPCPV);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	/*
	 * Are large page mappings enabled?
	 */
	TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled);
	if (sp_enabled) {
		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
		    ("%s: can't assign to pagesizes[1]", __func__));
		pagesizes[1] = PTE1_SIZE;
	}

	/*
	 * Calculate the size of the pv head table for sections.
	 * Handle the possibility that "vm_phys_segs[...].end" is zero.
	 * Note that the table is only for sections which could be promoted.
	 */
	first_managed_pa = pte1_trunc(vm_phys_segs[0].start);
	pv_npg = (pte1_trunc(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE)
	    - first_managed_pa) / PTE1_SIZE + 1;

	/*
	 * Allocate memory for the pv head table for sections.
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);

	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
	pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
	if (pv_chunkbase == NULL)
		panic("%s: not enough kvm for pv chunks", __func__);
	pmap_pte2list_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
}

/*
 *  Add a list of wired pages to the kva
 *  this routine is only used for temporary
 *  kernel mappings that do not need to have
 *  page modification or references recorded.
 *  Note that old mappings are simply written
 *  over. The page *must* be wired.
 *  Note: SMP coherent. Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	u_int anychanged;
	pt2_entry_t *epte2p, *pte2p, pte2;
	vm_page_t m;
	vm_paddr_t pa;

	anychanged = 0;
	pte2p = pt2map_entry(sva);
	epte2p = pte2p + count;
	while (pte2p < epte2p) {
		m = *ma++;
		pa = VM_PAGE_TO_PHYS(m);
		pte2 = pte2_load(pte2p);
		/* Rewrite the entry only if PA or attributes changed. */
		if ((pte2_pa(pte2) != pa) ||
		    (pte2_attr(pte2) != vm_page_pte2_attr(m))) {
			anychanged++;
			pte2_store(pte2p, PTE2_KERN(pa, PTE2_AP_KRW,
			    vm_page_pte2_attr(m)));
		}
		pte2p++;
	}
	if (__predict_false(anychanged))
		tlb_flush_range(sva, count * PAGE_SIZE);
}

/*
 *  This routine tears out page mappings from the
 *  kernel -- it is meant only for temporary mappings.
 *  Note: SMP coherent. Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	va = sva;
	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
	tlb_flush_range(sva, va - sva);
}

/*
 *  Are we current address space or kernel?
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

	return (pmap == kernel_pmap ||
	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace)));
}

/*
 *  Return a pointer to the L2 entry mapping 'va' in 'pmap', or NULL if
 *  there is no linked L2 table. For a foreign pmap the entry is made
 *  visible through the PMAP2/PADDR2 window under PMAP2mutex.
 *
 *  If the given pmap is not the current or kernel pmap, the returned
 *  pte2 must be released by passing it to pmap_pte2_release().
 */
static pt2_entry_t *
pmap_pte2(pmap_t pmap, vm_offset_t va)
{
	pt1_entry_t pte1;
	vm_paddr_t pt2pg_pa;

	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (pte1_is_section(pte1))
		panic("%s: attempt to map PTE1", __func__);
	if (pte1_is_link(pte1)) {
		/* Are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (pt2map_entry(va));
		/* Note that L2 page table size is not equal to PAGE_SIZE. */
		pt2pg_pa = trunc_page(pte1_link_pa(pte1));
		mtx_lock(&PMAP2mutex);
		/* Retarget the PMAP2 window only if it maps another PT2 page. */
		if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) {
			pte2_store(PMAP2, PTE2_KPT(pt2pg_pa));
			tlb_flush((vm_offset_t)PADDR2);
		}
		return (PADDR2 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
	}
	return (NULL);
}

/*
 *  Releases a pte2 that was obtained from pmap_pte2().
 *  Be prepared for the pte2p being NULL.
 */
static __inline void
pmap_pte2_release(pt2_entry_t *pte2p)
{

	if ((pt2_entry_t *)(trunc_page((vm_offset_t)pte2p)) == PADDR2) {
		mtx_unlock(&PMAP2mutex);
	}
}

/*
 *  Super fast pmap_pte2 routine best used when scanning
 *  the pv lists. This eliminates many coarse-grained
 *  invltlb calls. Note that many of the pv list
 *  scans are across different pmaps.
It is very wasteful
 *  to do an entire tlb flush for checking a single mapping.
 *
 *  If the given pmap is not the current pmap, pvh_global_lock
 *  must be held and curthread pinned to a CPU.
 */
static pt2_entry_t *
pmap_pte2_quick(pmap_t pmap, vm_offset_t va)
{
	pt1_entry_t pte1;
	vm_paddr_t pt2pg_pa;

	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (pte1_is_section(pte1))
		panic("%s: attempt to map PTE1", __func__);
	if (pte1_is_link(pte1)) {
		/* Are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (pt2map_entry(va));
		rw_assert(&pvh_global_lock, RA_WLOCKED);
		KASSERT(curthread->td_pinned > 0,
		    ("%s: curthread not pinned", __func__));
		/* Note that L2 page table size is not equal to PAGE_SIZE. */
		pt2pg_pa = trunc_page(pte1_link_pa(pte1));
		/*
		 * Retarget the PMAP1/PADDR1 window if needed; a local TLB
		 * flush suffices because the thread is pinned to this CPU.
		 */
		if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) {
			pte2_store(PMAP1, PTE2_KPT(pt2pg_pa));
#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			tlb_flush_local((vm_offset_t)PADDR1);
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			/* Window unchanged but last used on another CPU. */
			PMAP1cpu = PCPU_GET(cpuid);
			tlb_flush_local((vm_offset_t)PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		return (PADDR1 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
	}
	return (NULL);
}

/*
 *  Routine: pmap_extract
 *  Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa;
	pt1_entry_t pte1;
	pt2_entry_t *pte2p;

	PMAP_LOCK(pmap);
	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (pte1_is_section(pte1))
		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
	else if (pte1_is_link(pte1)) {
		pte2p = pmap_pte2(pmap, va);
		pa = pte2_pa(pte2_load(pte2p)) | (va & PTE2_OFFSET);
		pmap_pte2_release(pte2p);
	} else
		pa = 0;	/* no mapping at 'va' */
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 *  Routine: pmap_extract_and_hold
 *  Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	vm_paddr_t pa, lockpa;
	pt1_entry_t pte1;
	pt2_entry_t pte2, *pte2p;
	vm_page_t m;

	lockpa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (pte1_is_section(pte1)) {
		/* Hold only if writability satisfies the requested protection. */
		if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) {
			pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
			if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
				goto retry;
			m = PHYS_TO_VM_PAGE(pa);
			vm_page_hold(m);
		}
	} else if (pte1_is_link(pte1)) {
		pte2p = pmap_pte2(pmap, va);
		pte2 = pte2_load(pte2p);
		pmap_pte2_release(pte2p);
		if (pte2_is_valid(pte2) &&
		    (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) {
			pa = pte2_pa(pte2);
			if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
				goto retry;
			m = PHYS_TO_VM_PAGE(pa);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(lockpa);
	PMAP_UNLOCK(pmap);
	return (m);
}

/*
 *  Grow the number of kernel L2 page table entries, if needed.
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_page_t m;
	vm_paddr_t pt2pg_pa, pt2_pa;
	pt1_entry_t pte1;
	pt2_entry_t pte2;

	PDEBUG(1, printf("%s: addr = %#x\n", __func__, addr));
	/*
	 * All the time kernel_vm_end is first KVA for which underlying
	 * L2 page table is either not allocated or linked from L1 page table
	 * (not considering sections). Except for two possible cases:
	 *
	 *   (1) in the very beginning as long as pmap_growkernel() was
	 *       not called, it could be first unused KVA (which is not
	 *       rounded up to PTE1_SIZE),
	 *
	 *   (2) when all KVA space is mapped and kernel_map->max_offset
	 *       address is not rounded up to PTE1_SIZE. (For example,
	 *       it could be 0xFFFFFFFF.)
	 */
	kernel_vm_end = pte1_roundup(kernel_vm_end);
	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	addr = roundup2(addr, PTE1_SIZE);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		pte1 = pte1_load(kern_pte1(kernel_vm_end));
		if (pte1_is_valid(pte1)) {
			/* This PTE1 already links an L2 table; skip it. */
			kernel_vm_end += PTE1_SIZE;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		/*
		 * kernel_vm_end_new is used in pmap_pinit() when kernel
		 * mappings are entered to new pmap all at once to avoid race
		 * between pmap_kenter_pte1() and kernel_vm_end increase.
		 * The same applies to pmap_kenter_pt2tab().
		 */
		kernel_vm_end_new = kernel_vm_end + PTE1_SIZE;

		pte2 = pt2tab_load(kern_pt2tab_entry(kernel_vm_end));
		if (!pte2_is_valid(pte2)) {
			/*
			 * Install new PT2s page into kernel PT2TAB.
			 */
			m = vm_page_alloc(NULL,
			    pte1_index(kernel_vm_end) & ~PT2PG_MASK,
			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
			if (m == NULL)
				panic("%s: no memory to grow kernel", __func__);
			/*
			 * QQQ: To link all new L2 page tables from L1 page
			 *      table now and so pmap_kenter_pte1() them
			 *      at once together with pmap_kenter_pt2tab()
			 *      could be nice speed up. However,
			 *      pmap_growkernel() does not happen so often...
			 * QQQ: The other TTBR is another option.
			 */
			pt2pg_pa = pmap_pt2pg_init(kernel_pmap, kernel_vm_end,
			    m);
		} else
			pt2pg_pa = pte2_pa(pte2);

		pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(kernel_vm_end));
		pmap_kenter_pte1(kernel_vm_end, PTE1_LINK(pt2_pa));

		kernel_vm_end = kernel_vm_end_new;
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}

/*
 *  Sysctl handler: report total KVA size (vm.kvm_size).
 */
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = vm_max_kernel_address - KERNBASE;

	return (sysctl_handle_long(oidp, &ksize, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");

/*
 *  Sysctl handler: report KVA not yet backed by L2 tables (vm.kvm_free).
 */
static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = vm_max_kernel_address - kernel_vm_end;

	return (sysctl_handle_long(oidp, &kfree, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");

/***********************************************
 *
 *  Pmap allocation/deallocation routines.
 *
 ***********************************************/

/*
 *  Initialize the pmap for the swapper process.
 */
void
pmap_pinit0(pmap_t pmap)
{
	PDEBUG(1, printf("%s: pmap = %p\n", __func__, pmap));

	PMAP_LOCK_INIT(pmap);

	/*
	 * Kernel page table directory and pmap stuff around is already
	 * initialized, we are using it right now and here. So, finish
	 * only PMAP structures initialization for process0 ...
	 *
	 * Since the L1 page table and PT2TAB is shared with the kernel pmap,
	 * which is already included in the list "allpmaps", this pmap does
	 * not need to be inserted into that list.
	 */
	pmap->pm_pt1 = kern_pt1;
	pmap->pm_pt2tab = kern_pt2tab;
	CPU_ZERO(&pmap->pm_active);
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
	/* Mark this pmap active on CPU 0 (the boot CPU). */
	CPU_SET(0, &pmap->pm_active);
}

/*
 * Copy L1 entries covering the VA range [sva, eva] from spte1p to dpte1p
 * without any cache/TLB synchronization (the caller syncs afterwards).
 */
static __inline void
pte1_copy_nosync(pt1_entry_t *spte1p, pt1_entry_t *dpte1p, vm_offset_t sva,
    vm_offset_t eva)
{
	u_int idx, count;

	idx = pte1_index(sva);
	count = (pte1_index(eva) - idx + 1) * sizeof(pt1_entry_t);
	bcopy(spte1p + idx, dpte1p + idx, count);
}

/*
 * Copy PT2TAB entries covering [sva, eva] from spte2p to dpte2p, again
 * without synchronization; see pte1_copy_nosync() above.
 */
static __inline void
pt2tab_copy_nosync(pt2_entry_t *spte2p, pt2_entry_t *dpte2p, vm_offset_t sva,
    vm_offset_t eva)
{
	u_int idx, count;

	idx = pt2tab_index(sva);
	count = (pt2tab_index(eva) - idx + 1) * sizeof(pt2_entry_t);
	bcopy(spte2p + idx, dpte2p + idx, count);
}

/*
 *  Initialize a preallocated and zeroed pmap structure,
 *  such as one in a vmspace structure.
 *
 *  Returns 1 on success and 0 if the page-table allocations failed
 *  (in which case no allocation is left attached to the pmap).
 */
int
pmap_pinit(pmap_t pmap)
{
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p;
	vm_paddr_t pa, pt2tab_pa;
	u_int i;

	PDEBUG(6, printf("%s: pmap = %p, pm_pt1 = %p\n", __func__, pmap,
	    pmap->pm_pt1));

	/*
	 * No need to allocate L2 page table space yet but we do need
	 * a valid L1 page table and PT2TAB table.
	 *
	 * Install shared kernel mappings to these tables. It's a little
	 * tricky as some parts of KVA are reserved for vectors, devices,
	 * and whatever else. These parts are supposed to be above
	 * vm_max_kernel_address. Thus two regions should be installed:
	 *
	 *   (1) <KERNBASE, kernel_vm_end),
	 *   (2) <vm_max_kernel_address, 0xFFFFFFFF>.
	 *
	 * QQQ: The second region should be stable enough to be installed
	 *      only once in time when the tables are allocated.
	 * QQQ: Maybe copy of both regions at once could be faster ...
	 * QQQ: Maybe the other TTBR is an option.
	 *
	 * Finally, install own PT2TAB table to these tables.
	 */

	if (pmap->pm_pt1 == NULL) {
		/* L1 table must be physically contiguous and size-aligned. */
		pmap->pm_pt1 = (pt1_entry_t *)kmem_alloc_contig(kernel_arena,
		    NB_IN_PT1, M_NOWAIT | M_ZERO, 0, -1UL, NB_IN_PT1, 0,
		    pt_memattr);
		if (pmap->pm_pt1 == NULL)
			return (0);
	}
	if (pmap->pm_pt2tab == NULL) {
		/*
		 * QQQ: (1) PT2TAB must be contiguous. If PT2TAB is one page
		 *      only, what should be the only size for 32 bit systems,
		 *      then we could allocate it with vm_page_alloc() and all
		 *      the stuff needed as other L2 page table pages.
		 *      (2) Note that a process PT2TAB is special L2 page table
		 *      page. Its mapping in kernel_arena is permanent and can
		 *      be used no matter which process is current. Its mapping
		 *      in PT2MAP can be used only for current process.
		 */
		pmap->pm_pt2tab = (pt2_entry_t *)kmem_alloc_attr(kernel_arena,
		    NB_IN_PT2TAB, M_NOWAIT | M_ZERO, 0, -1UL, pt_memattr);
		if (pmap->pm_pt2tab == NULL) {
			/*
			 * QQQ: As struct pmap is allocated from UMA with
			 *      UMA_ZONE_NOFREE flag, it's important to leave
			 *      no allocation in pmap if initialization failed.
			 */
			kmem_free(kernel_arena, (vm_offset_t)pmap->pm_pt1,
			    NB_IN_PT1);
			pmap->pm_pt1 = NULL;
			return (0);
		}
		/*
		 * QQQ: Each L2 page table page vm_page_t has pindex set to
		 *      pte1 index of virtual address mapped by this page.
		 *      It's not valid for non kernel PT2TABs themselves.
		 *      The pindex of these pages can not be altered because
		 *      of the way how they are allocated now. However, it
		 *      should not be a problem.
		 */
	}

	mtx_lock_spin(&allpmaps_lock);
	/*
	 * To avoid race with pmap_kenter_pte1() and pmap_kenter_pt2tab(),
	 * kernel_vm_end_new is used here instead of kernel_vm_end.
	 */
	pte1_copy_nosync(kern_pt1, pmap->pm_pt1, KERNBASE,
	    kernel_vm_end_new - 1);
	pte1_copy_nosync(kern_pt1, pmap->pm_pt1, vm_max_kernel_address,
	    0xFFFFFFFF);
	pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, KERNBASE,
	    kernel_vm_end_new - 1);
	pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, vm_max_kernel_address,
	    0xFFFFFFFF);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	/*
	 * Store PT2MAP PT2 pages (a.k.a. PT2TAB) in PT2TAB itself.
	 * I.e. self reference mapping.  The PT2TAB is private, however mapped
	 * into shared PT2MAP space, so the mapping should be not global.
	 */
	pt2tab_pa = vtophys(pmap->pm_pt2tab);
	pte2p = pmap_pt2tab_entry(pmap, (vm_offset_t)PT2MAP);
	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) {
		pt2tab_store(pte2p++, PTE2_KPT_NG(pa));
	}

	/* Insert PT2MAP PT2s into pmap PT1. */
	pte1p = pmap_pte1(pmap, (vm_offset_t)PT2MAP);
	for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) {
		pte1_store(pte1p++, PTE1_LINK(pa));
	}

	/*
	 * Now synchronize new mapping which was made above.
	 */
	pte1_sync_range(pmap->pm_pt1, NB_IN_PT1);
	pte2_sync_range(pmap->pm_pt2tab, NB_IN_PT2TAB);

	CPU_ZERO(&pmap->pm_active);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);

	return (1);
}

#ifdef INVARIANTS
/*
 * Debug helper: TRUE iff no user-space PT2TAB entry is still populated.
 */
static boolean_t
pt2tab_user_is_empty(pt2_entry_t *tab)
{
	u_int i, end;

	end = pt2tab_index(VM_MAXUSER_ADDRESS);
	for (i = 0; i < end; i++)
		if (tab[i] != 0) return (FALSE);
	return (TRUE);
}
#endif
/*
 *  Release any resources held by the given physical map.
 *  Called when a pmap initialized by pmap_pinit is being released.
 *  Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
#ifdef INVARIANTS
	vm_offset_t start, end;
#endif
	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("%s: pmap resident count %ld != 0", __func__,
	    pmap->pm_stats.resident_count));
	KASSERT(pt2tab_user_is_empty(pmap->pm_pt2tab),
	    ("%s: has allocated user PT2(s)", __func__));
	KASSERT(CPU_EMPTY(&pmap->pm_active),
	    ("%s: pmap %p is active on some CPU(s)", __func__, pmap));

	mtx_lock_spin(&allpmaps_lock);
	LIST_REMOVE(pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

#ifdef INVARIANTS
	/*
	 * Scrub the kernel portion of both tables so stale kernel entries
	 * cannot be mistaken for valid ones if the pmap is ever reused.
	 */
	start = pte1_index(KERNBASE) * sizeof(pt1_entry_t);
	end = (pte1_index(0xFFFFFFFF) + 1) * sizeof(pt1_entry_t);
	bzero((char *)pmap->pm_pt1 + start, end - start);

	start = pt2tab_index(KERNBASE) * sizeof(pt2_entry_t);
	end = (pt2tab_index(0xFFFFFFFF) + 1) * sizeof(pt2_entry_t);
	bzero((char *)pmap->pm_pt2tab + start, end - start);
#endif
	/*
	 * We are leaving PT1 and PT2TAB allocated on released pmap,
	 * so hopefully UMA vmspace_zone will always be inited with
	 * UMA_ZONE_NOFREE flag.
	 */
}

/*********************************************************
 *
 *  L2 table pages and their pages management routines.
 *
 *********************************************************/

/*
 *  Virtual interface for L2 page table wire counting.
 *
 *  Each L2 page table in a page has own counter which counts a number of
 *  valid mappings in a table. Global page counter counts mappings in all
 *  tables in a page plus a single itself mapping in PT2TAB.
 *
 *  During a promotion we leave the associated L2 page table counter
 *  untouched, so the table (strictly speaking a page which holds it)
 *  is never freed if promoted.
 *
 *  If a page m->wire_count == 1 then no valid mappings exist in any L2 page
 *  table in the page and the page itself is only mapped in PT2TAB.
 */

/* Reset all per-table wire counters of a freshly allocated PT2 page. */
static __inline void
pt2_wirecount_init(vm_page_t m)
{
	u_int i;

	/*
	 * Note: A page m is allocated with VM_ALLOC_WIRED flag and
	 *       m->wire_count should be already set correctly.
	 *       So, there is no need to set it again herein.
	 */
	for (i = 0; i < NPT2_IN_PG; i++)
		m->md.pt2_wirecount[i] = 0;
}

/* Account one more valid mapping in the L2 table selected by pte1_idx. */
static __inline void
pt2_wirecount_inc(vm_page_t m, uint32_t pte1_idx)
{

	/*
	 * Note: A just modificated pte2 (i.e. already allocated)
	 *       is acquiring one extra reference which must be
	 *       explicitly cleared. It influences the KASSERTs herein.
	 *       All L2 page tables in a page always belong to the same
	 *       pmap, so we allow only one extra reference for the page.
	 */
	KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] < (NPTE2_IN_PT2 + 1),
	    ("%s: PT2 is overflowing ...", __func__));
	KASSERT(m->wire_count <= (NPTE2_IN_PG + 1),
	    ("%s: PT2PG is overflowing ...", __func__));

	m->wire_count++;
	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]++;
}

/* Account one mapping fewer in the L2 table selected by pte1_idx. */
static __inline void
pt2_wirecount_dec(vm_page_t m, uint32_t pte1_idx)
{

	KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] != 0,
	    ("%s: PT2 is underflowing ...", __func__));
	KASSERT(m->wire_count > 1,
	    ("%s: PT2PG is underflowing ...", __func__));

	m->wire_count--;
	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]--;
}

/*
 * Set the wire count of one L2 table to an absolute value, adjusting the
 * page-global wire_count by the difference.
 */
static __inline void
pt2_wirecount_set(vm_page_t m, uint32_t pte1_idx, uint16_t count)
{

	KASSERT(count <= NPTE2_IN_PT2,
	    ("%s: invalid count %u", __func__, count));
	KASSERT(m->wire_count >  m->md.pt2_wirecount[pte1_idx & PT2PG_MASK],
	    ("%s: PT2PG corrupting (%u, %u) ...", __func__, m->wire_count,
	    m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]));

	m->wire_count -= m->md.pt2_wirecount[pte1_idx & PT2PG_MASK];
	m->wire_count += count;
	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] = count;

	KASSERT(m->wire_count <= (NPTE2_IN_PG + 1),
	    ("%s: PT2PG is overflowed (%u) ...", __func__, m->wire_count));
}

/* Return the number of valid mappings in one L2 table of the page. */
static __inline uint32_t
pt2_wirecount_get(vm_page_t m, uint32_t pte1_idx)
{

	return (m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]);
}

/* TRUE iff the L2 table covering va holds no valid mappings. */
static __inline boolean_t
pt2_is_empty(vm_page_t m, vm_offset_t va)
{

	return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] == 0);
}

/* TRUE iff every entry of the L2 table covering va is a valid mapping. */
static __inline boolean_t
pt2_is_full(vm_page_t m, vm_offset_t va)
{

	return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] ==
	    NPTE2_IN_PT2);
}

/* TRUE iff only the PT2TAB self-mapping remains (see block comment above). */
static __inline boolean_t
pt2pg_is_empty(vm_page_t m)
{

	return (m->wire_count == 1);
}

/*
 *  This routine is called if the L2 page table
 *  is not mapped correctly.
 *
 *  Allocates (or finds) the PT2 page covering va, links the L2 table from
 *  the L1 entry, and returns the PT2 page.  Returns NULL to ask the caller
 *  to retry after a memory wait (unless PMAP_ENTER_NOSLEEP was given).
 */
static vm_page_t
_pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
{
	uint32_t pte1_idx;
	pt1_entry_t *pte1p;
	pt2_entry_t pte2;
	vm_page_t  m;
	vm_paddr_t pt2pg_pa, pt2_pa;

	pte1_idx = pte1_index(va);
	pte1p = pmap->pm_pt1 + pte1_idx;

	KASSERT(pte1_load(pte1p) == 0,
	    ("%s: pm_pt1[%#x] is not zero: %#x", __func__, pte1_idx,
	    pte1_load(pte1p)));

	pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, va));
	if (!pte2_is_valid(pte2)) {
		/*
		 * Install new PT2s page into pmap PT2TAB.
		 */
		m = vm_page_alloc(NULL, pte1_idx & ~PT2PG_MASK,
		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
		if (m == NULL) {
			if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
				/* Drop locks in lock order before sleeping. */
				PMAP_UNLOCK(pmap);
				rw_wunlock(&pvh_global_lock);
				VM_WAIT;
				rw_wlock(&pvh_global_lock);
				PMAP_LOCK(pmap);
			}

			/*
			 * Indicate the need to retry.  While waiting,
			 * the L2 page table page may have been allocated.
			 */
			return (NULL);
		}
		pmap->pm_stats.resident_count++;
		pt2pg_pa = pmap_pt2pg_init(pmap, va, m);
	} else {
		pt2pg_pa = pte2_pa(pte2);
		m = PHYS_TO_VM_PAGE(pt2pg_pa);
	}

	pt2_wirecount_inc(m, pte1_idx);
	pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx);
	pte1_store(pte1p, PTE1_LINK(pt2_pa));

	return (m);
}

/*
 * Ensure an L2 page table exists for va and return its PT2 page with the
 * table's wire count incremented.  Demotes a 1 MB section mapping first
 * if one is in the way.
 */
static vm_page_t
pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
{
	u_int pte1_idx;
	pt1_entry_t *pte1p, pte1;
	vm_page_t m;

	pte1_idx = pte1_index(va);
retry:
	pte1p = pmap->pm_pt1 + pte1_idx;
	pte1 = pte1_load(pte1p);

	/*
	 * This supports switching from a 1MB page to a
	 * normal 4K page.
	 */
	if (pte1_is_section(pte1)) {
		(void)pmap_demote_pte1(pmap, pte1p, va);
		/*
		 * Reload pte1 after demotion.
		 *
		 * Note: Demotion can even fail as either PT2 is not find for
		 *       the virtual address or PT2PG can not be allocated.
		 */
		pte1 = pte1_load(pte1p);
	}

	/*
	 * If the L2 page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (pte1_is_link(pte1)) {
		m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
		pt2_wirecount_inc(m, pte1_idx);
	} else  {
		/*
		 * Here if the PT2 isn't mapped, or if it has
		 * been deallocated.
		 */
		m = _pmap_allocpte2(pmap, va, flags);
		if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
			goto retry;
	}

	return (m);
}

/* Release every page on the delayed-free list back to the VM system. */
static __inline void
pmap_free_zero_pages(struct spglist *free)
{
	vm_page_t m;

	while ((m = SLIST_FIRST(free)) != NULL) {
		SLIST_REMOVE_HEAD(free, plinks.s.ss);
		/* Preserve the page's PG_ZERO setting. */
		vm_page_free_toq(m);
	}
}

/*
 *  Schedule the specified unused L2 page table page to be freed. Specifically,
 *  add the page to the specified list of pages that will be released to the
 *  physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free)
{

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
#ifdef PMAP_DEBUG
	pmap_zero_page_check(m);
#endif
	m->flags |= PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}

/*
 *  Unwire L2 page tables page.
 *
 *  Clears every L1 link to the (empty) PT2 page, removes its PT2TAB
 *  mapping, flushes the relevant TLB entries and drops the wire count.
 */
static void
pmap_unwire_pt2pg(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pt1_entry_t *pte1p, opte1 __unused;
	pt2_entry_t *pte2p;
	uint32_t i;

	KASSERT(pt2pg_is_empty(m),
	    ("%s: pmap %p PT2PG %p wired", __func__, pmap, m));

	/*
	 * Unmap all L2 page tables in the page from L1 page table.
	 *
	 * QQQ: Individual L2 page tables (except the last one) can be unmapped
	 * earlier. However, we are doing that this way.
	 */
	KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK),
	    ("%s: pmap %p va %#x PT2PG %p bad index", __func__, pmap, va, m));
	pte1p = pmap->pm_pt1 + m->pindex;
	for (i = 0; i < NPT2_IN_PG; i++, pte1p++) {
		KASSERT(m->md.pt2_wirecount[i] == 0,
		    ("%s: pmap %p PT2 %u (PG %p) wired", __func__, pmap, i, m));
		opte1 = pte1_load(pte1p);
		if (pte1_is_link(opte1)) {
			pte1_clear(pte1p);
			/*
			 * Flush intermediate TLB cache.
			 */
			pmap_tlb_flush(pmap, (m->pindex + i) << PTE1_SHIFT);
		}
#ifdef INVARIANTS
		else
			KASSERT((opte1 == 0) || pte1_is_section(opte1),
			    ("%s: pmap %p va %#x bad pte1 %x at %u", __func__,
			    pmap, va, opte1, i));
#endif
	}

	/*
	 * Unmap the page from PT2TAB.
	 */
	pte2p = pmap_pt2tab_entry(pmap, va);
	(void)pt2tab_load_clear(pte2p);
	pmap_tlb_flush(pmap, pt2map_pt2pg(va));

	m->wire_count = 0;
	pmap->pm_stats.resident_count--;

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the L2 page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
}

/*
 *  Decrements a L2 page table page's wire count, which is used to record the
 *  number of valid page table entries within the page. If the wire count
 *  drops to zero, then the page table page is unmapped. Returns TRUE if the
 *  page table page was unmapped and FALSE otherwise.
 */
static __inline boolean_t
pmap_unwire_pt2(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
	pt2_wirecount_dec(m, pte1_index(va));
	if (pt2pg_is_empty(m)) {
		/*
		 * QQQ: Wire count is zero, so whole page should be zero and
		 *      we can set PG_ZERO flag to it.
		 *      Note that when promotion is enabled, it takes some
		 *      more efforts. See pmap_unwire_pt2_all() below.
		 */
		pmap_unwire_pt2pg(pmap, va, m);
		pmap_add_delayed_free_list(m, free);
		return (TRUE);
	} else
		return (FALSE);
}

/*
 *  Drop a L2 page table page's wire count at once, which is used to record
 *  the number of valid L2 page table entries within the page. If the wire
 *  count drops to zero, then the L2 page table page is unmapped.
 */
static __inline void
pmap_unwire_pt2_all(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free)
{
	u_int pte1_idx = pte1_index(va);

	KASSERT(m->pindex == (pte1_idx & ~PT2PG_MASK),
	    ("%s: PT2 page's pindex is wrong", __func__));
	KASSERT(m->wire_count > pt2_wirecount_get(m, pte1_idx),
	    ("%s: bad pt2 wire count %u > %u", __func__, m->wire_count,
	    pt2_wirecount_get(m, pte1_idx)));

	/*
	 * It's possible that the L2 page table was never used.
	 * It happened in case that a section was created without promotion.
	 */
	if (pt2_is_full(m, va)) {
		pt2_wirecount_set(m, pte1_idx, 0);

		/*
		 * QQQ: We clear L2 page table now, so when L2 page table page
		 *      is going to be freed, we can set it PG_ZERO flag ...
		 *      This function is called only on section mappings, so
		 *      hopefully it's not to big overload.
		 *
		 * XXX: If pmap is current, existing PT2MAP mapping could be
		 *      used for zeroing.
		 */
		pmap_zero_page_area(m, page_pt2off(pte1_idx), NB_IN_PT2);
	}
#ifdef INVARIANTS
	else
		KASSERT(pt2_is_empty(m, va), ("%s: PT2 is not empty (%u)",
		    __func__, pt2_wirecount_get(m, pte1_idx)));
#endif
	if (pt2pg_is_empty(m)) {
		pmap_unwire_pt2pg(pmap, va, m);
		pmap_add_delayed_free_list(m, free);
	}
}

/*
 *  After removing a L2 page table entry, this routine is used to
 *  conditionally free the page, and manage the hold/wire counts.
 */
static boolean_t
pmap_unuse_pt2(pmap_t pmap, vm_offset_t va, struct spglist *free)
{
	pt1_entry_t pte1;
	vm_page_t mpte;

	/* Kernel page tables are never freed this way. */
	if (va >= VM_MAXUSER_ADDRESS)
		return (FALSE);
	pte1 = pte1_load(pmap_pte1(pmap, va));
	mpte = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
	return (pmap_unwire_pt2(pmap, va, mpte, free));
}

/*************************************
 *
 *  Page management routines.
 *
 *************************************/

/* Compile-time layout checks for the pv_chunk allocator. */
CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 11);
CTASSERT(_NPCPV == 336);

/* Map a pv_entry back to the page-aligned chunk that contains it. */
static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define	PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
#define	PC_FREE10	0x0000fffful	/* Free values for index 10 */

/* Bitmap of all-free slots per pc_map word; last word is partial. */
static const uint32_t pc_freemask[_NPCM] = {
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE10
};

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
    "Current number of pv entries");

#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
    "Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
    "Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
    "Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail,
    0, "Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
    "Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs,
    0, "Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
    "Current number of spare pv entries");
#endif

/*
 *  Is given page managed?
 */
static __inline bool
is_managed(vm_paddr_t pa)
{
	vm_page_t m;

	m = PHYS_TO_VM_PAGE(pa);
	if (m == NULL)
		return (false);
	return ((m->oflags & VPO_UNMANAGED) == 0);
}

/* Is the page backing a 1 MB section mapping managed? */
static __inline bool
pte1_is_managed(pt1_entry_t pte1)
{

	return (is_managed(pte1_pa(pte1)));
}

/* Is the page backing a 4 KB mapping managed? */
static __inline bool
pte2_is_managed(pt2_entry_t pte2)
{

	return (is_managed(pte2_pa(pte2)));
}

/*
 *  We are in a serious low memory condition.  Resort to
 *  drastic measures to free some pages so we can allocate
 *  another pv entry chunk.
2814 */ 2815static vm_page_t 2816pmap_pv_reclaim(pmap_t locked_pmap) 2817{ 2818 struct pch newtail; 2819 struct pv_chunk *pc; 2820 struct md_page *pvh; 2821 pt1_entry_t *pte1p; 2822 pmap_t pmap; 2823 pt2_entry_t *pte2p, tpte2; 2824 pv_entry_t pv; 2825 vm_offset_t va; 2826 vm_page_t m, m_pc; 2827 struct spglist free; 2828 uint32_t inuse; 2829 int bit, field, freed; 2830 2831 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 2832 pmap = NULL; 2833 m_pc = NULL; 2834 SLIST_INIT(&free); 2835 TAILQ_INIT(&newtail); 2836 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || 2837 SLIST_EMPTY(&free))) { 2838 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2839 if (pmap != pc->pc_pmap) { 2840 if (pmap != NULL) { 2841 if (pmap != locked_pmap) 2842 PMAP_UNLOCK(pmap); 2843 } 2844 pmap = pc->pc_pmap; 2845 /* Avoid deadlock and lock recursion. */ 2846 if (pmap > locked_pmap) 2847 PMAP_LOCK(pmap); 2848 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { 2849 pmap = NULL; 2850 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 2851 continue; 2852 } 2853 } 2854 2855 /* 2856 * Destroy every non-wired, 4 KB page mapping in the chunk. 
2857 */ 2858 freed = 0; 2859 for (field = 0; field < _NPCM; field++) { 2860 for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 2861 inuse != 0; inuse &= ~(1UL << bit)) { 2862 bit = ffs(inuse) - 1; 2863 pv = &pc->pc_pventry[field * 32 + bit]; 2864 va = pv->pv_va; 2865 pte1p = pmap_pte1(pmap, va); 2866 if (pte1_is_section(pte1_load(pte1p))) 2867 continue; 2868 pte2p = pmap_pte2(pmap, va); 2869 tpte2 = pte2_load(pte2p); 2870 if ((tpte2 & PTE2_W) == 0) 2871 tpte2 = pte2_load_clear(pte2p); 2872 pmap_pte2_release(pte2p); 2873 if ((tpte2 & PTE2_W) != 0) 2874 continue; 2875 KASSERT(tpte2 != 0, 2876 ("pmap_pv_reclaim: pmap %p va %#x zero pte", 2877 pmap, va)); 2878 pmap_tlb_flush(pmap, va); 2879 m = PHYS_TO_VM_PAGE(pte2_pa(tpte2)); 2880 if (pte2_is_dirty(tpte2)) 2881 vm_page_dirty(m); 2882 if ((tpte2 & PTE2_A) != 0) 2883 vm_page_aflag_set(m, PGA_REFERENCED); 2884 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2885 if (TAILQ_EMPTY(&m->md.pv_list) && 2886 (m->flags & PG_FICTITIOUS) == 0) { 2887 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2888 if (TAILQ_EMPTY(&pvh->pv_list)) { 2889 vm_page_aflag_clear(m, 2890 PGA_WRITEABLE); 2891 } 2892 } 2893 pc->pc_map[field] |= 1UL << bit; 2894 pmap_unuse_pt2(pmap, va, &free); 2895 freed++; 2896 } 2897 } 2898 if (freed == 0) { 2899 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 2900 continue; 2901 } 2902 /* Every freed mapping is for a 4 KB page. */ 2903 pmap->pm_stats.resident_count -= freed; 2904 PV_STAT(pv_entry_frees += freed); 2905 PV_STAT(pv_entry_spare += freed); 2906 pv_entry_count -= freed; 2907 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2908 for (field = 0; field < _NPCM; field++) 2909 if (pc->pc_map[field] != pc_freemask[field]) { 2910 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 2911 pc_list); 2912 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 2913 2914 /* 2915 * One freed pv entry in locked_pmap is 2916 * sufficient. 
2917 */ 2918 if (pmap == locked_pmap) 2919 goto out; 2920 break; 2921 } 2922 if (field == _NPCM) { 2923 PV_STAT(pv_entry_spare -= _NPCPV); 2924 PV_STAT(pc_chunk_count--); 2925 PV_STAT(pc_chunk_frees++); 2926 /* Entire chunk is free; return it. */ 2927 m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2928 pmap_qremove((vm_offset_t)pc, 1); 2929 pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); 2930 break; 2931 } 2932 } 2933out: 2934 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); 2935 if (pmap != NULL) { 2936 if (pmap != locked_pmap) 2937 PMAP_UNLOCK(pmap); 2938 } 2939 if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) { 2940 m_pc = SLIST_FIRST(&free); 2941 SLIST_REMOVE_HEAD(&free, plinks.s.ss); 2942 /* Recycle a freed page table page. */ 2943 m_pc->wire_count = 1; 2944 atomic_add_int(&vm_cnt.v_wire_count, 1); 2945 } 2946 pmap_free_zero_pages(&free); 2947 return (m_pc); 2948} 2949 2950static void 2951free_pv_chunk(struct pv_chunk *pc) 2952{ 2953 vm_page_t m; 2954 2955 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2956 PV_STAT(pv_entry_spare -= _NPCPV); 2957 PV_STAT(pc_chunk_count--); 2958 PV_STAT(pc_chunk_frees++); 2959 /* entire chunk is free, return it */ 2960 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2961 pmap_qremove((vm_offset_t)pc, 1); 2962 vm_page_unwire(m, PQ_NONE); 2963 vm_page_free(m); 2964 pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); 2965} 2966 2967/* 2968 * Free the pv_entry back to the free list. 
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int idx, field, bit;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_frees++);
	PV_STAT(pv_entry_spare++);
	pv_entry_count--;
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	/* Mark the entry's slot free in the chunk's bitmap. */
	field = idx / 32;
	bit = idx % 32;
	pc->pc_map[field] |= 1ul << bit;
	for (idx = 0; idx < _NPCM; idx++)
		if (pc->pc_map[idx] != pc_freemask[idx]) {
			/*
			 * 98% of the time, pc is already at the head of the
			 * list.  If it isn't already, move it to the head.
			 */
			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
			    pc)) {
				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
			}
			return;
		}
	/* Every slot is free: release the whole chunk. */
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	free_pv_chunk(pc);
}

/*
 *  Get a new pv_entry, allocating a block from the system
 *  when needed.
 *
 *  If "try" is TRUE, returns NULL instead of sleeping/reclaiming when
 *  no chunk page can be allocated.
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, boolean_t try)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	int bit, field;
	pv_entry_t pv;
	struct pv_chunk *pc;
	vm_page_t m;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_allocs++);
	pv_entry_count++;
	if (pv_entry_count > pv_entry_high_water)
		if (ratecheck(&lastprint, &printinterval))
			printf("Approaching the limit on PV entries, consider "
			    "increasing either the vm.pmap.shpgperproc or the "
			    "vm.pmap.pv_entry_max tunable.\n");
retry:
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		/* Find the first free slot in the chunk, if any. */
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = ffs(pc->pc_map[field]) - 1;
				break;
			}
		}
		if (field < _NPCM) {
			pv = &pc->pc_pventry[field * 32 + bit];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			for (field = 0; field < _NPCM; field++)
				if (pc->pc_map[field] != 0) {
					PV_STAT(pv_entry_spare--);
					return (pv);	/* not full, return */
				}
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
			PV_STAT(pv_entry_spare--);
			return (pv);
		}
	}
	/*
	 * Access to the pte2list "pv_vafree" is synchronized by the pvh
	 * global lock.  If "pv_vafree" is currently non-empty, it will
	 * remain non-empty until pmap_pte2list_alloc() completes.
	 */
	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
		if (try) {
			pv_entry_count--;
			PV_STAT(pc_chunk_tryfail++);
			return (NULL);
		}
		/* Reclaim pv entries; retry the chunk scan afterwards. */
		m = pmap_pv_reclaim(pmap);
		if (m == NULL)
			goto retry;
	}
	PV_STAT(pc_chunk_count++);
	PV_STAT(pc_chunk_allocs++);
	pc = (struct pv_chunk *)pmap_pte2list_alloc(&pv_vafree);
	pmap_qenter((vm_offset_t)pc, &m, 1);
	pc->pc_pmap = pmap;
	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
	for (field = 1; field < _NPCM; field++)
		pc->pc_map[field] = pc_freemask[field];
	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
	pv = &pc->pc_pventry[0];
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	PV_STAT(pv_entry_spare += _NPCPV - 1);
	return (pv);
}

/*
 *  Create a pv entry for page at pa for
 *  (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pv = get_pv_entry(pmap, FALSE);
	pv->pv_va = va;
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
}

/*
 * Remove and return the pv entry for (pmap, va) from the given pv list,
 * or NULL if it is not on the list.
 */
static __inline pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
			break;
		}
	}
	return (pv);
}

/*
 * Remove the pv entry for (pmap, va) from the list and free it.
 * The entry must exist.
 */
static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	pv = pmap_pvh_remove(pvh, pmap, va);
	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
	free_pv_entry(pmap, pv);
}

/*
 * Remove the 4 KB mapping's pv entry and clear PGA_WRITEABLE when no
 * writable mappings of the page remain.
 */
static void
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{
	struct md_page *pvh;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	pmap_pvh_free(&m->md, pmap, va);
	if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) {
		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
		if (TAILQ_EMPTY(&pvh->pv_list))
			vm_page_aflag_clear(m, PGA_WRITEABLE);
	}
}

/*
 * Demote the pv bookkeeping of a 1 MB mapping: move the 1mpage pv entry
 * to the first 4 KB page and instantiate pv entries for the rest.
 */
static void
pmap_pv_demote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
{
	struct md_page *pvh;
	pv_entry_t pv;
	vm_offset_t va_last;
	vm_page_t m;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	KASSERT((pa & PTE1_OFFSET) == 0,
	    ("pmap_pv_demote_pte1: pa is not 1mpage aligned"));

	/*
	 * Transfer the 1mpage's pv entry for this mapping to the first
	 * page's pv list.
	 */
	pvh = pa_to_pvh(pa);
	va = pte1_trunc(va);
	pv = pmap_pvh_remove(pvh, pmap, va);
	KASSERT(pv != NULL, ("pmap_pv_demote_pte1: pv not found"));
	m = PHYS_TO_VM_PAGE(pa);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
	/* Instantiate the remaining NPTE2_IN_PT2 - 1 pv entries. */
	va_last = va + PTE1_SIZE - PAGE_SIZE;
	do {
		m++;
		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
		    ("pmap_pv_demote_pte1: page %p is not managed", m));
		va += PAGE_SIZE;
		pmap_insert_entry(pmap, va, m);
	} while (va < va_last);
}

static void
pmap_pv_promote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
{
	struct md_page *pvh;
	pv_entry_t pv;
	vm_offset_t va_last;
	vm_page_t m;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	KASSERT((pa & PTE1_OFFSET) == 0,
	    ("pmap_pv_promote_pte1: pa is not 1mpage aligned"));

	/*
	 * Transfer the first page's pv entry for this mapping to the
	 * 1mpage's pv list.  Aside from avoiding the cost of a call
	 * to get_pv_entry(), a transfer avoids the possibility that
	 * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim()
	 * removes one of the mappings that is being promoted.
3188 */ 3189 m = PHYS_TO_VM_PAGE(pa); 3190 va = pte1_trunc(va); 3191 pv = pmap_pvh_remove(&m->md, pmap, va); 3192 KASSERT(pv != NULL, ("pmap_pv_promote_pte1: pv not found")); 3193 pvh = pa_to_pvh(pa); 3194 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 3195 /* Free the remaining NPTE2_IN_PT2 - 1 pv entries. */ 3196 va_last = va + PTE1_SIZE - PAGE_SIZE; 3197 do { 3198 m++; 3199 va += PAGE_SIZE; 3200 pmap_pvh_free(&m->md, pmap, va); 3201 } while (va < va_last); 3202} 3203 3204/* 3205 * Conditionally create a pv entry. 3206 */ 3207static boolean_t 3208pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 3209{ 3210 pv_entry_t pv; 3211 3212 rw_assert(&pvh_global_lock, RA_WLOCKED); 3213 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3214 if (pv_entry_count < pv_entry_high_water && 3215 (pv = get_pv_entry(pmap, TRUE)) != NULL) { 3216 pv->pv_va = va; 3217 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3218 return (TRUE); 3219 } else 3220 return (FALSE); 3221} 3222 3223/* 3224 * Create the pv entries for each of the pages within a section. 3225 */ 3226static boolean_t 3227pmap_pv_insert_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3228{ 3229 struct md_page *pvh; 3230 pv_entry_t pv; 3231 3232 rw_assert(&pvh_global_lock, RA_WLOCKED); 3233 if (pv_entry_count < pv_entry_high_water && 3234 (pv = get_pv_entry(pmap, TRUE)) != NULL) { 3235 pv->pv_va = va; 3236 pvh = pa_to_pvh(pa); 3237 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 3238 return (TRUE); 3239 } else 3240 return (FALSE); 3241} 3242 3243static inline void 3244pmap_tlb_flush_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t npte1) 3245{ 3246 3247 /* Kill all the small mappings or the big one only. */ 3248 if (pte1_is_section(npte1)) 3249 pmap_tlb_flush_range(pmap, pte1_trunc(va), PTE1_SIZE); 3250 else 3251 pmap_tlb_flush(pmap, pte1_trunc(va)); 3252} 3253 3254/* 3255 * Update kernel pte1 on all pmaps. 3256 * 3257 * The following function is called only on one cpu with disabled interrupts. 
 * In SMP case, smp_rendezvous_cpus() is used to stop other cpus. This way
 * nobody can invoke explicit hardware table walk during the update of pte1.
 * Unsolicited hardware table walk can still happen, invoked by speculative
 * data or instruction prefetch or even by speculative hardware table walk.
 *
 * The break-before-make approach should be implemented here. However, it's
 * not so easy to do that for kernel mappings as it would be unhappy to unmap
 * itself unexpectedly but voluntarily.
 */
static void
pmap_update_pte1_kernel(vm_offset_t va, pt1_entry_t npte1)
{
	pmap_t pmap;
	pt1_entry_t *pte1p;

	/*
	 * Get current pmap. Interrupts should be disabled here
	 * so PCPU_GET() is done atomically.
	 */
	pmap = PCPU_GET(curpmap);
	if (pmap == NULL)
		pmap = kernel_pmap;

	/*
	 * (1) Change pte1 on current pmap.
	 * (2) Flush all obsolete TLB entries on current CPU.
	 * (3) Change pte1 on all pmaps.
	 * (4) Flush all obsolete TLB entries on all CPUs in SMP case.
	 */

	pte1p = pmap_pte1(pmap, va);
	pte1_store(pte1p, npte1);

	/* Kill all the small mappings or the big one only. */
	if (pte1_is_section(npte1)) {
		pmap_pte1_kern_promotions++;
		tlb_flush_range_local(pte1_trunc(va), PTE1_SIZE);
	} else {
		pmap_pte1_kern_demotions++;
		tlb_flush_local(pte1_trunc(va));
	}

	/*
	 * In SMP case, this function is called when all cpus are at smp
	 * rendezvous, so there is no need to use 'allpmaps_lock' lock here.
	 * In UP case, the function is called with this lock locked.
	 */
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		pte1p = pmap_pte1(pmap, va);
		pte1_store(pte1p, npte1);
	}

#ifdef SMP
	/* Kill all the small mappings or the big one only. */
	if (pte1_is_section(npte1))
		tlb_flush_range(pte1_trunc(va), PTE1_SIZE);
	else
		tlb_flush(pte1_trunc(va));
#endif
}

#ifdef SMP
/* Argument block passed to the pte1-update rendezvous callback. */
struct pte1_action {
	vm_offset_t va;
	pt1_entry_t npte1;
	u_int update;		/* CPU that updates the PTE1 */
};

/*
 * Rendezvous callback: only the designated CPU performs the update;
 * the other CPUs are merely held in the rendezvous meanwhile.
 */
static void
pmap_update_pte1_action(void *arg)
{
	struct pte1_action *act = arg;

	if (act->update == PCPU_GET(cpuid))
		pmap_update_pte1_kernel(act->va, act->npte1);
}

/*
 * Change pte1 on current pmap.
 * Note that kernel pte1 must be changed on all pmaps.
 *
 * According to the architecture reference manual published by ARM,
 * the behaviour is UNPREDICTABLE when two or more TLB entries map the same VA.
 * According to this manual, UNPREDICTABLE behaviours must never happen in
 * a viable system. In contrast, on x86 processors, it is not specified which
 * TLB entry mapping the virtual address will be used, but the MMU doesn't
 * generate a bogus translation the way it does on Cortex-A8 rev 2 (Beaglebone
 * Black).
 *
 * It's a problem when either promotion or demotion is being done. The pte1
 * update and appropriate TLB flush must be done atomically in general.
 */
static void
pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va,
    pt1_entry_t npte1)
{

	if (pmap == kernel_pmap) {
		struct pte1_action act;

		sched_pin();
		act.va = va;
		act.npte1 = npte1;
		act.update = PCPU_GET(cpuid);
		smp_rendezvous_cpus(all_cpus, smp_no_rendevous_barrier,
		    pmap_update_pte1_action, NULL, &act);
		sched_unpin();
	} else {
		register_t cspr;

		/*
		 * Use break-before-make approach for changing userland
		 * mappings. It can cause L1 translation aborts on other
		 * cores in SMP case. So, special treatment is implemented
		 * in pmap_fault().
To reduce the likelihood that another core 3373 * will be affected by the broken mapping, disable interrupts 3374 * until the mapping change is completed. 3375 */ 3376 cspr = disable_interrupts(PSR_I | PSR_F); 3377 pte1_clear(pte1p); 3378 pmap_tlb_flush_pte1(pmap, va, npte1); 3379 pte1_store(pte1p, npte1); 3380 restore_interrupts(cspr); 3381 } 3382} 3383#else 3384static void 3385pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va, 3386 pt1_entry_t npte1) 3387{ 3388 3389 if (pmap == kernel_pmap) { 3390 mtx_lock_spin(&allpmaps_lock); 3391 pmap_update_pte1_kernel(va, npte1); 3392 mtx_unlock_spin(&allpmaps_lock); 3393 } else { 3394 register_t cspr; 3395 3396 /* 3397 * Use break-before-make approach for changing userland 3398 * mappings. It's absolutely safe in UP case when interrupts 3399 * are disabled. 3400 */ 3401 cspr = disable_interrupts(PSR_I | PSR_F); 3402 pte1_clear(pte1p); 3403 pmap_tlb_flush_pte1(pmap, va, npte1); 3404 pte1_store(pte1p, npte1); 3405 restore_interrupts(cspr); 3406 } 3407} 3408#endif 3409 3410/* 3411 * Tries to promote the NPTE2_IN_PT2, contiguous 4KB page mappings that are 3412 * within a single page table page (PT2) to a single 1MB page mapping. 3413 * For promotion to occur, two conditions must be met: (1) the 4KB page 3414 * mappings must map aligned, contiguous physical memory and (2) the 4KB page 3415 * mappings must have identical characteristics. 3416 * 3417 * Managed (PG_MANAGED) mappings within the kernel address space are not 3418 * promoted. The reason is that kernel PTE1s are replicated in each pmap but 3419 * pmap_remove_write(), pmap_clear_modify(), and pmap_clear_reference() only 3420 * read the PTE1 from the kernel pmap. 
 */
static void
pmap_promote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
{
	pt1_entry_t npte1;
	pt2_entry_t *fpte2p, fpte2, fpte2_fav;
	pt2_entry_t *pte2p, pte2;
	vm_offset_t pteva __unused;
	vm_page_t m __unused;

	PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__,
	    pmap, va, pte1_load(pte1p), pte1p));

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Examine the first PTE2 in the specified PT2. Abort if this PTE2 is
	 * either invalid, unused, or does not map the first 4KB physical page
	 * within a 1MB page.
	 */
	fpte2p = pmap_pte2_quick(pmap, pte1_trunc(va));
	fpte2 = pte2_load(fpte2p);
	if ((fpte2 & ((PTE2_FRAME & PTE1_OFFSET) | PTE2_A | PTE2_V)) !=
	    (PTE2_A | PTE2_V)) {
		pmap_pte1_p_failures++;
		CTR3(KTR_PMAP, "%s: failure(1) for va %#x in pmap %p",
		    __func__, va, pmap);
		return;
	}
	if (pte2_is_managed(fpte2) && pmap == kernel_pmap) {
		pmap_pte1_p_failures++;
		CTR3(KTR_PMAP, "%s: failure(2) for va %#x in pmap %p",
		    __func__, va, pmap);
		return;
	}
	if ((fpte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) {
		/*
		 * When page is not modified, PTE2_RO can be set without
		 * a TLB invalidation.
		 */
		fpte2 |= PTE2_RO;
		pte2_store(fpte2p, fpte2);
	}

	/*
	 * Examine each of the other PTE2s in the specified PT2. Abort if this
	 * PTE2 maps an unexpected 4KB physical page or does not have identical
	 * characteristics to the first PTE2.
	 */
	fpte2_fav = (fpte2 & (PTE2_FRAME | PTE2_A | PTE2_V));
	fpte2_fav += PTE1_SIZE - PTE2_SIZE;	/* examine from the end */
	/* Scan backward from the last PTE2 toward the first. */
	for (pte2p = fpte2p + NPTE2_IN_PT2 - 1; pte2p > fpte2p; pte2p--) {
		pte2 = pte2_load(pte2p);
		if ((pte2 & (PTE2_FRAME | PTE2_A | PTE2_V)) != fpte2_fav) {
			pmap_pte1_p_failures++;
			CTR3(KTR_PMAP, "%s: failure(3) for va %#x in pmap %p",
			    __func__, va, pmap);
			return;
		}
		if ((pte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) {
			/*
			 * When page is not modified, PTE2_RO can be set
			 * without a TLB invalidation. See note above.
			 */
			pte2 |= PTE2_RO;
			pte2_store(pte2p, pte2);
			pteva = pte1_trunc(va) | (pte2 & PTE1_OFFSET &
			    PTE2_FRAME);
			CTR3(KTR_PMAP, "%s: protect for va %#x in pmap %p",
			    __func__, pteva, pmap);
		}
		if ((pte2 & PTE2_PROMOTE) != (fpte2 & PTE2_PROMOTE)) {
			pmap_pte1_p_failures++;
			CTR3(KTR_PMAP, "%s: failure(4) for va %#x in pmap %p",
			    __func__, va, pmap);
			return;
		}

		fpte2_fav -= PTE2_SIZE;
	}
	/*
	 * The page table page in its current state will stay in PT2TAB
	 * until the PTE1 mapping the section is demoted by pmap_demote_pte1()
	 * or destroyed by pmap_remove_pte1().
	 *
	 * Note that L2 page table size is not equal to PAGE_SIZE.
	 */
	m = PHYS_TO_VM_PAGE(trunc_page(pte1_link_pa(pte1_load(pte1p))));
	KASSERT(m >= vm_page_array && m < &vm_page_array[vm_page_array_size],
	    ("%s: PT2 page is out of range", __func__));
	KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK),
	    ("%s: PT2 page's pindex is wrong", __func__));

	/*
	 * Get pte1 from pte2 format.
	 */
	npte1 = (fpte2 & PTE1_FRAME) | ATTR_TO_L1(fpte2) | PTE1_V;

	/*
	 * Promote the pv entries.
	 */
	if (pte2_is_managed(fpte2))
		pmap_pv_promote_pte1(pmap, va, pte1_pa(npte1));

	/*
	 * Promote the mappings.
3527 */ 3528 pmap_change_pte1(pmap, pte1p, va, npte1); 3529 3530 pmap_pte1_promotions++; 3531 CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p", 3532 __func__, va, pmap); 3533 3534 PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n", 3535 __func__, pmap, va, npte1, pte1_load(pte1p), pte1p)); 3536} 3537 3538/* 3539 * Zero L2 page table page. 3540 */ 3541static __inline void 3542pmap_clear_pt2(pt2_entry_t *fpte2p) 3543{ 3544 pt2_entry_t *pte2p; 3545 3546 for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) 3547 pte2_clear(pte2p); 3548 3549} 3550 3551/* 3552 * Removes a 1MB page mapping from the kernel pmap. 3553 */ 3554static void 3555pmap_remove_kernel_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va) 3556{ 3557 vm_page_t m; 3558 uint32_t pte1_idx; 3559 pt2_entry_t *fpte2p; 3560 vm_paddr_t pt2_pa; 3561 3562 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3563 m = pmap_pt2_page(pmap, va); 3564 if (m == NULL) 3565 /* 3566 * QQQ: Is this function called only on promoted pte1? 3567 * We certainly do section mappings directly 3568 * (without promotion) in kernel !!! 3569 */ 3570 panic("%s: missing pt2 page", __func__); 3571 3572 pte1_idx = pte1_index(va); 3573 3574 /* 3575 * Initialize the L2 page table. 3576 */ 3577 fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx); 3578 pmap_clear_pt2(fpte2p); 3579 3580 /* 3581 * Remove the mapping. 3582 */ 3583 pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(m), pte1_idx); 3584 pmap_kenter_pte1(va, PTE1_LINK(pt2_pa)); 3585 3586 /* 3587 * QQQ: We do not need to invalidate PT2MAP mapping 3588 * as we did not change it. I.e. the L2 page table page 3589 * was and still is mapped the same way. 
3590 */ 3591} 3592 3593/* 3594 * Do the things to unmap a section in a process 3595 */ 3596static void 3597pmap_remove_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva, 3598 struct spglist *free) 3599{ 3600 pt1_entry_t opte1; 3601 struct md_page *pvh; 3602 vm_offset_t eva, va; 3603 vm_page_t m; 3604 3605 PDEBUG(6, printf("%s(%p): va %#x pte1 %#x at %p\n", __func__, pmap, sva, 3606 pte1_load(pte1p), pte1p)); 3607 3608 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3609 KASSERT((sva & PTE1_OFFSET) == 0, 3610 ("%s: sva is not 1mpage aligned", __func__)); 3611 3612 /* 3613 * Clear and invalidate the mapping. It should occupy one and only TLB 3614 * entry. So, pmap_tlb_flush() called with aligned address should be 3615 * sufficient. 3616 */ 3617 opte1 = pte1_load_clear(pte1p); 3618 pmap_tlb_flush(pmap, sva); 3619 3620 if (pte1_is_wired(opte1)) 3621 pmap->pm_stats.wired_count -= PTE1_SIZE / PAGE_SIZE; 3622 pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE; 3623 if (pte1_is_managed(opte1)) { 3624 pvh = pa_to_pvh(pte1_pa(opte1)); 3625 pmap_pvh_free(pvh, pmap, sva); 3626 eva = sva + PTE1_SIZE; 3627 for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1)); 3628 va < eva; va += PAGE_SIZE, m++) { 3629 if (pte1_is_dirty(opte1)) 3630 vm_page_dirty(m); 3631 if (opte1 & PTE1_A) 3632 vm_page_aflag_set(m, PGA_REFERENCED); 3633 if (TAILQ_EMPTY(&m->md.pv_list) && 3634 TAILQ_EMPTY(&pvh->pv_list)) 3635 vm_page_aflag_clear(m, PGA_WRITEABLE); 3636 } 3637 } 3638 if (pmap == kernel_pmap) { 3639 /* 3640 * L2 page table(s) can't be removed from kernel map as 3641 * kernel counts on it (stuff around pmap_growkernel()). 3642 */ 3643 pmap_remove_kernel_pte1(pmap, pte1p, sva); 3644 } else { 3645 /* 3646 * Get associated L2 page table page. 3647 * It's possible that the page was never allocated. 3648 */ 3649 m = pmap_pt2_page(pmap, sva); 3650 if (m != NULL) 3651 pmap_unwire_pt2_all(pmap, sva, m, free); 3652 } 3653} 3654 3655/* 3656 * Fills L2 page table page with mappings to consecutive physical pages. 
 */
static __inline void
pmap_fill_pt2(pt2_entry_t *fpte2p, pt2_entry_t npte2)
{
	pt2_entry_t *pte2p;

	for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) {
		pte2_store(pte2p, npte2);
		npte2 += PTE2_SIZE;
	}
}

/*
 * Tries to demote a 1MB page mapping. If demotion fails, the
 * 1MB page mapping is invalidated.
 */
static boolean_t
pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
{
	pt1_entry_t opte1, npte1;
	pt2_entry_t *fpte2p, npte2;
	vm_paddr_t pt2pg_pa, pt2_pa;
	vm_page_t m;
	struct spglist free;
	uint32_t pte1_idx, isnew = 0;

	PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__,
	    pmap, va, pte1_load(pte1p), pte1p));

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	opte1 = pte1_load(pte1p);
	KASSERT(pte1_is_section(opte1), ("%s: opte1 not a section", __func__));

	if ((opte1 & PTE1_A) == 0 || (m = pmap_pt2_page(pmap, va)) == NULL) {
		KASSERT(!pte1_is_wired(opte1),
		    ("%s: PT2 page for a wired mapping is missing", __func__));

		/*
		 * Invalidate the 1MB page mapping and return
		 * "failure" if the mapping was never accessed or the
		 * allocation of the new page table page fails.
		 */
		if ((opte1 & PTE1_A) == 0 || (m = vm_page_alloc(NULL,
		    pte1_index(va) & ~PT2PG_MASK, VM_ALLOC_NOOBJ |
		    VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) {
			SLIST_INIT(&free);
			pmap_remove_pte1(pmap, pte1p, pte1_trunc(va), &free);
			pmap_free_zero_pages(&free);
			CTR3(KTR_PMAP, "%s: failure for va %#x in pmap %p",
			    __func__, va, pmap);
			return (FALSE);
		}
		if (va < VM_MAXUSER_ADDRESS)
			pmap->pm_stats.resident_count++;

		isnew = 1;

		/*
		 * We init all L2 page tables in the page even if
		 * we are going to change everything for one L2 page
		 * table in a while.
		 */
		pt2pg_pa = pmap_pt2pg_init(pmap, va, m);
	} else {
		if (va < VM_MAXUSER_ADDRESS) {
			if (pt2_is_empty(m, va))
				isnew = 1;	/* Demoting section w/o promotion. */
#ifdef INVARIANTS
			else
				KASSERT(pt2_is_full(m, va), ("%s: bad PT2 wire"
				    " count %u", __func__,
				    pt2_wirecount_get(m, pte1_index(va))));
#endif
		}
	}

	pt2pg_pa = VM_PAGE_TO_PHYS(m);
	pte1_idx = pte1_index(va);
	/*
	 * If the pmap is current, then the PT2MAP can provide access to
	 * the page table page (promoted L2 page tables are not unmapped).
	 * Otherwise, temporarily map the L2 page table page (m) into
	 * the kernel's address space at either PADDR1 or PADDR2.
	 *
	 * Note that L2 page table size is not equal to PAGE_SIZE.
	 */
	if (pmap_is_current(pmap))
		fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx);
	else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) {
		if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) {
			pte2_store(PMAP1, PTE2_KPT(pt2pg_pa));
#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			tlb_flush_local((vm_offset_t)PADDR1);
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			PMAP1cpu = PCPU_GET(cpuid);
			tlb_flush_local((vm_offset_t)PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		fpte2p = page_pt2((vm_offset_t)PADDR1, pte1_idx);
	} else {
		mtx_lock(&PMAP2mutex);
		if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) {
			pte2_store(PMAP2, PTE2_KPT(pt2pg_pa));
			tlb_flush((vm_offset_t)PADDR2);
		}
		fpte2p = page_pt2((vm_offset_t)PADDR2, pte1_idx);
	}
	pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx);
	npte1 = PTE1_LINK(pt2_pa);

	KASSERT((opte1 & PTE1_A) != 0,
	    ("%s: opte1 is missing PTE1_A", __func__));
	KASSERT((opte1 & (PTE1_NM | PTE1_RO)) != PTE1_NM,
	    ("%s: opte1 has PTE1_NM", __func__));

	/*
	 * Get pte2 from pte1 format.
	 */
	npte2 = pte1_pa(opte1) | ATTR_TO_L2(opte1) | PTE2_V;

	/*
	 * If the L2 page table page is new, initialize it. If the mapping
	 * has changed attributes, update the page table entries.
	 */
	if (isnew != 0) {
		pt2_wirecount_set(m, pte1_idx, NPTE2_IN_PT2);
		pmap_fill_pt2(fpte2p, npte2);
	} else if ((pte2_load(fpte2p) & PTE2_PROMOTE) !=
	    (npte2 & PTE2_PROMOTE))
		pmap_fill_pt2(fpte2p, npte2);

	KASSERT(pte2_pa(pte2_load(fpte2p)) == pte2_pa(npte2),
	    ("%s: fpte2p and npte2 map different physical addresses",
	    __func__));

	/*
	 * NOTE(review): page_pt2((vm_offset_t)PADDR2, pte1_idx) adds an
	 * offset derived from pte1_idx, so "fpte2p == PADDR2" seems to hold
	 * only for an offset of zero -- verify that PMAP2mutex cannot be
	 * left held when the PADDR2 path was taken with a nonzero offset.
	 */
	if (fpte2p == PADDR2)
		mtx_unlock(&PMAP2mutex);

	/*
	 * Demote the mapping. This pmap is locked. The old PTE1 has
	 * PTE1_A set. If the old PTE1 has not PTE1_RO set, it also
	 * has not PTE1_NM set. Thus, there is no danger of a race with
	 * another processor changing the setting of PTE1_A and/or PTE1_NM
	 * between the read above and the store below.
	 */
	pmap_change_pte1(pmap, pte1p, va, npte1);

	/*
	 * Demote the pv entry. This depends on the earlier demotion
	 * of the mapping. Specifically, the (re)creation of a per-
	 * page pv entry might trigger the execution of pmap_pv_reclaim(),
	 * which might reclaim a newly (re)created per-page pv entry
	 * and destroy the associated mapping. In order to destroy
	 * the mapping, the PTE1 must have already changed from mapping
	 * the 1mpage to referencing the page table page.
	 */
	if (pte1_is_managed(opte1))
		pmap_pv_demote_pte1(pmap, va, pte1_pa(opte1));

	pmap_pte1_demotions++;
	CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p",
	    __func__, va, pmap);

	PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n",
	    __func__, pmap, va, npte1, pte1_load(pte1p), pte1p));
	return (TRUE);
}

/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB: This is the only routine which MAY NOT lazy-evaluate
 * or lose information. That is, this routine must actually
 * insert this page into the given map NOW.
 */
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    u_int flags, int8_t psind)
{
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p;
	pt2_entry_t npte2, opte2;
	pv_entry_t pv;
	vm_paddr_t opa, pa;
	vm_page_t mpte2, om;
	boolean_t wired;

	va = trunc_page(va);
	mpte2 = NULL;
	wired = (flags & PMAP_ENTER_WIRED) != 0;

	KASSERT(va <= vm_max_kernel_address, ("%s: toobig", __func__));
	KASSERT(va < UPT2V_MIN_ADDRESS || va >= UPT2V_MAX_ADDRESS,
	    ("%s: invalid to pmap_enter page table pages (va: 0x%x)", __func__,
	    va));
	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
		VM_OBJECT_ASSERT_LOCKED(m->object);

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	sched_pin();

	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		mpte2 = pmap_allocpte2(pmap, va, flags);
		if (mpte2 == NULL) {
			KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
			    ("pmap_allocpte2 failed with sleep allowed"));
			sched_unpin();
			rw_wunlock(&pvh_global_lock);
			PMAP_UNLOCK(pmap);
			return (KERN_RESOURCE_SHORTAGE);
		}
	}
	pte1p = pmap_pte1(pmap, va);
	if (pte1_is_section(pte1_load(pte1p)))
		panic("%s: attempted on 1MB page", __func__);
	pte2p = pmap_pte2_quick(pmap, va);
	if (pte2p == NULL)
		panic("%s: invalid L1 page table entry va=%#x", __func__, va);

	om = NULL;
	pa = VM_PAGE_TO_PHYS(m);
	opte2 = pte2_load(pte2p);
	opa = pte2_pa(opte2);
	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (pte2_is_valid(opte2) && (opa == pa)) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT2 pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT2 page will be also.
		 */
		if (wired && !pte2_is_wired(opte2))
			pmap->pm_stats.wired_count++;
		else if (!wired && pte2_is_wired(opte2))
			pmap->pm_stats.wired_count--;

		/*
		 * Remove extra pte2 reference
		 */
		if (mpte2)
			pt2_wirecount_dec(mpte2, pte1_index(va));
		if (pte2_is_managed(opte2))
			om = m;
		goto validate;
	}

	/*
	 * QQQ: We think that changing physical address on writeable mapping
	 *      is not safe. Well, maybe on kernel address space with correct
	 *      locking, it can make a sense. However, we have no idea why
	 *      anyone should do that on user address space. Are we wrong?
	 */
	KASSERT((opa == 0) || (opa == pa) ||
	    !pte2_is_valid(opte2) || ((opte2 & PTE2_RO) != 0),
	    ("%s: pmap %p va %#x(%#x) opa %#x pa %#x - gotcha %#x %#x!",
	    __func__, pmap, va, opte2, opa, pa, flags, prot));

	pv = NULL;

	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		if (pte2_is_wired(opte2))
			pmap->pm_stats.wired_count--;
		if (pte2_is_managed(opte2)) {
			om = PHYS_TO_VM_PAGE(opa);
			pv = pmap_pvh_remove(&om->md, pmap, va);
		}
		/*
		 * Remove extra pte2 reference
		 */
		if (mpte2 != NULL)
			pt2_wirecount_dec(mpte2, va >> PTE1_SHIFT);
	} else
		pmap->pm_stats.resident_count++;

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("%s: managed mapping within the clean submap", __func__));
		if (pv == NULL)
			pv = get_pv_entry(pmap, FALSE);
		pv->pv_va = va;
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
	} else if (pv != NULL)
		free_pv_entry(pmap, pv);

	/*
	 * Increment counters
	 */
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	npte2 = PTE2(pa, PTE2_NM, vm_page_pte2_attr(m));
	if (prot & VM_PROT_WRITE) {
		/*
		 * NOTE(review): pte2_is_managed() is applied here to the
		 * freshly built npte2 -- confirm that PTE2() carries the
		 * managed attribute at this point.
		 */
		if (pte2_is_managed(npte2))
			vm_page_aflag_set(m, PGA_WRITEABLE);
	}
	else
		npte2 |= PTE2_RO;
	if ((prot & VM_PROT_EXECUTE) == 0)
		npte2 |= PTE2_NX;
	if (wired)
		npte2 |= PTE2_W;
	if (va < VM_MAXUSER_ADDRESS)
		npte2 |= PTE2_U;
	if (pmap != kernel_pmap)
		npte2 |= PTE2_NG;

	/*
	 * If the mapping or permission bits are different, we need
	 * to update the pte2.
	 *
	 * QQQ: Think again and again what to do
	 *      if the mapping is going to be changed!
	 */
	if ((opte2 & ~(PTE2_NM | PTE2_A)) != (npte2 & ~(PTE2_NM | PTE2_A))) {
		/*
		 * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA
		 * is set. Do it now, before the mapping is stored and made
		 * valid for hardware table walk. If done later, there is a race
		 * for other threads of current process in lazy loading case.
		 * Don't do it for kernel memory which is mapped with exec
		 * permission even if the memory isn't going to hold executable
		 * code. The only time when icache sync is needed is after
		 * kernel module is loaded and the relocation info is processed.
		 * And it's done in elf_cpu_load_file().
		 *
		 * QQQ: (1) Does it exist any better way where
		 *          or how to sync icache?
		 *      (2) Now, we do it on a page basis.
		 */
		if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
		    m->md.pat_mode == VM_MEMATTR_WB_WA &&
		    (opa != pa || (opte2 & PTE2_NX)))
			cache_icache_sync_fresh(va, pa, PAGE_SIZE);

		npte2 |= PTE2_A;
		/*
		 * NOTE(review): "flags" (not "prot") is tested here --
		 * presumably flags carries the access type bits in addition
		 * to the PMAP_ENTER_* flags; confirm against callers.
		 */
		if (flags & VM_PROT_WRITE)
			npte2 &= ~PTE2_NM;
		if (opte2 & PTE2_V) {
			/* Change mapping with break-before-make approach. */
			opte2 = pte2_load_clear(pte2p);
			pmap_tlb_flush(pmap, va);
			pte2_store(pte2p, npte2);
			if (opte2 & PTE2_A) {
				if (pte2_is_managed(opte2))
					vm_page_aflag_set(om, PGA_REFERENCED);
			}
			if (pte2_is_dirty(opte2)) {
				if (pte2_is_managed(opte2))
					vm_page_dirty(om);
			}
			if (pte2_is_managed(opte2) &&
			    TAILQ_EMPTY(&om->md.pv_list) &&
			    ((om->flags & PG_FICTITIOUS) != 0 ||
			    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
				vm_page_aflag_clear(om, PGA_WRITEABLE);
		} else
			pte2_store(pte2p, npte2);
	}
#if 0
	else {
		/*
		 * QQQ: In time when both access and not mofified bits are
		 *      emulated by software, this should not happen. Some
		 *      analysis is need, if this really happen. Missing
		 *      tlb flush somewhere could be the reason.
		 */
		panic("%s: pmap %p va %#x opte2 %x npte2 %x !!", __func__, pmap,
		    va, opte2, npte2);
	}
#endif
	/*
	 * If both the L2 page table page and the reservation are fully
	 * populated, then attempt promotion.
	 */
	if ((mpte2 == NULL || pt2_is_full(mpte2, va)) &&
	    sp_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
	    vm_reserv_level_iffullpop(m) == 0)
		pmap_promote_pte1(pmap, pte1p, va);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	return (KERN_SUCCESS);
}

/*
 * Do the things to unmap a page in a process.
 */
static int
pmap_remove_pte2(pmap_t pmap, pt2_entry_t *pte2p, vm_offset_t va,
    struct spglist *free)
{
	pt2_entry_t opte2;
	vm_page_t m;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/* Clear and invalidate the mapping. */
	opte2 = pte2_load_clear(pte2p);
	pmap_tlb_flush(pmap, va);

	KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %#x not link pte2 %#x",
	    __func__, pmap, va, opte2));

	if (opte2 & PTE2_W)
		pmap->pm_stats.wired_count -= 1;
	pmap->pm_stats.resident_count -= 1;
	if (pte2_is_managed(opte2)) {
		/* Reflect dirty/referenced state back to the vm_page. */
		m = PHYS_TO_VM_PAGE(pte2_pa(opte2));
		if (pte2_is_dirty(opte2))
			vm_page_dirty(m);
		if (opte2 & PTE2_A)
			vm_page_aflag_set(m, PGA_REFERENCED);
		pmap_remove_entry(pmap, m, va);
	}
	return (pmap_unuse_pt2(pmap, va, free));
}

/*
 * Remove a single page from a process address space.
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free)
{
	pt2_entry_t *pte2p;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	/* pmap_pte2_quick() requires the thread to be pinned. */
	KASSERT(curthread->td_pinned > 0,
	    ("%s: curthread not pinned", __func__));
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/* Nothing to do if there is no valid small mapping at "va". */
	if ((pte2p = pmap_pte2_quick(pmap, va)) == NULL ||
	    !pte2_is_valid(pte2_load(pte2p)))
		return;
	pmap_remove_pte2(pmap, pte2p, va, free);
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t nextva;
	pt1_entry_t *pte1p, pte1;
	pt2_entry_t *pte2p, pte2;
	struct spglist free;

	/*
	 * Perform an unsynchronized read. This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	SLIST_INIT(&free);

	rw_wlock(&pvh_global_lock);
	sched_pin();
	PMAP_LOCK(pmap);

	/*
	 * Special handling of removing one page. A very common
	 * operation and easy to short circuit some code.
	 */
	if (sva + PAGE_SIZE == eva) {
		pte1 = pte1_load(pmap_pte1(pmap, sva));
		if (pte1_is_link(pte1)) {
			pmap_remove_page(pmap, sva, &free);
			goto out;
		}
	}

	for (; sva < eva; sva = nextva) {
		/*
		 * Calculate address for next L2 page table.
		 */
		nextva = pte1_trunc(sva + PTE1_SIZE);
		/* Guard against wrap at the top of the address space. */
		if (nextva < sva)
			nextva = eva;
		if (pmap->pm_stats.resident_count == 0)
			break;

		pte1p = pmap_pte1(pmap, sva);
		pte1 = pte1_load(pte1p);

		/*
		 * Weed out invalid mappings. Note: we assume that the L1 page
		 * table is always allocated, and in kernel virtual.
		 */
		if (pte1 == 0)
			continue;

		if (pte1_is_section(pte1)) {
			/*
			 * Are we removing the entire large page? If not,
			 * demote the mapping and fall through.
			 */
			if (sva + PTE1_SIZE == nextva && eva >= nextva) {
				pmap_remove_pte1(pmap, pte1p, sva, &free);
				continue;
			} else if (!pmap_demote_pte1(pmap, pte1p, sva)) {
				/* The large page mapping was destroyed. */
				continue;
			}
#ifdef INVARIANTS
			else {
				/* Update pte1 after demotion. */
				pte1 = pte1_load(pte1p);
			}
#endif
		}

		KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
		    " is not link", __func__, pmap, sva, pte1, pte1p));

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current L2 page table page, or to the end of the
		 * range being removed.
		 */
		if (nextva > eva)
			nextva = eva;

		for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva;
		    pte2p++, sva += PAGE_SIZE) {
			pte2 = pte2_load(pte2p);
			if (!pte2_is_valid(pte2))
				continue;
			/*
			 * A non-zero return means the L2 page table page
			 * went away; skip to the next 1MB boundary.
			 */
			if (pmap_remove_pte2(pmap, pte2p, sva, &free))
				break;
		}
	}
out:
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}

/*
 *	Routine:	pmap_remove_all
 *	Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 *	Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */
void
pmap_remove_all(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t pv;
	pmap_t pmap;
	pt2_entry_t *pte2p, opte2;
	pt1_entry_t *pte1p;
	vm_offset_t va;
	struct spglist free;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("%s: page %p is not managed", __func__, m));
	SLIST_INIT(&free);
	rw_wlock(&pvh_global_lock);
	sched_pin();
	if ((m->flags & PG_FICTITIOUS) != 0)
		goto small_mappings;
	/*
	 * First demote every 1MB mapping containing the page so that
	 * only 4KB mappings remain on the page's pv list.
	 */
	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
		va = pv->pv_va;
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte1p = pmap_pte1(pmap, va);
		(void)pmap_demote_pte1(pmap, pte1p, va);
		PMAP_UNLOCK(pmap);
	}
small_mappings:
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pmap->pm_stats.resident_count--;
		pte1p = pmap_pte1(pmap, pv->pv_va);
		KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found "
		    "a 1mpage in page %p's pv list", __func__, m));
		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
		opte2 = pte2_load_clear(pte2p);
		pmap_tlb_flush(pmap, pv->pv_va);
		KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %x zero pte2",
		    __func__, pmap, pv->pv_va));
		if (pte2_is_wired(opte2))
			pmap->pm_stats.wired_count--;
		if (opte2 & PTE2_A)
			vm_page_aflag_set(m, PGA_REFERENCED);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (pte2_is_dirty(opte2))
			vm_page_dirty(m);
		pmap_unuse_pt2(pmap, pv->pv_va, &free);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
		free_pv_entry(pmap, pv);
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	pmap_free_zero_pages(&free);
}

/*
 * Just subroutine for pmap_remove_pages() to reasonably satisfy
 * good coding style, a.k.a. 80 character line width limit hell.
 *
 * Tear down the 1MB section mapping described by "pte1" (already
 * cleared by the caller): dirty every constituent 4KB page if the
 * section was modified, fix up statistics and the pv list, and
 * release all wirings of the associated L2 page table page.
 */
static __inline void
pmap_remove_pte1_quick(pmap_t pmap, pt1_entry_t pte1, pv_entry_t pv,
    struct spglist *free)
{
	vm_paddr_t pa;
	vm_page_t m, mt, mpt2pg;
	struct md_page *pvh;

	pa = pte1_pa(pte1);
	m = PHYS_TO_VM_PAGE(pa);

	KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x",
	    __func__, m, m->phys_addr, pa));
	KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
	    m < &vm_page_array[vm_page_array_size],
	    ("%s: bad pte1 %#x", __func__, pte1));

	if (pte1_is_dirty(pte1)) {
		for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
			vm_page_dirty(mt);
	}

	pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE;
	pvh = pa_to_pvh(pa);
	TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
	if (TAILQ_EMPTY(&pvh->pv_list)) {
		/*
		 * No 1MB mappings remain; clear PGA_WRITEABLE on each
		 * 4KB page that also has no small mappings left.
		 */
		for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
			if (TAILQ_EMPTY(&mt->md.pv_list))
				vm_page_aflag_clear(mt, PGA_WRITEABLE);
	}
	mpt2pg = pmap_pt2_page(pmap, pv->pv_va);
	if (mpt2pg != NULL)
		pmap_unwire_pt2_all(pmap, pv->pv_va, mpt2pg, free);
}

/*
 * Just subroutine for pmap_remove_pages() to reasonably satisfy
 * good coding style, a.k.a. 80 character line width limit hell.
 *
 * Tear down the single 4KB mapping described by "pte2" (already
 * cleared by the caller): reflect the modified bit back to the
 * vm_page, fix up statistics and the pv list, and drop the wiring
 * of the L2 page table page.
 */
static __inline void
pmap_remove_pte2_quick(pmap_t pmap, pt2_entry_t pte2, pv_entry_t pv,
    struct spglist *free)
{
	vm_paddr_t pa;
	vm_page_t m;
	struct md_page *pvh;

	pa = pte2_pa(pte2);
	m = PHYS_TO_VM_PAGE(pa);

	KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x",
	    __func__, m, m->phys_addr, pa));
	KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
	    m < &vm_page_array[vm_page_array_size],
	    ("%s: bad pte2 %#x", __func__, pte2));

	if (pte2_is_dirty(pte2))
		vm_page_dirty(m);

	pmap->pm_stats.resident_count--;
	TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
	if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) {
		/* Only clear PGA_WRITEABLE if no 1MB mapping remains. */
		pvh = pa_to_pvh(pa);
		if (TAILQ_EMPTY(&pvh->pv_list))
			vm_page_aflag_clear(m, PGA_WRITEABLE);
	}
	pmap_unuse_pt2(pmap, pv->pv_va, free);
}

/*
 * Remove all pages from specified address space this aids process
 * exit speeds. Also, this code is special cased for current process
 * only, but can have the more generic (and slightly slower) mode enabled.
 * This is much faster than pmap_remove in the case of running down
 * an entire address space.
 *
 * Walks the pmap's pv chunks instead of the page tables; wired
 * mappings are deliberately left in place.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	pt1_entry_t *pte1p, pte1;
	pt2_entry_t *pte2p, pte2;
	pv_entry_t pv;
	struct pv_chunk *pc, *npc;
	struct spglist free;
	int field, idx;
	int32_t bit;
	uint32_t inuse, bitmask;
	boolean_t allfree;

	/*
	 * Assert that the given pmap is only active on the current
	 * CPU. Unfortunately, we cannot block another CPU from
	 * activating the pmap while this function is executing.
	 */
	KASSERT(pmap == vmspace_pmap(curthread->td_proc->p_vmspace),
	    ("%s: non-current pmap %p", __func__, pmap));
#if defined(SMP) && defined(INVARIANTS)
	{
		cpuset_t other_cpus;

		sched_pin();
		other_cpus = pmap->pm_active;
		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
		sched_unpin();
		KASSERT(CPU_EMPTY(&other_cpus),
		    ("%s: pmap %p active on other cpus", __func__, pmap));
	}
#endif
	SLIST_INIT(&free);
	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	sched_pin();
	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
		KASSERT(pc->pc_pmap == pmap, ("%s: wrong pmap %p %p",
		    __func__, pmap, pc->pc_pmap));
		allfree = TRUE;
		for (field = 0; field < _NPCM; field++) {
			/* Bits clear in pc_map mark pv entries in use. */
			inuse = (~(pc->pc_map[field])) & pc_freemask[field];
			while (inuse != 0) {
				bit = ffs(inuse) - 1;
				bitmask = 1UL << bit;
				idx = field * 32 + bit;
				pv = &pc->pc_pventry[idx];
				inuse &= ~bitmask;

				/*
				 * Note that we cannot remove wired pages
				 * from a process' mapping at this time
				 */
				pte1p = pmap_pte1(pmap, pv->pv_va);
				pte1 = pte1_load(pte1p);
				if (pte1_is_section(pte1)) {
					if (pte1_is_wired(pte1))  {
						allfree = FALSE;
						continue;
					}
					pte1_clear(pte1p);
					pmap_remove_pte1_quick(pmap, pte1, pv,
					    &free);
				}
				else if (pte1_is_link(pte1)) {
					pte2p = pt2map_entry(pv->pv_va);
					pte2 = pte2_load(pte2p);

					if (!pte2_is_valid(pte2)) {
						printf("%s: pmap %p va %#x "
						    "pte2 %#x\n", __func__,
						    pmap, pv->pv_va, pte2);
						panic("bad pte2");
					}

					if (pte2_is_wired(pte2))   {
						allfree = FALSE;
						continue;
					}
					pte2_clear(pte2p);
					pmap_remove_pte2_quick(pmap, pte2, pv,
					    &free);
				} else {
					printf("%s: pmap %p va %#x pte1 %#x\n",
					    __func__, pmap, pv->pv_va, pte1);
					panic("bad pte1");
				}

				/* Mark free */
				PV_STAT(pv_entry_frees++);
				PV_STAT(pv_entry_spare++);
				pv_entry_count--;
				pc->pc_map[field] |= bitmask;
			}
		}
		if (allfree) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			free_pv_chunk(pc);
		}
	}
	/* One global flush instead of per-mapping TLB invalidations. */
	tlb_flush_all_ng_local();
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}

/*
 * This code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No L2 page table pages.
 * but is *MUCH* faster than pmap_enter...
 *
 * Returns the L2 page table page used for the mapping (for reuse by
 * the caller on the next call), or NULL if no mapping was created.
 */
static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, vm_page_t mpt2pg)
{
	pt2_entry_t *pte2p, pte2;
	vm_paddr_t pa;
	struct spglist free;
	uint32_t l2prot;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->oflags & VPO_UNMANAGED) != 0,
	    ("%s: managed mapping within the clean submap", __func__));
	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * In the case that a L2 page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		u_int pte1_idx;
		pt1_entry_t pte1, *pte1p;
		vm_paddr_t pt2_pa;

		/*
		 * Get L1 page table things.
		 */
		pte1_idx = pte1_index(va);
		pte1p = pmap_pte1(pmap, va);
		pte1 = pte1_load(pte1p);

		if (mpt2pg && (mpt2pg->pindex == (pte1_idx & ~PT2PG_MASK))) {
			/*
			 * Each of NPT2_IN_PG L2 page tables on the page can
			 * come here. Make sure that associated L1 page table
			 * link is established.
			 *
			 * QQQ: It comes that we don't establish all links to
			 *      L2 page tables for newly allocated L2 page
			 *      tables page.
			 */
			KASSERT(!pte1_is_section(pte1),
			    ("%s: pte1 %#x is section", __func__, pte1));
			if (!pte1_is_link(pte1)) {
				pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(mpt2pg),
				    pte1_idx);
				pte1_store(pte1p, PTE1_LINK(pt2_pa));
			}
			pt2_wirecount_inc(mpt2pg, pte1_idx);
		} else {
			/*
			 * If the L2 page table page is mapped, we just
			 * increment the hold count, and activate it.
			 */
			if (pte1_is_section(pte1)) {
				return (NULL);
			} else if (pte1_is_link(pte1)) {
				mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
				pt2_wirecount_inc(mpt2pg, pte1_idx);
			} else {
				mpt2pg = _pmap_allocpte2(pmap, va,
				    PMAP_ENTER_NOSLEEP);
				if (mpt2pg == NULL)
					return (NULL);
			}
		}
	} else {
		mpt2pg = NULL;
	}

	/*
	 * This call to pt2map_entry() makes the assumption that we are
	 * entering the page into the current pmap.  In order to support
	 * quick entry into any pmap, one would likely use pmap_pte2_quick().
	 * But that isn't as quick as pt2map_entry().
	 */
	pte2p = pt2map_entry(va);
	pte2 = pte2_load(pte2p);
	if (pte2_is_valid(pte2)) {
		if (mpt2pg != NULL) {
			/*
			 * Remove extra pte2 reference
			 */
			pt2_wirecount_dec(mpt2pg, pte1_index(va));
			mpt2pg = NULL;
		}
		return (NULL);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0 &&
	    !pmap_try_insert_pv_entry(pmap, va, m)) {
		/* No pv entry available; undo the page table wiring. */
		if (mpt2pg != NULL) {
			SLIST_INIT(&free);
			if (pmap_unwire_pt2(pmap, va, mpt2pg, &free)) {
				pmap_tlb_flush(pmap, va);
				pmap_free_zero_pages(&free);
			}

			mpt2pg = NULL;
		}
		return (NULL);
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;

	/*
	 * Now validate mapping with RO protection
	 */
	pa = VM_PAGE_TO_PHYS(m);
	l2prot = PTE2_RO | PTE2_NM;
	if (va < VM_MAXUSER_ADDRESS)
		l2prot |= PTE2_U | PTE2_NG;
	if ((prot & VM_PROT_EXECUTE) == 0)
		l2prot |= PTE2_NX;
	else if (m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap) {
		/*
		 * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA
		 * is set. QQQ: For more info, see comments in pmap_enter().
		 */
		cache_icache_sync_fresh(va, pa, PAGE_SIZE);
	}
	pte2_store(pte2p, PTE2(pa, l2prot, vm_page_pte2_attr(m)));

	return (mpt2pg);
}

/*
 * Public wrapper for pmap_enter_quick_locked(); acquires and releases
 * the required locks and discards the returned L2 page table page.
 */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}

/*
 * Tries to create 1MB page mapping.  Returns TRUE if successful and
 * FALSE otherwise.  Fails if (1) a page table page cannot be allocated without
 * blocking, (2) a mapping already exists at the specified virtual address, or
 * (3) a pv entry cannot be allocated without reclaiming another pv entry.
 */
static boolean_t
pmap_enter_pte1(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	pt1_entry_t *pte1p;
	vm_paddr_t pa;
	uint32_t l1prot;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pte1p = pmap_pte1(pmap, va);
	if (pte1_is_valid(pte1_load(pte1p))) {
		CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p", __func__,
		    va, pmap);
		return (FALSE);
	}
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		/*
		 * Abort this mapping if its PV entry could not be created.
		 */
		if (!pmap_pv_insert_pte1(pmap, va, VM_PAGE_TO_PHYS(m))) {
			CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p",
			    __func__, va, pmap);
			return (FALSE);
		}
	}
	/*
	 * Increment counters.
	 */
	pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE;

	/*
	 * Map the section.
	 *
	 * QQQ: Why VM_PROT_WRITE is not evaluated and the mapping is
	 *      made readonly?
	 */
	pa = VM_PAGE_TO_PHYS(m);
	l1prot = PTE1_RO | PTE1_NM;
	if (va < VM_MAXUSER_ADDRESS)
		l1prot |= PTE1_U | PTE1_NG;
	if ((prot & VM_PROT_EXECUTE) == 0)
		l1prot |= PTE1_NX;
	else if (m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap) {
		/*
		 * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA
		 * is set. QQQ: For more info, see comments in pmap_enter().
		 */
		cache_icache_sync_fresh(va, pa, PTE1_SIZE);
	}
	pte1_store(pte1p, PTE1(pa, l1prot, ATTR_TO_L1(vm_page_pte2_attr(m))));

	pmap_pte1_mappings++;
	CTR3(KTR_PMAP, "%s: success for va %#lx in pmap %p", __func__, va,
	    pmap);
	return (TRUE);
}

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.  Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_page_t m_start, vm_prot_t prot)
{
	vm_offset_t va;
	vm_page_t m, mpt2pg;
	vm_pindex_t diff, psize;

	PDEBUG(6, printf("%s: pmap %p start %#x end  %#x m %p prot %#x\n",
	    __func__, pmap, start, end, m_start, prot));

	VM_OBJECT_ASSERT_LOCKED(m_start->object);
	psize = atop(end - start);
	mpt2pg = NULL;
	m = m_start;
	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		va = start + ptoa(diff);
		/*
		 * Use a 1MB section when the address and the superpage-sized
		 * run of pages (psind == 1) are suitably aligned; otherwise
		 * fall back to a 4KB mapping.
		 */
		if ((va & PTE1_OFFSET) == 0 && va + PTE1_SIZE <= end &&
		    m->psind == 1 && sp_enabled &&
		    pmap_enter_pte1(pmap, va, m, prot))
			m = &m[PTE1_SIZE / PAGE_SIZE - 1];
		else
			mpt2pg = pmap_enter_quick_locked(pmap, va, m, prot,
			    mpt2pg);
		m = TAILQ_NEXT(m, listq);
	}
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}

/*
 * This code maps large physical mmap regions into the
 * processor address space.  Note that some shortcuts
 * are taken, but the code works.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
    vm_pindex_t pindex, vm_size_t size)
{
	pt1_entry_t *pte1p;
	vm_paddr_t pa, pte2_pa;
	vm_page_t p;
	vm_memattr_t pat_mode;
	u_int l1attr, l1prot;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
	    ("%s: non-device object", __func__));
	/* Only 1MB-aligned requests are mapped; others are ignored. */
	if ((addr & PTE1_OFFSET) == 0 && (size & PTE1_OFFSET) == 0) {
		if (!vm_object_populate(object, pindex, pindex + atop(size)))
			return;
		p = vm_page_lookup(object, pindex);
		KASSERT(p->valid == VM_PAGE_BITS_ALL,
		    ("%s: invalid page %p", __func__, p));
		pat_mode = p->md.pat_mode;

		/*
		 * Abort the mapping if the first page is not physically
		 * aligned to a 1MB page boundary.
		 */
		pte2_pa = VM_PAGE_TO_PHYS(p);
		if (pte2_pa & PTE1_OFFSET)
			return;

		/*
		 * Skip the first page. Abort the mapping if the rest of
		 * the pages are not physically contiguous or have differing
		 * memory attributes.
		 */
		p = TAILQ_NEXT(p, listq);
		for (pa = pte2_pa + PAGE_SIZE; pa < pte2_pa + size;
		    pa += PAGE_SIZE) {
			KASSERT(p->valid == VM_PAGE_BITS_ALL,
			    ("%s: invalid page %p", __func__, p));
			if (pa != VM_PAGE_TO_PHYS(p) ||
			    pat_mode != p->md.pat_mode)
				return;
			p = TAILQ_NEXT(p, listq);
		}

		/*
		 * Map using 1MB pages.
		 *
		 * QQQ: Well, we are mapping a section, so same condition must
		 * be hold like during promotion. It looks that only RW mapping
		 * is done here, so readonly mapping must be done elsewhere.
		 */
		l1prot = PTE1_U | PTE1_NG | PTE1_RW | PTE1_M | PTE1_A;
		l1attr = ATTR_TO_L1(vm_memattr_to_pte2(pat_mode));
		PMAP_LOCK(pmap);
		for (pa = pte2_pa; pa < pte2_pa + size; pa += PTE1_SIZE) {
			pte1p = pmap_pte1(pmap, addr);
			if (!pte1_is_valid(pte1_load(pte1p))) {
				pte1_store(pte1p, PTE1(pa, l1prot, l1attr));
				pmap->pm_stats.resident_count += PTE1_SIZE /
				    PAGE_SIZE;
				pmap_pte1_mappings++;
			}
			/* Else continue on if the PTE1 is already valid. */
			addr += PTE1_SIZE;
		}
		PMAP_UNLOCK(pmap);
	}
}

/*
 * Do the things to protect a 1mpage in a process.
 *
 * Apply "prot" to the 1MB section mapping at "sva" (section-aligned,
 * asserted), reflecting the modified bit back to the constituent
 * vm_pages before write permission is revoked.
 */
static void
pmap_protect_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva,
    vm_prot_t prot)
{
	pt1_entry_t npte1, opte1;
	vm_offset_t eva, va;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT((sva & PTE1_OFFSET) == 0,
	    ("%s: sva is not 1mpage aligned", __func__));

	opte1 = npte1 = pte1_load(pte1p);
	if (pte1_is_managed(opte1) && pte1_is_dirty(opte1)) {
		/* Mark every 4KB page within the dirty section dirty. */
		eva = sva + PTE1_SIZE;
		for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1));
		    va < eva; va += PAGE_SIZE, m++)
			vm_page_dirty(m);
	}
	if ((prot & VM_PROT_WRITE) == 0)
		npte1 |= PTE1_RO | PTE1_NM;
	if ((prot & VM_PROT_EXECUTE) == 0)
		npte1 |= PTE1_NX;

	/*
	 * QQQ: Herein, execute permission is never set.
	 *      It only can be cleared. So, no icache
	 *      syncing is needed.
	 */

	if (npte1 != opte1) {
		pte1_store(pte1p, npte1);
		pmap_tlb_flush(pmap, sva);
	}
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	boolean_t pv_lists_locked;
	vm_offset_t nextva;
	pt1_entry_t *pte1p, pte1;
	pt2_entry_t *pte2p, opte2, npte2;

	KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
	if (prot == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	/* Nothing to restrict if write and execute both stay enabled. */
	if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
	    (VM_PROT_WRITE | VM_PROT_EXECUTE))
		return;

	if (pmap_is_current(pmap))
		pv_lists_locked = FALSE;
	else {
		pv_lists_locked = TRUE;
resume:
		rw_wlock(&pvh_global_lock);
		sched_pin();
	}

	PMAP_LOCK(pmap);
	for (; sva < eva; sva = nextva) {
		/*
		 * Calculate address for next L2 page table.
		 */
		nextva = pte1_trunc(sva + PTE1_SIZE);
		/* Guard against wrap at the top of the address space. */
		if (nextva < sva)
			nextva = eva;

		pte1p = pmap_pte1(pmap, sva);
		pte1 = pte1_load(pte1p);

		/*
		 * Weed out invalid mappings. Note: we assume that L1 page
		 * page table is always allocated, and in kernel virtual.
		 */
		if (pte1 == 0)
			continue;

		if (pte1_is_section(pte1)) {
			/*
			 * Are we protecting the entire large page? If not,
			 * demote the mapping and fall through.
			 */
			if (sva + PTE1_SIZE == nextva && eva >= nextva) {
				pmap_protect_pte1(pmap, pte1p, sva, prot);
				continue;
			} else {
				/*
				 * Demotion needs the pv lists; take the
				 * locks now, restarting this iteration if
				 * the trylock fails.
				 */
				if (!pv_lists_locked) {
					pv_lists_locked = TRUE;
					if (!rw_try_wlock(&pvh_global_lock)) {
						PMAP_UNLOCK(pmap);
						goto resume;
					}
					sched_pin();
				}
				if (!pmap_demote_pte1(pmap, pte1p, sva)) {
					/*
					 * The large page mapping
					 * was destroyed.
					 */
					continue;
				}
#ifdef INVARIANTS
				else {
					/* Update pte1 after demotion */
					pte1 = pte1_load(pte1p);
				}
#endif
			}
		}

		KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
		    " is not link", __func__, pmap, sva, pte1, pte1p));

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current L2 page table page, or to the end of the
		 * range being protected.
		 */
		if (nextva > eva)
			nextva = eva;

		for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++,
		    sva += PAGE_SIZE) {
			vm_page_t m;

			opte2 = npte2 = pte2_load(pte2p);
			if (!pte2_is_valid(opte2))
				continue;

			if ((prot & VM_PROT_WRITE) == 0) {
				if (pte2_is_managed(opte2) &&
				    pte2_is_dirty(opte2)) {
					m = PHYS_TO_VM_PAGE(pte2_pa(opte2));
					vm_page_dirty(m);
				}
				npte2 |= PTE2_RO | PTE2_NM;
			}

			if ((prot & VM_PROT_EXECUTE) == 0)
				npte2 |= PTE2_NX;

			/*
			 * QQQ: Herein, execute permission is never set.
			 *      It only can be cleared. So, no icache
			 *      syncing is needed.
			 */

			if (npte2 != opte2) {
				pte2_store(pte2p, npte2);
				pmap_tlb_flush(pmap, sva);
			}
		}
	}
	if (pv_lists_locked) {
		sched_unpin();
		rw_wunlock(&pvh_global_lock);
	}
	PMAP_UNLOCK(pmap);
}

/*
 *	pmap_pvh_wired_mappings:
 *
 *	Return the updated number "count" of managed mappings that are wired.
 */
static int
pmap_pvh_wired_mappings(struct md_page *pvh, int count)
{
	pmap_t pmap;
	pt1_entry_t pte1;
	pt2_entry_t pte2;
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	sched_pin();
	/* Examine every mapping on the pv list, 1MB sections and 4KB pages. */
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va));
		if (pte1_is_section(pte1)) {
			if (pte1_is_wired(pte1))
				count++;
		} else {
			KASSERT(pte1_is_link(pte1),
			    ("%s: pte1 %#x is not link", __func__, pte1));
			pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va));
			if (pte2_is_wired(pte2))
				count++;
		}
		PMAP_UNLOCK(pmap);
	}
	sched_unpin();
	return (count);
}

/*
 *	pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	int count;

	count = 0;
	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (count);
	rw_wlock(&pvh_global_lock);
	count = pmap_pvh_wired_mappings(&m->md, count);
	if ((m->flags & PG_FICTITIOUS) == 0) {
		/* Also count wired 1MB mappings containing this page. */
		count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)),
		    count);
	}
	rw_wunlock(&pvh_global_lock);
	return (count);
}

/*
 * Returns TRUE if any of the given mappings were used to modify
 * physical memory. Otherwise, returns FALSE. Both page and 1mpage
 * mappings are supported.
 */
static boolean_t
pmap_is_modified_pvh(struct md_page *pvh)
{
	pv_entry_t pv;
	pt1_entry_t pte1;
	pt2_entry_t pte2;
	pmap_t pmap;
	boolean_t rv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	rv = FALSE;
	sched_pin();
	/* Stop at the first mapping found to be dirty. */
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va));
		if (pte1_is_section(pte1)) {
			rv = pte1_is_dirty(pte1);
		} else {
			KASSERT(pte1_is_link(pte1),
			    ("%s: pte1 %#x is not link", __func__, pte1));
			pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va));
			rv = pte2_is_dirty(pte2);
		}
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	sched_unpin();
	return (rv);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("%s: page %p is not managed", __func__, m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
	 * is clear, no PTE2s can have PG_M set.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return (FALSE);
	rw_wlock(&pvh_global_lock);
	/* Check both the 4KB mappings and any containing 1MB mappings. */
	rv = pmap_is_modified_pvh(&m->md) ||
	    ((m->flags & PG_FICTITIOUS) == 0 &&
	    pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 *	pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pt1_entry_t pte1;
	pt2_entry_t pte2;
	boolean_t rv;

	rv = FALSE;
	PMAP_LOCK(pmap);
	pte1 = pte1_load(pmap_pte1(pmap, addr));
	if (pte1_is_link(pte1)) {
		/* Prefaultable iff the L2 table exists but the pte2 is free. */
		pte2 = pte2_load(pt2map_entry(addr));
		rv = !pte2_is_valid(pte2);
	}
	PMAP_UNLOCK(pmap);
	return (rv);
}

/*
 * Returns TRUE if any of the given mappings were referenced and FALSE
 * otherwise. Both page and 1mpage mappings are supported.
 */
static boolean_t
pmap_is_referenced_pvh(struct md_page *pvh)
{

	pv_entry_t pv;
	pt1_entry_t pte1;
	pt2_entry_t pte2;
	pmap_t pmap;
	boolean_t rv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	rv = FALSE;
	sched_pin();
	/* Stop at the first mapping that is both valid and accessed. */
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va));
		if (pte1_is_section(pte1)) {
			rv = (pte1 & (PTE1_A | PTE1_V)) == (PTE1_A | PTE1_V);
		} else {
			pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va));
			rv = (pte2 & (PTE2_A | PTE2_V)) == (PTE2_A | PTE2_V);
		}
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	sched_unpin();
	return (rv);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("%s: page %p is not managed", __func__, m));
	rw_wlock(&pvh_global_lock);
	/* Check both the 4KB mappings and any containing 1MB mappings. */
	rv = pmap_is_referenced_pvh(&m->md) ||
	    ((m->flags & PG_FICTITIOUS) == 0 &&
	    pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/* Upper bound on reference bits counted/cleared per call. */
#define	PMAP_TS_REFERENCED_MAX	5

/*
 *	pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 *
 *	As an optimization, update the page's dirty field if a modified bit is
 *	found while counting reference bits.  This opportunistic update can be
 *	performed at low cost and can eliminate the need for some future calls
 *	to pmap_is_modified().  However, since this function stops after
 *	finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
 *	dirty pages.  Those dirty pages will only be detected by a future call
 *	to pmap_is_modified().
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t pv, pvf;
	pmap_t pmap;
	pt1_entry_t *pte1p, opte1;
	pt2_entry_t *pte2p, opte2;
	vm_paddr_t pa;
	int rtval = 0;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("%s: page %p is not managed", __func__, m));
	pa = VM_PAGE_TO_PHYS(m);
	pvh = pa_to_pvh(pa);
	rw_wlock(&pvh_global_lock);
	sched_pin();
	if ((m->flags & PG_FICTITIOUS) != 0 ||
	    (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
		goto small_mappings;
	pv = pvf;
	do {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte1p = pmap_pte1(pmap, pv->pv_va);
		opte1 = pte1_load(pte1p);
		if (pte1_is_dirty(opte1)) {
			/*
			 * Although "opte1" is mapping a 1MB page, because
			 * this function is called at a 4KB page granularity,
			 * we only update the 4KB page under test.
			 */
			vm_page_dirty(m);
		}
		if ((opte1 & PTE1_A) != 0) {
			/*
			 * Since this reference bit is shared by 256 4KB pages,
			 * it should not be cleared every time it is tested.
			 * Apply a simple "hash" function on the physical page
			 * number, the virtual section number, and the pmap
			 * address to select one 4KB page out of the 256
			 * on which testing the reference bit will result
			 * in clearing that bit. This function is designed
			 * to avoid the selection of the same 4KB page
			 * for every 1MB page mapping.
			 *
			 * On demotion, a mapping that hasn't been referenced
			 * is simply destroyed.  To avoid the possibility of a
			 * subsequent page fault on a demoted wired mapping,
			 * always leave its reference bit set.  Moreover,
			 * since the section is wired, the current state of
			 * its reference bit won't affect page replacement.
			 */
			if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PTE1_SHIFT) ^
			    (uintptr_t)pmap) & (NPTE2_IN_PG - 1)) == 0 &&
			    !pte1_is_wired(opte1)) {
				pte1_clear_bit(pte1p, PTE1_A);
				pmap_tlb_flush(pmap, pv->pv_va);
			}
			rtval++;
		}
		PMAP_UNLOCK(pmap);
		/* Rotate the PV list if it has more than one entry. */
		if (TAILQ_NEXT(pv, pv_next) != NULL) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
			TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
		}
		if (rtval >= PMAP_TS_REFERENCED_MAX)
			goto out;
	} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
small_mappings:
	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
		goto out;
	pv = pvf;
	do {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte1p = pmap_pte1(pmap, pv->pv_va);
		KASSERT(pte1_is_link(pte1_load(pte1p)),
		    ("%s: not found a link in page %p's pv list", __func__, m));

		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
		opte2 = pte2_load(pte2p);
		if (pte2_is_dirty(opte2))
			vm_page_dirty(m);
		if ((opte2 & PTE2_A) != 0) {
			pte2_clear_bit(pte2p, PTE2_A);
			pmap_tlb_flush(pmap, pv->pv_va);
			rtval++;
		}
		PMAP_UNLOCK(pmap);
		/* Rotate the PV list if it has more than one entry. */
		if (TAILQ_NEXT(pv, pv_next) != NULL) {
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		}
	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval <
	    PMAP_TS_REFERENCED_MAX);
out:
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	return (rtval);
}

/*
 *	Clear the wired attribute from the mappings for the specified range of
 *	addresses in the given pmap.  Every valid mapping within that range
 *	must have the wired attribute set.  In contrast, invalid mappings
 *	cannot have the wired attribute set, so they are ignored.
5332 * 5333 * The wired attribute of the page table entry is not a hardware feature, 5334 * so there is no need to invalidate any TLB entries. 5335 */ 5336void 5337pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 5338{ 5339 vm_offset_t nextva; 5340 pt1_entry_t *pte1p, pte1; 5341 pt2_entry_t *pte2p, pte2; 5342 boolean_t pv_lists_locked; 5343 5344 if (pmap_is_current(pmap)) 5345 pv_lists_locked = FALSE; 5346 else { 5347 pv_lists_locked = TRUE; 5348resume: 5349 rw_wlock(&pvh_global_lock); 5350 sched_pin(); 5351 } 5352 PMAP_LOCK(pmap); 5353 for (; sva < eva; sva = nextva) { 5354 nextva = pte1_trunc(sva + PTE1_SIZE); 5355 if (nextva < sva) 5356 nextva = eva; 5357 5358 pte1p = pmap_pte1(pmap, sva); 5359 pte1 = pte1_load(pte1p); 5360 5361 /* 5362 * Weed out invalid mappings. Note: we assume that L1 page 5363 * page table is always allocated, and in kernel virtual. 5364 */ 5365 if (pte1 == 0) 5366 continue; 5367 5368 if (pte1_is_section(pte1)) { 5369 if (!pte1_is_wired(pte1)) 5370 panic("%s: pte1 %#x not wired", __func__, pte1); 5371 5372 /* 5373 * Are we unwiring the entire large page? If not, 5374 * demote the mapping and fall through. 5375 */ 5376 if (sva + PTE1_SIZE == nextva && eva >= nextva) { 5377 pte1_clear_bit(pte1p, PTE1_W); 5378 pmap->pm_stats.wired_count -= PTE1_SIZE / 5379 PAGE_SIZE; 5380 continue; 5381 } else { 5382 if (!pv_lists_locked) { 5383 pv_lists_locked = TRUE; 5384 if (!rw_try_wlock(&pvh_global_lock)) { 5385 PMAP_UNLOCK(pmap); 5386 /* Repeat sva. 
*/ 5387 goto resume; 5388 } 5389 sched_pin(); 5390 } 5391 if (!pmap_demote_pte1(pmap, pte1p, sva)) 5392 panic("%s: demotion failed", __func__); 5393#ifdef INVARIANTS 5394 else { 5395 /* Update pte1 after demotion */ 5396 pte1 = pte1_load(pte1p); 5397 } 5398#endif 5399 } 5400 } 5401 5402 KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p" 5403 " is not link", __func__, pmap, sva, pte1, pte1p)); 5404 5405 /* 5406 * Limit our scan to either the end of the va represented 5407 * by the current L2 page table page, or to the end of the 5408 * range being protected. 5409 */ 5410 if (nextva > eva) 5411 nextva = eva; 5412 5413 for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++, 5414 sva += PAGE_SIZE) { 5415 pte2 = pte2_load(pte2p); 5416 if (!pte2_is_valid(pte2)) 5417 continue; 5418 if (!pte2_is_wired(pte2)) 5419 panic("%s: pte2 %#x is missing PTE2_W", 5420 __func__, pte2); 5421 5422 /* 5423 * PTE2_W must be cleared atomically. Although the pmap 5424 * lock synchronizes access to PTE2_W, another processor 5425 * could be changing PTE2_NM and/or PTE2_A concurrently. 5426 */ 5427 pte2_clear_bit(pte2p, PTE2_W); 5428 pmap->pm_stats.wired_count--; 5429 } 5430 } 5431 if (pv_lists_locked) { 5432 sched_unpin(); 5433 rw_wunlock(&pvh_global_lock); 5434 } 5435 PMAP_UNLOCK(pmap); 5436} 5437 5438/* 5439 * Clear the write and modified bits in each of the given page's mappings. 5440 */ 5441void 5442pmap_remove_write(vm_page_t m) 5443{ 5444 struct md_page *pvh; 5445 pv_entry_t next_pv, pv; 5446 pmap_t pmap; 5447 pt1_entry_t *pte1p; 5448 pt2_entry_t *pte2p, opte2; 5449 vm_offset_t va; 5450 5451 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5452 ("%s: page %p is not managed", __func__, m)); 5453 5454 /* 5455 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 5456 * set by another thread while the object is locked. Thus, 5457 * if PGA_WRITEABLE is clear, no page table entries need updating. 
5458 */ 5459 VM_OBJECT_ASSERT_WLOCKED(m->object); 5460 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 5461 return; 5462 rw_wlock(&pvh_global_lock); 5463 sched_pin(); 5464 if ((m->flags & PG_FICTITIOUS) != 0) 5465 goto small_mappings; 5466 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 5467 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { 5468 va = pv->pv_va; 5469 pmap = PV_PMAP(pv); 5470 PMAP_LOCK(pmap); 5471 pte1p = pmap_pte1(pmap, va); 5472 if (!(pte1_load(pte1p) & PTE1_RO)) 5473 (void)pmap_demote_pte1(pmap, pte1p, va); 5474 PMAP_UNLOCK(pmap); 5475 } 5476small_mappings: 5477 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 5478 pmap = PV_PMAP(pv); 5479 PMAP_LOCK(pmap); 5480 pte1p = pmap_pte1(pmap, pv->pv_va); 5481 KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found" 5482 " a section in page %p's pv list", __func__, m)); 5483 pte2p = pmap_pte2_quick(pmap, pv->pv_va); 5484 opte2 = pte2_load(pte2p); 5485 if (!(opte2 & PTE2_RO)) { 5486 pte2_store(pte2p, opte2 | PTE2_RO | PTE2_NM); 5487 if (pte2_is_dirty(opte2)) 5488 vm_page_dirty(m); 5489 pmap_tlb_flush(pmap, pv->pv_va); 5490 } 5491 PMAP_UNLOCK(pmap); 5492 } 5493 vm_page_aflag_clear(m, PGA_WRITEABLE); 5494 sched_unpin(); 5495 rw_wunlock(&pvh_global_lock); 5496} 5497 5498/* 5499 * Apply the given advice to the specified range of addresses within the 5500 * given pmap. Depending on the advice, clear the referenced and/or 5501 * modified flags in each mapping and set the mapped page's dirty field. 
5502 */ 5503void 5504pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 5505{ 5506 pt1_entry_t *pte1p, opte1; 5507 pt2_entry_t *pte2p, pte2; 5508 vm_offset_t pdnxt; 5509 vm_page_t m; 5510 boolean_t pv_lists_locked; 5511 5512 if (advice != MADV_DONTNEED && advice != MADV_FREE) 5513 return; 5514 if (pmap_is_current(pmap)) 5515 pv_lists_locked = FALSE; 5516 else { 5517 pv_lists_locked = TRUE; 5518resume: 5519 rw_wlock(&pvh_global_lock); 5520 sched_pin(); 5521 } 5522 PMAP_LOCK(pmap); 5523 for (; sva < eva; sva = pdnxt) { 5524 pdnxt = pte1_trunc(sva + PTE1_SIZE); 5525 if (pdnxt < sva) 5526 pdnxt = eva; 5527 pte1p = pmap_pte1(pmap, sva); 5528 opte1 = pte1_load(pte1p); 5529 if (!pte1_is_valid(opte1)) /* XXX */ 5530 continue; 5531 else if (pte1_is_section(opte1)) { 5532 if (!pte1_is_managed(opte1)) 5533 continue; 5534 if (!pv_lists_locked) { 5535 pv_lists_locked = TRUE; 5536 if (!rw_try_wlock(&pvh_global_lock)) { 5537 PMAP_UNLOCK(pmap); 5538 goto resume; 5539 } 5540 sched_pin(); 5541 } 5542 if (!pmap_demote_pte1(pmap, pte1p, sva)) { 5543 /* 5544 * The large page mapping was destroyed. 5545 */ 5546 continue; 5547 } 5548 5549 /* 5550 * Unless the page mappings are wired, remove the 5551 * mapping to a single page so that a subsequent 5552 * access may repromote. Since the underlying L2 page 5553 * table is fully populated, this removal never 5554 * frees a L2 page table page. 
5555 */ 5556 if (!pte1_is_wired(opte1)) { 5557 pte2p = pmap_pte2_quick(pmap, sva); 5558 KASSERT(pte2_is_valid(pte2_load(pte2p)), 5559 ("%s: invalid PTE2", __func__)); 5560 pmap_remove_pte2(pmap, pte2p, sva, NULL); 5561 } 5562 } 5563 if (pdnxt > eva) 5564 pdnxt = eva; 5565 for (pte2p = pmap_pte2_quick(pmap, sva); sva != pdnxt; pte2p++, 5566 sva += PAGE_SIZE) { 5567 pte2 = pte2_load(pte2p); 5568 if (!pte2_is_valid(pte2) || !pte2_is_managed(pte2)) 5569 continue; 5570 else if (pte2_is_dirty(pte2)) { 5571 if (advice == MADV_DONTNEED) { 5572 /* 5573 * Future calls to pmap_is_modified() 5574 * can be avoided by making the page 5575 * dirty now. 5576 */ 5577 m = PHYS_TO_VM_PAGE(pte2_pa(pte2)); 5578 vm_page_dirty(m); 5579 } 5580 pte2_set_bit(pte2p, PTE2_NM); 5581 pte2_clear_bit(pte2p, PTE2_A); 5582 } else if ((pte2 & PTE2_A) != 0) 5583 pte2_clear_bit(pte2p, PTE2_A); 5584 else 5585 continue; 5586 pmap_tlb_flush(pmap, sva); 5587 } 5588 } 5589 if (pv_lists_locked) { 5590 sched_unpin(); 5591 rw_wunlock(&pvh_global_lock); 5592 } 5593 PMAP_UNLOCK(pmap); 5594} 5595 5596/* 5597 * Clear the modify bits on the specified physical page. 5598 */ 5599void 5600pmap_clear_modify(vm_page_t m) 5601{ 5602 struct md_page *pvh; 5603 pv_entry_t next_pv, pv; 5604 pmap_t pmap; 5605 pt1_entry_t *pte1p, opte1; 5606 pt2_entry_t *pte2p, opte2; 5607 vm_offset_t va; 5608 5609 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5610 ("%s: page %p is not managed", __func__, m)); 5611 VM_OBJECT_ASSERT_WLOCKED(m->object); 5612 KASSERT(!vm_page_xbusied(m), 5613 ("%s: page %p is exclusive busy", __func__, m)); 5614 5615 /* 5616 * If the page is not PGA_WRITEABLE, then no PTE2s can have PTE2_NM 5617 * cleared. If the object containing the page is locked and the page 5618 * is not exclusive busied, then PGA_WRITEABLE cannot be concurrently 5619 * set. 
5620 */ 5621 if ((m->flags & PGA_WRITEABLE) == 0) 5622 return; 5623 rw_wlock(&pvh_global_lock); 5624 sched_pin(); 5625 if ((m->flags & PG_FICTITIOUS) != 0) 5626 goto small_mappings; 5627 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 5628 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { 5629 va = pv->pv_va; 5630 pmap = PV_PMAP(pv); 5631 PMAP_LOCK(pmap); 5632 pte1p = pmap_pte1(pmap, va); 5633 opte1 = pte1_load(pte1p); 5634 if (!(opte1 & PTE1_RO)) { 5635 if (pmap_demote_pte1(pmap, pte1p, va) && 5636 !pte1_is_wired(opte1)) { 5637 /* 5638 * Write protect the mapping to a 5639 * single page so that a subsequent 5640 * write access may repromote. 5641 */ 5642 va += VM_PAGE_TO_PHYS(m) - pte1_pa(opte1); 5643 pte2p = pmap_pte2_quick(pmap, va); 5644 opte2 = pte2_load(pte2p); 5645 if ((opte2 & PTE2_V)) { 5646 pte2_set_bit(pte2p, PTE2_NM | PTE2_RO); 5647 vm_page_dirty(m); 5648 pmap_tlb_flush(pmap, va); 5649 } 5650 } 5651 } 5652 PMAP_UNLOCK(pmap); 5653 } 5654small_mappings: 5655 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 5656 pmap = PV_PMAP(pv); 5657 PMAP_LOCK(pmap); 5658 pte1p = pmap_pte1(pmap, pv->pv_va); 5659 KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found" 5660 " a section in page %p's pv list", __func__, m)); 5661 pte2p = pmap_pte2_quick(pmap, pv->pv_va); 5662 if (pte2_is_dirty(pte2_load(pte2p))) { 5663 pte2_set_bit(pte2p, PTE2_NM); 5664 pmap_tlb_flush(pmap, pv->pv_va); 5665 } 5666 PMAP_UNLOCK(pmap); 5667 } 5668 sched_unpin(); 5669 rw_wunlock(&pvh_global_lock); 5670} 5671 5672 5673/* 5674 * Sets the memory attribute for the specified page. 
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
	pt2_entry_t *cmap2_pte2p;
	vm_memattr_t oma;	/* previous memory attribute */
	vm_paddr_t pa;
	struct pcpu *pc;

	oma = m->md.pat_mode;
	m->md.pat_mode = ma;

	CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d", __func__, m,
	    VM_PAGE_TO_PHYS(m), oma, ma);
	if ((m->flags & PG_FICTITIOUS) != 0)
		return;
#if 0
	/*
	 * If "m" is a normal page, flush it from the cache.
	 *
	 * First, try to find an existing mapping of the page by sf
	 * buffer. sf_buf_invalidate_cache() modifies mapping and
	 * flushes the cache.
	 */
	if (sf_buf_invalidate_cache(m, oma))
		return;
#endif
	/*
	 * If page is not mapped by sf buffer, map the page
	 * transient and do invalidation.
	 */
	if (ma != oma) {
		pa = VM_PAGE_TO_PHYS(m);
		sched_pin();
		pc = get_pcpu();
		/* The per-CPU CMAP2 slot provides the transient mapping. */
		cmap2_pte2p = pc->pc_cmap2_pte2p;
		mtx_lock(&pc->pc_cmap_lock);
		if (pte2_load(cmap2_pte2p) != 0)
			panic("%s: CMAP2 busy", __func__);
		pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW,
		    vm_memattr_to_pte2(ma)));
		/* Write back and invalidate to the point of coherency. */
		dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE);
		pte2_clear(cmap2_pte2p);
		tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
		sched_unpin();
		mtx_unlock(&pc->pc_cmap_lock);
	}
}

/*
 * Miscellaneous support routines follow
 */

/*
 * Returns TRUE if the given page is mapped individually or as part of
 * a 1mpage. Otherwise, returns FALSE.
5731 */ 5732boolean_t 5733pmap_page_is_mapped(vm_page_t m) 5734{ 5735 boolean_t rv; 5736 5737 if ((m->oflags & VPO_UNMANAGED) != 0) 5738 return (FALSE); 5739 rw_wlock(&pvh_global_lock); 5740 rv = !TAILQ_EMPTY(&m->md.pv_list) || 5741 ((m->flags & PG_FICTITIOUS) == 0 && 5742 !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); 5743 rw_wunlock(&pvh_global_lock); 5744 return (rv); 5745} 5746 5747/* 5748 * Returns true if the pmap's pv is one of the first 5749 * 16 pvs linked to from this page. This count may 5750 * be changed upwards or downwards in the future; it 5751 * is only necessary that true be returned for a small 5752 * subset of pmaps for proper page aging. 5753 */ 5754boolean_t 5755pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 5756{ 5757 struct md_page *pvh; 5758 pv_entry_t pv; 5759 int loops = 0; 5760 boolean_t rv; 5761 5762 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5763 ("%s: page %p is not managed", __func__, m)); 5764 rv = FALSE; 5765 rw_wlock(&pvh_global_lock); 5766 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 5767 if (PV_PMAP(pv) == pmap) { 5768 rv = TRUE; 5769 break; 5770 } 5771 loops++; 5772 if (loops >= 16) 5773 break; 5774 } 5775 if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { 5776 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 5777 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 5778 if (PV_PMAP(pv) == pmap) { 5779 rv = TRUE; 5780 break; 5781 } 5782 loops++; 5783 if (loops >= 16) 5784 break; 5785 } 5786 } 5787 rw_wunlock(&pvh_global_lock); 5788 return (rv); 5789} 5790 5791/* 5792 * pmap_zero_page zeros the specified hardware page by mapping 5793 * the page into KVM and using bzero to clear its contents. 
 */
void
pmap_zero_page(vm_page_t m)
{
	pt2_entry_t *cmap2_pte2p;
	struct pcpu *pc;

	/* Pin first so the per-CPU CMAP2 slot cannot change under us. */
	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	pagezero(pc->pc_cmap2_addr);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/*
 * pmap_zero_page_area zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	pt2_entry_t *cmap2_pte2p;
	struct pcpu *pc;

	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	/* Whole-page case uses the optimized pagezero() path. */
	if (off == 0 && size == PAGE_SIZE)
		pagezero(pc->pc_cmap2_addr);
	else
		bzero(pc->pc_cmap2_addr + off, size);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/*
 * pmap_zero_page_idle zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents. This
 * is intended to be called from the vm_pagezero process only and
 * outside of Giant.
 */
void
pmap_zero_page_idle(vm_page_t m)
{

	/* CMAP3/CADDR3 are reserved for the page-zeroing idle thread. */
	if (pte2_load(CMAP3) != 0)
		panic("%s: CMAP3 busy", __func__);
	sched_pin();
	pte2_store(CMAP3, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	pagezero(CADDR3);
	pte2_clear(CMAP3);
	tlb_flush((vm_offset_t)CADDR3);
	sched_unpin();
}

/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t src, vm_page_t dst)
{
	pt2_entry_t *cmap1_pte2p, *cmap2_pte2p;
	struct pcpu *pc;

	sched_pin();
	pc = get_pcpu();
	/* CMAP1 maps the source read-only, CMAP2 the destination R/W. */
	cmap1_pte2p = pc->pc_cmap1_pte2p;
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap1_pte2p) != 0)
		panic("%s: CMAP1 busy", __func__);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(src),
	    PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(src)));
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(dst),
	    PTE2_AP_KRW, vm_page_pte2_attr(dst)));
	bcopy(pc->pc_cmap1_addr, pc->pc_cmap2_addr, PAGE_SIZE);
	pte2_clear(cmap1_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap1_addr);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

int unmapped_buf_allowed = 1;

/*
 * Copy xfersize bytes from the sparse page array "ma" at byte offset
 * a_offset into the sparse page array "mb" at byte offset b_offset,
 * mapping each page pair through the per-CPU CMAP1/CMAP2 slots.
 */
void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
    vm_offset_t b_offset, int xfersize)
{
	pt2_entry_t *cmap1_pte2p, *cmap2_pte2p;
	vm_page_t a_pg, b_pg;
	char *a_cp, *b_cp;
	vm_offset_t a_pg_offset, b_pg_offset;
	struct pcpu *pc;
	int cnt;	/* bytes copied in the current iteration */

	sched_pin();
	pc = get_pcpu();
	cmap1_pte2p = pc->pc_cmap1_pte2p;
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap1_pte2p) != 0)
		panic("pmap_copy_pages: CMAP1 busy");
	if (pte2_load(cmap2_pte2p) != 0)
		panic("pmap_copy_pages: CMAP2 busy");
	while (xfersize > 0) {
		/* Limit each chunk to what remains of both pages. */
		a_pg = ma[a_offset >> PAGE_SHIFT];
		a_pg_offset = a_offset & PAGE_MASK;
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		b_pg = mb[b_offset >> PAGE_SHIFT];
		b_pg_offset = b_offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(a_pg),
		    PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(a_pg)));
		tlb_flush_local((vm_offset_t)pc->pc_cmap1_addr);
		pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(b_pg),
		    PTE2_AP_KRW, vm_page_pte2_attr(b_pg)));
		tlb_flush_local((vm_offset_t)pc->pc_cmap2_addr);
		a_cp = pc->pc_cmap1_addr + a_pg_offset;
		b_cp = pc->pc_cmap2_addr + b_pg_offset;
		bcopy(a_cp, b_cp, cnt);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
	pte2_clear(cmap1_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap1_addr);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/*
 * Map the given page into the per-CPU quick-map slot and return its
 * kernel virtual address.  Must be paired with pmap_quick_remove_page();
 * the critical section entered here is exited there.
 */
vm_offset_t
pmap_quick_enter_page(vm_page_t m)
{
	struct pcpu *pc;
	pt2_entry_t *pte2p;

	critical_enter();
	pc = get_pcpu();
	pte2p = pc->pc_qmap_pte2p;

	KASSERT(pte2_load(pte2p) == 0, ("%s: PTE2 busy", __func__));

	pte2_store(pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	return (pc->pc_qmap_addr);
}

/*
 * Undo pmap_quick_enter_page(): unmap the quick-map slot and leave the
 * critical section entered by the matching pmap_quick_enter_page().
 */
void
pmap_quick_remove_page(vm_offset_t addr)
{
	struct pcpu *pc;
	pt2_entry_t *pte2p;

	pc = get_pcpu();
	pte2p = pc->pc_qmap_pte2p;

	KASSERT(addr == pc->pc_qmap_addr, ("%s: invalid address", __func__));
	KASSERT(pte2_load(pte2p) != 0, ("%s: PTE2 not in use", __func__));

	pte2_clear(pte2p);
	tlb_flush(pc->pc_qmap_addr);
	critical_exit();
}

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
	struct spglist free;
	vm_offset_t addr;
	vm_offset_t end_addr = src_addr + len;
	vm_offset_t nextva;

	/* Being advisory, only handle the identity-address (fork) case. */
	if (dst_addr != src_addr)
		return;

	if (!pmap_is_current(src_pmap))
		return;

	rw_wlock(&pvh_global_lock);
	/* Lock the two pmaps in address order to avoid deadlock. */
	if (dst_pmap < src_pmap) {
		PMAP_LOCK(dst_pmap);
		PMAP_LOCK(src_pmap);
	} else {
		PMAP_LOCK(src_pmap);
		PMAP_LOCK(dst_pmap);
	}
	sched_pin();
	for (addr = src_addr; addr < end_addr; addr = nextva) {
		pt2_entry_t *src_pte2p, *dst_pte2p;
		vm_page_t dst_mpt2pg, src_mpt2pg;
		pt1_entry_t src_pte1;
		u_int pte1_idx;

		KASSERT(addr < VM_MAXUSER_ADDRESS,
		    ("%s: invalid to pmap_copy page tables", __func__));

		nextva = pte1_trunc(addr + PTE1_SIZE);
		if (nextva < addr)	/* address wrap */
			nextva = end_addr;

		pte1_idx = pte1_index(addr);
		src_pte1 = src_pmap->pm_pt1[pte1_idx];
		if (pte1_is_section(src_pte1)) {
			/* Only copy fully-covered, aligned 1MB sections. */
			if ((addr & PTE1_OFFSET) != 0 ||
			    (addr + PTE1_SIZE) > end_addr)
				continue;
			if (dst_pmap->pm_pt1[pte1_idx] == 0 &&
			    (!pte1_is_managed(src_pte1) ||
			    pmap_pv_insert_pte1(dst_pmap, addr,
			    pte1_pa(src_pte1)))) {
				/* Share the section, cleared of wiring. */
				dst_pmap->pm_pt1[pte1_idx] = src_pte1 &
				    ~PTE1_W;
				dst_pmap->pm_stats.resident_count +=
				    PTE1_SIZE / PAGE_SIZE;
				pmap_pte1_mappings++;
			}
			continue;
		} else if (!pte1_is_link(src_pte1))
			continue;

		src_mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(src_pte1));

		/*
		 * We leave PT2s to be linked from PT1 even if they are not
		 * referenced until all PT2s in a page are without reference.
		 *
		 * QQQ: It could be changed ...
		 */
#if 0 /* single_pt2_link_is_cleared */
		KASSERT(pt2_wirecount_get(src_mpt2pg, pte1_idx) > 0,
		    ("%s: source page table page is unused", __func__));
#else
		if (pt2_wirecount_get(src_mpt2pg, pte1_idx) == 0)
			continue;
#endif
		if (nextva > end_addr)
			nextva = end_addr;

		src_pte2p = pt2map_entry(addr);
		while (addr < nextva) {
			pt2_entry_t temp_pte2;
			temp_pte2 = pte2_load(src_pte2p);
			/*
			 * we only virtual copy managed pages
			 */
			if (pte2_is_managed(temp_pte2)) {
				dst_mpt2pg = pmap_allocpte2(dst_pmap, addr,
				    PMAP_ENTER_NOSLEEP);
				if (dst_mpt2pg == NULL)
					goto out;
				dst_pte2p = pmap_pte2_quick(dst_pmap, addr);
				if (!pte2_is_valid(pte2_load(dst_pte2p)) &&
				    pmap_try_insert_pv_entry(dst_pmap, addr,
				    PHYS_TO_VM_PAGE(pte2_pa(temp_pte2)))) {
					/*
					 * Clear the wired, modified, and
					 * accessed (referenced) bits
					 * during the copy.
					 */
					temp_pte2 &= ~(PTE2_W | PTE2_A);
					temp_pte2 |= PTE2_NM;
					pte2_store(dst_pte2p, temp_pte2);
					dst_pmap->pm_stats.resident_count++;
				} else {
					/* PV entry allocation failed: undo
					 * the PT2 wiring and stop copying. */
					SLIST_INIT(&free);
					if (pmap_unwire_pt2(dst_pmap, addr,
					    dst_mpt2pg, &free)) {
						pmap_tlb_flush(dst_pmap, addr);
						pmap_free_zero_pages(&free);
					}
					goto out;
				}
				if (pt2_wirecount_get(dst_mpt2pg, pte1_idx) >=
				    pt2_wirecount_get(src_mpt2pg, pte1_idx))
					break;
			}
			addr += PAGE_SIZE;
			src_pte2p++;
		}
	}
out:
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(src_pmap);
	PMAP_UNLOCK(dst_pmap);
}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more section mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
	vm_offset_t pte1_offset;	/* desired offset within a section */

	/* Mappings smaller than a section cannot benefit. */
	if (size < PTE1_SIZE)
		return;
	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
		offset += ptoa(object->pg_color);
	pte1_offset = offset & PTE1_OFFSET;
	/*
	 * Bail if the mapping could never contain a full section, or if
	 * the address is already suitably aligned with the offset.
	 */
	if (size - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) < PTE1_SIZE ||
	    (*addr & PTE1_OFFSET) == pte1_offset)
		return;
	if ((*addr & PTE1_OFFSET) < pte1_offset)
		*addr = pte1_trunc(*addr) + pte1_offset;
	else
		*addr = pte1_roundup(*addr) + pte1_offset;
}

/*
 * Make the given thread's pmap the one active on the current CPU:
 * update the CPU's active-pmap mask, program TTBR, and record the
 * new pmap in the PCB and per-CPU data.
 */
void
pmap_activate(struct thread *td)
{
	pmap_t pmap, oldpmap;
	u_int cpuid, ttb;

	PDEBUG(9, printf("%s: td = %08x\n", __func__, (uint32_t)td));

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	oldpmap = PCPU_GET(curpmap);
	cpuid = PCPU_GET(cpuid);

#if defined(SMP)
	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
#else
	CPU_CLR(cpuid, &oldpmap->pm_active);
	CPU_SET(cpuid, &pmap->pm_active);
#endif

	ttb = pmap_ttb_get(pmap);

	/*
	 * pmap_activate is for the current thread on the current cpu
	 */
	td->td_pcb->pcb_pagedir = ttb;
	cp15_ttbr_set(ttb);
	PCPU_SET(curpmap, pmap);
	critical_exit();
}

/*
 * Perform the pmap work for mincore.
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pt1_entry_t *pte1p, pte1;
	pt2_entry_t *pte2p, pte2;
	vm_paddr_t pa;
	bool managed;
	int val;	/* MINCORE_* flags to return */

	PMAP_LOCK(pmap);
retry:
	pte1p = pmap_pte1(pmap, addr);
	pte1 = pte1_load(pte1p);
	if (pte1_is_section(pte1)) {
		/* 1MB section mapping. */
		pa = trunc_page(pte1_pa(pte1) | (addr & PTE1_OFFSET));
		managed = pte1_is_managed(pte1);
		val = MINCORE_SUPER | MINCORE_INCORE;
		if (pte1_is_dirty(pte1))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if (pte1 & PTE1_A)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	} else if (pte1_is_link(pte1)) {
		/* 4KB page mapping through an L2 table. */
		pte2p = pmap_pte2(pmap, addr);
		pte2 = pte2_load(pte2p);
		pmap_pte2_release(pte2p);
		pa = pte2_pa(pte2);
		managed = pte2_is_managed(pte2);
		val = MINCORE_INCORE;
		if (pte2_is_dirty(pte2))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if (pte2 & PTE2_A)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	} else {
		managed = false;
		val = 0;
	}
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);
	return (val);
}

/*
 * Map a physically contiguous, page-aligned device region at the given
 * kernel virtual address with device memory attributes.
 */
void
pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa)
{
	vm_offset_t sva;	/* start of range, for the final TLB flush */
	uint32_t l2attr;

	KASSERT((size & PAGE_MASK) == 0,
	    ("%s: device mapping not page-sized", __func__));

	sva = va;
	l2attr = vm_memattr_to_pte2(VM_MEMATTR_DEVICE);
	while (size != 0) {
		pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, l2attr);
		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	/* One ranged flush instead of a per-page flush. */
	tlb_flush_range(sva, va - sva);
}

/*
 * Tear down a device mapping established by pmap_kenter_device().
 */
void
pmap_kremove_device(vm_offset_t va, vm_size_t size)
{
	vm_offset_t sva;

	KASSERT((size & PAGE_MASK) == 0,
	    ("%s: device mapping not page-sized", __func__));

	sva = va;
	while (size != 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	tlb_flush_range(sva, va - sva);
}

/*
 * Record the pmap's translation table base in the given PCB.
 */
void
pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb)
{

	pcb->pcb_pagedir = pmap_ttb_get(pmap);
}


/*
 * Clean L1 data cache range by physical address.
 * The range must be within a single page.
 */
static void
pmap_dcache_wb_pou(vm_paddr_t pa, vm_size_t size, uint32_t attr)
{
	pt2_entry_t *cmap2_pte2p;
	struct pcpu *pc;

	KASSERT(((pa & PAGE_MASK) + size) <= PAGE_SIZE,
	    ("%s: not on single page", __func__));

	/* Map the page transiently through the per-CPU CMAP2 slot. */
	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, attr));
	/* Write back to the point of unification. */
	dcache_wb_pou((vm_offset_t)pc->pc_cmap2_addr + (pa & PAGE_MASK), size);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/*
 * Sync instruction cache range which is not mapped yet.
 */
void
cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
{
	uint32_t len, offset;
	vm_page_t m;

	/* Write back d-cache on given address range. */
	offset = pa & PAGE_MASK;
	for ( ; size != 0; size -= len, pa += len, offset = 0) {
		len = min(PAGE_SIZE - offset, size);
		m = PHYS_TO_VM_PAGE(pa);
		KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
		    __func__, pa));
		pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m));
	}
	/*
	 * I-cache is VIPT. Only way how to flush all virtual mappings
	 * on given physical address is to invalidate all i-cache.
	 */
	icache_inv_all();
}

/*
 * Make the instruction cache coherent with recently written data in
 * the given range of the given pmap.
 */
void
pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t size)
{

	/* Write back d-cache on given address range. */
	if (va >= VM_MIN_KERNEL_ADDRESS) {
		/* Kernel addresses are mapped; clean them directly. */
		dcache_wb_pou(va, size);
	} else {
		uint32_t len, offset;
		vm_paddr_t pa;
		vm_page_t m;

		/* User range: clean page by page via a transient mapping. */
		offset = va & PAGE_MASK;
		for ( ; size != 0; size -= len, va += len, offset = 0) {
			pa = pmap_extract(pmap, va); /* offset is preserved */
			len = min(PAGE_SIZE - offset, size);
			m = PHYS_TO_VM_PAGE(pa);
			KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
			    __func__, pa));
			pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m));
		}
	}
	/*
	 * I-cache is VIPT. Only way how to flush all virtual mappings
	 * on given physical address is to invalidate all i-cache.
	 */
	icache_inv_all();
}

/*
 * The implementation of pmap_fault() uses IN_RANGE2() macro which
 * depends on the fact that given range size is a power of 2.
 */
CTASSERT(powerof2(NB_IN_PT1));
CTASSERT(powerof2(PT2MAP_SIZE));

/* True iff "addr" lies within the power-of-2-sized range at "start". */
#define IN_RANGE2(addr, start, size)	\
    ((vm_offset_t)(start) == ((vm_offset_t)(addr) & ~((size) - 1)))

/*
 * Handle access and R/W emulation faults.
 */
int
pmap_fault(pmap_t pmap, vm_offset_t far, uint32_t fsr, int idx, bool usermode)
{
	pt1_entry_t *pte1p, pte1;
	pt2_entry_t *pte2p, pte2;

	if (pmap == NULL)
		pmap = kernel_pmap;

	/*
	 * In kernel, we should never get abort with FAR which is in range of
	 * pmap->pm_pt1 or PT2MAP address spaces. If it happens, stop here
	 * and print out a useful abort message and even get to the debugger
	 * otherwise it likely ends with never ending loop of aborts.
	 */
	if (__predict_false(IN_RANGE2(far, pmap->pm_pt1, NB_IN_PT1))) {
		/*
		 * All L1 tables should always be mapped and present.
		 * However, we check only current one herein. For user mode,
		 * only permission abort from malicious user is not fatal.
		 * And alignment abort as it may have higher priority.
		 */
		if (!usermode || (idx != FAULT_ALIGN && idx != FAULT_PERM_L2)) {
			CTR4(KTR_PMAP, "%s: pmap %#x pm_pt1 %#x far %#x",
			    __func__, pmap, pmap->pm_pt1, far);
			panic("%s: pm_pt1 abort", __func__);
		}
		return (KERN_INVALID_ADDRESS);
	}
	if (__predict_false(IN_RANGE2(far, PT2MAP, PT2MAP_SIZE))) {
		/*
		 * PT2MAP should be always mapped and present in current
		 * L1 table. However, only existing L2 tables are mapped
		 * in PT2MAP. For user mode, only L2 translation abort and
		 * permission abort from malicious user is not fatal.
		 * And alignment abort as it may have higher priority.
		 */
		if (!usermode || (idx != FAULT_ALIGN &&
		    idx != FAULT_TRAN_L2 && idx != FAULT_PERM_L2)) {
			CTR4(KTR_PMAP, "%s: pmap %#x PT2MAP %#x far %#x",
			    __func__, pmap, PT2MAP, far);
			panic("%s: PT2MAP abort", __func__);
		}
		return (KERN_INVALID_ADDRESS);
	}

	/*
	 * A pmap lock is used below for handling of access and R/W emulation
	 * aborts. They were handled by atomic operations before so some
	 * analysis of new situation is needed to answer the following question:
	 * Is it safe to use the lock even for these aborts?
	 *
	 * There may happen two cases in general:
	 *
	 * (1) Aborts while the pmap lock is locked already - this should not
	 * happen as pmap lock is not recursive. However, under pmap lock only
	 * internal kernel data should be accessed and such data should be
	 * mapped with A bit set and NM bit cleared. If double abort happens,
	 * then a mapping of data which has caused it must be fixed. Further,
	 * all new mappings are always made with A bit set and the bit can be
	 * cleared only on managed mappings.
	 *
	 * (2) Aborts while another lock(s) is/are locked - this already can
	 * happen. However, there is no difference here if it's either access or
	 * R/W emulation abort, or if it's some other abort.
	 */

	PMAP_LOCK(pmap);
#ifdef SMP
	/*
	 * Special treatment is due to break-before-make approach done when
	 * pte1 is updated for userland mapping during section promotion or
	 * demotion. If not caught here, pmap_enter() can find a section
	 * mapping on faulting address. That is not allowed.
	 */
	if (idx == FAULT_TRAN_L1 && usermode && cp15_ats1cur_check(far) == 0) {
		PMAP_UNLOCK(pmap);
		return (KERN_SUCCESS);
	}
#endif
	/*
	 * Accesss bits for page and section. Note that the entry
	 * is not in TLB yet, so TLB flush is not necessary.
	 *
	 * QQQ: This is hardware emulation, we do not call userret()
	 *      for aborts from user mode.
	 */
	if (idx == FAULT_ACCESS_L2) {
		pte2p = pt2map_entry(far);
		pte2 = pte2_load(pte2p);
		if (pte2_is_valid(pte2)) {
			pte2_store(pte2p, pte2 | PTE2_A);
			PMAP_UNLOCK(pmap);
			return (KERN_SUCCESS);
		}
	}
	if (idx == FAULT_ACCESS_L1) {
		pte1p = pmap_pte1(pmap, far);
		pte1 = pte1_load(pte1p);
		if (pte1_is_section(pte1)) {
			pte1_store(pte1p, pte1 | PTE1_A);
			PMAP_UNLOCK(pmap);
			return (KERN_SUCCESS);
		}
	}

	/*
	 * Handle modify bits for page and section. Note that the modify
	 * bit is emulated by software. So PTEx_RO is software read only
	 * bit and PTEx_NM flag is real hardware read only bit.
	 *
	 * QQQ: This is hardware emulation, we do not call userret()
	 *      for aborts from user mode.
	 */
	if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L2)) {
		pte2p = pt2map_entry(far);
		pte2 = pte2_load(pte2p);
		if (pte2_is_valid(pte2) && !(pte2 & PTE2_RO) &&
		    (pte2 & PTE2_NM)) {
			pte2_store(pte2p, pte2 & ~PTE2_NM);
			tlb_flush(trunc_page(far));
			PMAP_UNLOCK(pmap);
			return (KERN_SUCCESS);
		}
	}
	if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L1)) {
		pte1p = pmap_pte1(pmap, far);
		pte1 = pte1_load(pte1p);
		if (pte1_is_section(pte1) && !(pte1 & PTE1_RO) &&
		    (pte1 & PTE1_NM)) {
			pte1_store(pte1p, pte1 & ~PTE1_NM);
			tlb_flush(pte1_trunc(far));
			PMAP_UNLOCK(pmap);
			return (KERN_SUCCESS);
		}
	}

	/*
	 * QQQ: The previous code, mainly fast handling of access and
	 *      modify bits aborts, could be moved to ASM. Now we are
	 *      starting to deal with not fast aborts.
	 */

#ifdef INVARIANTS
	/*
	 * Read an entry in PT2TAB associated with both pmap and far.
	 * It's safe because PT2TAB is always mapped.
	 */
	pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, far));
	if (pte2_is_valid(pte2)) {
		/*
		 * Now, when we know that L2 page table is allocated,
		 * we can use PT2MAP to get L2 page table entry.
		 */
		pte2 = pte2_load(pt2map_entry(far));
		if (pte2_is_valid(pte2)) {
			/*
			 * If L2 page table entry is valid, make sure that
			 * L1 page table entry is valid too. Note that we
			 * leave L2 page entries untouched when promoted.
			 */
			pte1 = pte1_load(pmap_pte1(pmap, far));
			if (!pte1_is_valid(pte1)) {
				panic("%s: missing L1 page entry (%p, %#x)",
				    __func__, pmap, far);
			}
		}
	}
#endif
	PMAP_UNLOCK(pmap);
	return (KERN_FAILURE);
}

#if defined(PMAP_DEBUG)
/*
 * Reusing of KVA used in pmap_zero_page function !!!
 */
/*
 * Verify that every 32-bit word of the given page is zero; panic
 * otherwise. The page is mapped through the per-CPU CMAP2 window,
 * so the thread stays pinned and the window lock is held while the
 * temporary mapping exists.
 */
static void
pmap_zero_page_check(vm_page_t m)
{
	pt2_entry_t *cmap2_pte2p;
	uint32_t *p, *end;
	struct pcpu *pc;

	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	end = (uint32_t*)(pc->pc_cmap2_addr + PAGE_SIZE);
	for (p = (uint32_t*)pc->pc_cmap2_addr; p < end; p++)
		if (*p != 0)
			panic("%s: page %p not zero, va: %p", __func__, m,
			    pc->pc_cmap2_addr);
	/* Tear the temporary mapping down before unpinning. */
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/*
 * Debug dump of all valid user L2 mappings of the process with the
 * given pid; stops at VM_MIN_KERNEL_ADDRESS. Returns the number of
 * valid pte2 entries printed.
 */
int
pmap_pid_dump(int pid)
{
	pmap_t pmap;
	struct proc *p;
	int npte2 = 0;
	int i, j, index;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_pid != pid || p->p_vmspace == NULL)
			continue;
		index = 0;
		pmap = vmspace_pmap(p->p_vmspace);
		for (i = 0; i < NPTE1_IN_PT1; i++) {
			pt1_entry_t pte1;
			pt2_entry_t *pte2p, pte2;
			vm_offset_t base, va;
			vm_paddr_t pa;
			vm_page_t m;

			base = i << PTE1_SHIFT;
			pte1 = pte1_load(&pmap->pm_pt1[i]);

			if (pte1_is_section(pte1)) {
				/*
				 * QQQ: Do something here!
				 */
			} else if (pte1_is_link(pte1)) {
				for (j = 0; j < NPTE2_IN_PT2; j++) {
					va = base + (j << PAGE_SHIFT);
					if (va >= VM_MIN_KERNEL_ADDRESS) {
						/* User space exhausted. */
						if (index) {
							index = 0;
							printf("\n");
						}
						sx_sunlock(&allproc_lock);
						return (npte2);
					}
					pte2p = pmap_pte2(pmap, va);
					pte2 = pte2_load(pte2p);
					pmap_pte2_release(pte2p);
					if (!pte2_is_valid(pte2))
						continue;

					pa = pte2_pa(pte2);
					m = PHYS_TO_VM_PAGE(pa);
					printf("va: 0x%x, pa: 0x%x, h: %d, w:"
					    " %d, f: 0x%x", va, pa,
					    m->hold_count, m->wire_count,
					    m->flags);
					npte2++;
					index++;
					/* Two entries per output line. */
					if (index >= 2) {
						index = 0;
						printf("\n");
					} else {
						printf(" ");
					}
				}
			}
		}
	}
	sx_sunlock(&allproc_lock);
	return (npte2);
}

#endif

#ifdef DDB
/*
 * Return a pointer usable by DDB to the L2 page table entry for va,
 * or NULL when no L2 table is linked there. For a non-current pmap
 * the L2 page is reached through the PMAP3/PADDR3 mapping window.
 */
static pt2_entry_t *
pmap_pte2_ddb(pmap_t pmap, vm_offset_t va)
{
	pt1_entry_t pte1;
	vm_paddr_t pt2pg_pa;

	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (!pte1_is_link(pte1))
		return (NULL);

	if (pmap_is_current(pmap))
		return (pt2map_entry(va));

	/* Note that L2 page table size is not equal to PAGE_SIZE.
 */
	pt2pg_pa = trunc_page(pte1_link_pa(pte1));
	if (pte2_pa(pte2_load(PMAP3)) != pt2pg_pa) {
		/* Retarget the PMAP3 window at the wanted L2 page. */
		pte2_store(PMAP3, PTE2_KPT(pt2pg_pa));
#ifdef SMP
		PMAP3cpu = PCPU_GET(cpuid);
#endif
		tlb_flush_local((vm_offset_t)PADDR3);
	}
#ifdef SMP
	else if (PMAP3cpu != PCPU_GET(cpuid)) {
		/*
		 * Window already points there, but this CPU may still
		 * hold a stale TLB entry for it.
		 */
		PMAP3cpu = PCPU_GET(cpuid);
		tlb_flush_local((vm_offset_t)PADDR3);
	}
#endif
	return (PADDR3 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
}

/* Print the basic fields of the given pmap. */
static void
dump_pmap(pmap_t pmap)
{

	printf("pmap %p\n", pmap);
	printf(" pm_pt1: %p\n", pmap->pm_pt1);
	printf(" pm_pt2tab: %p\n", pmap->pm_pt2tab);
	printf(" pm_active: 0x%08lX\n", pmap->pm_active.__bits[0]);
}

/* DDB "show pmaps": dump every pmap on the allpmaps list. */
DB_SHOW_COMMAND(pmaps, pmap_list_pmaps)
{

	pmap_t pmap;
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		dump_pmap(pmap);
	}
}

/*
 * Pack a pte2's memory-type bits into a small class index:
 * bits [3:2] plus bit 6 (shifted down to bit 2). Printed as "TEX%d"
 * by the dump routines.
 */
static int
pte2_class(pt2_entry_t pte2)
{
	int cls;

	cls = (pte2 >> 2) & 0x03;
	cls |= (pte2 >> 4) & 0x04;
	return (cls);
}

/* Placeholder - section mappings are not dumped in detail yet. */
static void
dump_section(pmap_t pmap, uint32_t pte1_idx)
{
}

/*
 * Dump all non-zero entries of the L2 page table linked at L1 index
 * pte1_idx. With invalid_ok set, entries that are non-zero but not
 * valid are not flagged as errors (expected for pv_chunk space).
 */
static void
dump_link(pmap_t pmap, uint32_t pte1_idx, boolean_t invalid_ok)
{
	uint32_t i;
	vm_offset_t va;
	pt2_entry_t *pte2p, pte2;
	vm_page_t m;

	va = pte1_idx << PTE1_SHIFT;
	pte2p = pmap_pte2_ddb(pmap, va);
	for (i = 0; i < NPTE2_IN_PT2; i++, pte2p++, va += PAGE_SIZE) {
		pte2 = pte2_load(pte2p);
		if (pte2 == 0)
			continue;
		if (!pte2_is_valid(pte2)) {
			printf(" 0x%08X: 0x%08X", va, pte2);
			if (!invalid_ok)
				printf(" - not valid !!!");
			printf("\n");
			continue;
		}
		m = PHYS_TO_VM_PAGE(pte2_pa(pte2));
		printf(" 0x%08X: 0x%08X, TEX%d, s:%d, g:%d, m:%p", va , pte2,
		    pte2_class(pte2), !!(pte2 & PTE2_S), !(pte2 & PTE2_NG), m);
		if (m != NULL) {
			printf(" v:%d h:%d w:%d f:0x%04X\n", m->valid,
			    m->hold_count, m->wire_count, m->flags);
		} else {
			printf("\n");
		}
	}
}

/* True iff va falls within the pv_chunk allocation area. */
static __inline boolean_t
is_pv_chunk_space(vm_offset_t va)
{

	if ((((vm_offset_t)pv_chunkbase) <= va) &&
	    (va < ((vm_offset_t)pv_chunkbase + PAGE_SIZE * pv_maxchunks)))
		return (TRUE);
	return (FALSE);
}

/*
 * DDB "show pmap [addr] [/u]": dump L1 entries (and linked L2 tables)
 * of the given pmap, or of curpmap without an address. The 'u'
 * modifier limits output to user addresses.
 */
DB_SHOW_COMMAND(pmap, pmap_pmap_print)
{
	/* XXX convert args. */
	pmap_t pmap = (pmap_t)addr;
	pt1_entry_t pte1;
	pt2_entry_t pte2;
	vm_offset_t va, eva;
	vm_page_t m;
	uint32_t i;
	boolean_t invalid_ok, dump_link_ok, dump_pv_chunk;

	if (have_addr) {
		pmap_t pm;

		/* Accept only addresses of registered pmaps. */
		LIST_FOREACH(pm, &allpmaps, pm_list)
			if (pm == pmap) break;
		if (pm == NULL) {
			printf("given pmap %p is not in allpmaps list\n", pmap);
			return;
		}
	} else
		pmap = PCPU_GET(curpmap);

	eva = (modif[0] == 'u') ? VM_MAXUSER_ADDRESS : 0xFFFFFFFF;
	dump_pv_chunk = FALSE; /* XXX evaluate from modif[] */

	printf("pmap: 0x%08X\n", (uint32_t)pmap);
	printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP);
	printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab);

	for(i = 0; i < NPTE1_IN_PT1; i++) {
		pte1 = pte1_load(&pmap->pm_pt1[i]);
		if (pte1 == 0)
			continue;
		va = i << PTE1_SHIFT;
		if (va >= eva)
			break;

		if (pte1_is_section(pte1)) {
			printf("0x%08X: Section 0x%08X, s:%d g:%d\n", va, pte1,
			    !!(pte1 & PTE1_S), !(pte1 & PTE1_NG));
			dump_section(pmap, i);
		} else if (pte1_is_link(pte1)) {
			dump_link_ok = TRUE;
			invalid_ok = FALSE;
			pte2 = pte2_load(pmap_pt2tab_entry(pmap, va));
			m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
			printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X m: %p",
			    va, pte1, pte2, m);
			if (is_pv_chunk_space(va)) {
				printf(" - pv_chunk space");
				if (dump_pv_chunk)
					invalid_ok = TRUE;
				else
					dump_link_ok = FALSE;
			}
			else if (m != NULL)
				printf(" w:%d w2:%u", m->wire_count,
				    pt2_wirecount_get(m, pte1_index(va)));
			/*
			 * Cross-check the pt2tab entry against the L1 link.
			 * NOTE(review): pte2_pa() is applied to pte1 here;
			 * presumably intentional, but verify the frame masks
			 * agree for non-page-aligned L2 tables.
			 */
			if (pte2 == 0)
				printf(" !!! pt2tab entry is ZERO");
			else if (pte2_pa(pte1) != pte2_pa(pte2))
				printf(" !!! pt2tab entry is DIFFERENT - m: %p",
				    PHYS_TO_VM_PAGE(pte2_pa(pte2)));
			printf("\n");
			if (dump_link_ok)
				dump_link(pmap, i, invalid_ok);
		} else
			printf("0x%08X: Invalid entry 0x%08X\n", va, pte1);
	}
}

/*
 * Dump the PT2TAB of the given pmap: one line per valid entry with
 * its class, shareability and backing vm_page.
 */
static void
dump_pt2tab(pmap_t pmap)
{
	uint32_t i;
	pt2_entry_t pte2;
	vm_offset_t va;
	vm_paddr_t pa;
	vm_page_t m;

	printf("PT2TAB:\n");
	for (i = 0; i < PT2TAB_ENTRIES; i++) {
		pte2 = pte2_load(&pmap->pm_pt2tab[i]);
		if (!pte2_is_valid(pte2))
			continue;
		va = i << PT2TAB_SHIFT;
		pa = pte2_pa(pte2);
		m = PHYS_TO_VM_PAGE(pa);
		printf(" 0x%08X: 0x%08X, TEX%d, s:%d, m:%p", va, pte2,
		    pte2_class(pte2), !!(pte2 & PTE2_S), m);
		if (m != NULL)
			printf(" , h: %d, w: %d, f: 0x%04X pidx: %lld",
			    m->hold_count, m->wire_count, m->flags, m->pindex);
		printf("\n");
	}
}

/*
 * DDB "show pmap_pt2tab": dump the L1 entries covering PT2MAP and
 * then the PT2TAB of the current pmap. Works only on curpmap.
 */
DB_SHOW_COMMAND(pmap_pt2tab, pmap_pt2tab_print)
{
	/* XXX convert args. */
	pmap_t pmap = (pmap_t)addr;
	pt1_entry_t pte1;
	pt2_entry_t pte2;
	vm_offset_t va;
	uint32_t i, start;

	if (have_addr) {
		printf("supported only on current pmap\n");
		return;
	}

	pmap = PCPU_GET(curpmap);
	printf("curpmap: 0x%08X\n", (uint32_t)pmap);
	printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP);
	printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab);

	/* Walk only the L1 entries which map PT2MAP itself. */
	start = pte1_index((vm_offset_t)PT2MAP);
	for (i = start; i < (start + NPT2_IN_PT2TAB); i++) {
		pte1 = pte1_load(&pmap->pm_pt1[i]);
		if (pte1 == 0)
			continue;
		va = i << PTE1_SHIFT;
		if (pte1_is_section(pte1)) {
			printf("0x%08X: Section 0x%08X, s:%d\n", va, pte1,
			    !!(pte1 & PTE1_S));
			dump_section(pmap, i);
		} else if (pte1_is_link(pte1)) {
			pte2 = pte2_load(pmap_pt2tab_entry(pmap, va));
			printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X\n", va,
			    pte1, pte2);
			if (pte2 == 0)
				printf(" !!! pt2tab entry is ZERO\n");
		} else
			printf("0x%08X: Invalid entry 0x%08X\n", va, pte1);
	}
	dump_pt2tab(pmap);
}
#endif