1/*	$NetBSD: pmap.c,v 1.80 2024/05/06 07:18:19 skrll Exp $	*/
2
3/*-
4 * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center and by Chris G. Demetriou.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1992, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department and Ralph Campbell.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)pmap.c	8.4 (Berkeley) 1/26/94
66 */
67
68#include <sys/cdefs.h>
69
70__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.80 2024/05/06 07:18:19 skrll Exp $");
71
72/*
73 *	Manages physical address maps.
74 *
75 *	In addition to hardware address maps, this
76 *	module is called upon to provide software-use-only
77 *	maps which may or may not be stored in the same
78 *	form as hardware maps.  These pseudo-maps are
79 *	used to store intermediate results from copy
80 *	operations to and from address spaces.
81 *
82 *	Since the information managed by this module is
83 *	also stored by the logical address mapping module,
84 *	this module may throw away valid virtual-to-physical
85 *	mappings at almost any time.  However, invalidations
86 *	of virtual-to-physical mappings must be done as
87 *	requested.
88 *
89 *	In order to cope with hardware architectures which
90 *	make virtual-to-physical map invalidates expensive,
91 *	this module may delay invalidate or reduced protection
92 *	operations until such time as they are actually
93 *	necessary.  This module is given full information as
94 *	to which processors are currently using which maps,
95 *	and to when physical maps must be made correct.
96 */
97
98#include "opt_ddb.h"
99#include "opt_efi.h"
100#include "opt_modular.h"
101#include "opt_multiprocessor.h"
102#include "opt_sysv.h"
103#include "opt_uvmhist.h"
104
105#define __PMAP_PRIVATE
106
107#include <sys/param.h>
108
109#include <sys/asan.h>
110#include <sys/atomic.h>
111#include <sys/buf.h>
112#include <sys/cpu.h>
113#include <sys/mutex.h>
114#include <sys/pool.h>
115
116#include <uvm/uvm.h>
117#include <uvm/uvm_physseg.h>
118#include <uvm/pmap/pmap_pvt.h>
119
120#if defined(MULTIPROCESSOR) && defined(PMAP_VIRTUAL_CACHE_ALIASES) \
121    && !defined(PMAP_NO_PV_UNCACHED)
122#error PMAP_VIRTUAL_CACHE_ALIASES with MULTIPROCESSOR requires \
123 PMAP_NO_PV_UNCACHED to be defined
124#endif
125
126#if defined(PMAP_PV_TRACK_ONLY_STUBS)
127#undef	__HAVE_PMAP_PV_TRACK
128#endif
129
130PMAP_COUNTER(remove_kernel_calls, "remove kernel calls");
131PMAP_COUNTER(remove_kernel_pages, "kernel pages unmapped");
132PMAP_COUNTER(remove_user_calls, "remove user calls");
133PMAP_COUNTER(remove_user_pages, "user pages unmapped");
134PMAP_COUNTER(remove_flushes, "remove cache flushes");
135PMAP_COUNTER(remove_tlb_ops, "remove tlb ops");
136PMAP_COUNTER(remove_pvfirst, "remove pv first");
137PMAP_COUNTER(remove_pvsearch, "remove pv search");
138
139PMAP_COUNTER(prefer_requests, "prefer requests");
140PMAP_COUNTER(prefer_adjustments, "prefer adjustments");
141
142PMAP_COUNTER(idlezeroed_pages, "pages idle zeroed");
143
144PMAP_COUNTER(kenter_pa, "kernel fast mapped pages");
145PMAP_COUNTER(kenter_pa_bad, "kernel fast mapped pages (bad color)");
146PMAP_COUNTER(kenter_pa_unmanaged, "kernel fast mapped unmanaged pages");
147PMAP_COUNTER(kremove_pages, "kernel fast unmapped pages");
148
149PMAP_COUNTER(page_cache_evictions, "pages changed to uncacheable");
150PMAP_COUNTER(page_cache_restorations, "pages changed to cacheable");
151
152PMAP_COUNTER(kernel_mappings_bad, "kernel pages mapped (bad color)");
153PMAP_COUNTER(user_mappings_bad, "user pages mapped (bad color)");
154PMAP_COUNTER(kernel_mappings, "kernel pages mapped");
155PMAP_COUNTER(user_mappings, "user pages mapped");
156PMAP_COUNTER(user_mappings_changed, "user mapping changed");
157PMAP_COUNTER(kernel_mappings_changed, "kernel mapping changed");
158PMAP_COUNTER(uncached_mappings, "uncached pages mapped");
159PMAP_COUNTER(unmanaged_mappings, "unmanaged pages mapped");
160PMAP_COUNTER(pvtracked_mappings, "pv-tracked unmanaged pages mapped");
161PMAP_COUNTER(efirt_mappings, "EFI RT pages mapped");
162PMAP_COUNTER(managed_mappings, "managed pages mapped");
163PMAP_COUNTER(mappings, "pages mapped");
164PMAP_COUNTER(remappings, "pages remapped");
165PMAP_COUNTER(unmappings, "pages unmapped");
166PMAP_COUNTER(primary_mappings, "page initial mappings");
167PMAP_COUNTER(primary_unmappings, "page final unmappings");
168PMAP_COUNTER(tlb_hit, "page mapping");
169
170PMAP_COUNTER(exec_mappings, "exec pages mapped");
171PMAP_COUNTER(exec_synced_mappings, "exec pages synced");
172PMAP_COUNTER(exec_synced_remove, "exec pages synced (PR)");
173PMAP_COUNTER(exec_synced_clear_modify, "exec pages synced (CM)");
174PMAP_COUNTER(exec_synced_page_protect, "exec pages synced (PP)");
175PMAP_COUNTER(exec_synced_protect, "exec pages synced (P)");
176PMAP_COUNTER(exec_uncached_page_protect, "exec pages uncached (PP)");
177PMAP_COUNTER(exec_uncached_clear_modify, "exec pages uncached (CM)");
178PMAP_COUNTER(exec_uncached_zero_page, "exec pages uncached (ZP)");
179PMAP_COUNTER(exec_uncached_copy_page, "exec pages uncached (CP)");
180PMAP_COUNTER(exec_uncached_remove, "exec pages uncached (PR)");
181
182PMAP_COUNTER(create, "creates");
183PMAP_COUNTER(reference, "references");
184PMAP_COUNTER(dereference, "dereferences");
185PMAP_COUNTER(destroy, "destroyed");
186PMAP_COUNTER(activate, "activations");
187PMAP_COUNTER(deactivate, "deactivations");
188PMAP_COUNTER(update, "updates");
189#ifdef MULTIPROCESSOR
190PMAP_COUNTER(shootdown_ipis, "shootdown IPIs");
191#endif
192PMAP_COUNTER(unwire, "unwires");
193PMAP_COUNTER(copy, "copies");
194PMAP_COUNTER(clear_modify, "clear_modifies");
195PMAP_COUNTER(protect, "protects");
196PMAP_COUNTER(page_protect, "page_protects");
197
198#define PMAP_ASID_RESERVED 0
199CTASSERT(PMAP_ASID_RESERVED == 0);
200
201#ifdef PMAP_HWPAGEWALKER
202#ifndef PMAP_PDETAB_ALIGN
203#define PMAP_PDETAB_ALIGN	/* nothing */
204#endif
205
206#ifdef _LP64
207pmap_pdetab_t	pmap_kstart_pdetab PMAP_PDETAB_ALIGN; /* first mid-level pdetab for kernel */
208#endif
209pmap_pdetab_t	pmap_kern_pdetab PMAP_PDETAB_ALIGN;
210#endif
211
212#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE)
213#ifndef PMAP_SEGTAB_ALIGN
214#define PMAP_SEGTAB_ALIGN	/* nothing */
215#endif
216#ifdef _LP64
217pmap_segtab_t	pmap_kstart_segtab PMAP_SEGTAB_ALIGN; /* first mid-level segtab for kernel */
218#endif
219pmap_segtab_t	pmap_kern_segtab PMAP_SEGTAB_ALIGN = { /* top level segtab for kernel */
220#ifdef _LP64
221	.seg_seg[(VM_MIN_KERNEL_ADDRESS >> XSEGSHIFT) & (NSEGPG - 1)] = &pmap_kstart_segtab,
222#endif
223};
224#endif
225
226struct pmap_kernel kernel_pmap_store = {
227	.kernel_pmap = {
228		.pm_refcnt = 1,
229#ifdef PMAP_HWPAGEWALKER
230		.pm_pdetab = PMAP_INVALID_PDETAB_ADDRESS,
231#endif
232#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE)
233		.pm_segtab = &pmap_kern_segtab,
234#endif
235		.pm_minaddr = VM_MIN_KERNEL_ADDRESS,
236		.pm_maxaddr = VM_MAX_KERNEL_ADDRESS,
237	},
238};
239
240struct pmap * const kernel_pmap_ptr = &kernel_pmap_store.kernel_pmap;
241
242#if defined(EFI_RUNTIME)
243static struct pmap efirt_pmap;
244
245pmap_t
246pmap_efirt(void)
247{
248	return &efirt_pmap;
249}
250#else
251static inline pt_entry_t
252pte_make_enter_efirt(paddr_t pa, vm_prot_t prot, u_int flags)
253{
254	panic("not supported");
255}
256#endif
257
258/* The current top of kernel VM - gets updated by pmap_growkernel */
259vaddr_t pmap_curmaxkvaddr;
260
261struct pmap_limits pmap_limits = {	/* VA and PA limits */
262	.virtual_start = VM_MIN_KERNEL_ADDRESS,
263	.virtual_end = VM_MAX_KERNEL_ADDRESS,
264};
265
266#ifdef UVMHIST
267static struct kern_history_ent pmapexechistbuf[10000];
268static struct kern_history_ent pmaphistbuf[10000];
269static struct kern_history_ent pmapxtabhistbuf[5000];
270UVMHIST_DEFINE(pmapexechist) = UVMHIST_INITIALIZER(pmapexechist, pmapexechistbuf);
271UVMHIST_DEFINE(pmaphist) = UVMHIST_INITIALIZER(pmaphist, pmaphistbuf);
272UVMHIST_DEFINE(pmapxtabhist) = UVMHIST_INITIALIZER(pmapxtabhist, pmapxtabhistbuf);
273#endif
274
275/*
276 * The pools from which pmap structures and sub-structures are allocated.
277 */
278struct pool pmap_pmap_pool;
279struct pool pmap_pv_pool;
280
281#ifndef PMAP_PV_LOWAT
282#define	PMAP_PV_LOWAT	16
283#endif
284int	pmap_pv_lowat = PMAP_PV_LOWAT;
285
286bool	pmap_initialized = false;
287#define	PMAP_PAGE_COLOROK_P(a, b) \
288		((((int)(a) ^ (int)(b)) & pmap_page_colormask) == 0)
289u_int	pmap_page_colormask;
290
291#define PAGE_IS_MANAGED(pa)	(pmap_initialized && uvm_pageismanaged(pa))
292
293#define PMAP_IS_ACTIVE(pm)						\
294	((pm) == pmap_kernel() ||					\
295	 (pm) == curlwp->l_proc->p_vmspace->vm_map.pmap)
296
297/* Forward function declarations */
298void pmap_page_remove(struct vm_page_md *);
299static void pmap_pvlist_check(struct vm_page_md *);
300void pmap_remove_pv(pmap_t, vaddr_t, struct vm_page *, bool);
301void pmap_enter_pv(pmap_t, vaddr_t, paddr_t, struct vm_page_md *, pt_entry_t *, u_int);
302
303/*
304 * PV table management functions.
305 */
306void	*pmap_pv_page_alloc(struct pool *, int);
307void	pmap_pv_page_free(struct pool *, void *);
308
309struct pool_allocator pmap_pv_page_allocator = {
310	pmap_pv_page_alloc, pmap_pv_page_free, 0,
311};
312
313#define	pmap_pv_alloc()		pool_get(&pmap_pv_pool, PR_NOWAIT)
314#define	pmap_pv_free(pv)	pool_put(&pmap_pv_pool, (pv))
315
316#ifndef PMAP_NEED_TLB_MISS_LOCK
317
318#if defined(PMAP_MD_NEED_TLB_MISS_LOCK) || defined(DEBUG)
319#define	PMAP_NEED_TLB_MISS_LOCK
320#endif /* PMAP_MD_NEED_TLB_MISS_LOCK || DEBUG */
321
322#endif /* PMAP_NEED_TLB_MISS_LOCK */
323
324#ifdef PMAP_NEED_TLB_MISS_LOCK
325
326#ifdef PMAP_MD_NEED_TLB_MISS_LOCK
327#define	pmap_tlb_miss_lock_init()	__nothing /* MD code deals with this */
328#define	pmap_tlb_miss_lock_enter()	pmap_md_tlb_miss_lock_enter()
329#define	pmap_tlb_miss_lock_exit()	pmap_md_tlb_miss_lock_exit()
330#else
331kmutex_t pmap_tlb_miss_lock		__cacheline_aligned;
332
333static void
334pmap_tlb_miss_lock_init(void)
335{
336	mutex_init(&pmap_tlb_miss_lock, MUTEX_SPIN, IPL_HIGH);
337}
338
339static inline void
340pmap_tlb_miss_lock_enter(void)
341{
342	mutex_spin_enter(&pmap_tlb_miss_lock);
343}
344
345static inline void
346pmap_tlb_miss_lock_exit(void)
347{
348	mutex_spin_exit(&pmap_tlb_miss_lock);
349}
350#endif /* PMAP_MD_NEED_TLB_MISS_LOCK */
351
352#else
353
354#define	pmap_tlb_miss_lock_init()	__nothing
355#define	pmap_tlb_miss_lock_enter()	__nothing
356#define	pmap_tlb_miss_lock_exit()	__nothing
357
358#endif /* PMAP_NEED_TLB_MISS_LOCK */
359
360#ifndef MULTIPROCESSOR
361kmutex_t pmap_pvlist_mutex	__cacheline_aligned;
362#endif
363
364/*
365 * Debug functions.
366 */
367
368#ifdef DEBUG
369
370bool pmap_stealdebug = false;
371
372#define DPRINTF(...)							     \
373    do { if (pmap_stealdebug) { printf(__VA_ARGS__); } } while (false)
374
375static inline void
376pmap_asid_check(pmap_t pm, const char *func)
377{
378	if (!PMAP_IS_ACTIVE(pm))
379		return;
380
381	struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(curcpu()));
382	tlb_asid_t asid = tlb_get_asid();
383	if (asid != pai->pai_asid)
384		panic("%s: inconsistency for active TLB update: %u <-> %u",
385		    func, asid, pai->pai_asid);
386}
387#else
388
389#define DPRINTF(...) __nothing
390
391#endif
392
393static void
394pmap_addr_range_check(pmap_t pmap, vaddr_t sva, vaddr_t eva, const char *func)
395{
396#ifdef DEBUG
397	if (pmap == pmap_kernel()) {
398		if (sva < VM_MIN_KERNEL_ADDRESS)
399			panic("%s: kva %#"PRIxVADDR" not in range",
400			    func, sva);
401		if (eva >= pmap_limits.virtual_end)
402			panic("%s: kva %#"PRIxVADDR" not in range",
403			    func, eva);
404	} else {
405		if (eva > VM_MAXUSER_ADDRESS)
406			panic("%s: uva %#"PRIxVADDR" not in range",
407			    func, eva);
408		pmap_asid_check(pmap, func);
409	}
410#endif
411}
412
413/*
414 * Misc. functions.
415 */
416
417bool
418pmap_page_clear_attributes(struct vm_page_md *mdpg, u_long clear_attributes)
419{
420	volatile u_long * const attrp = &mdpg->mdpg_attrs;
421
422#ifdef MULTIPROCESSOR
423	for (;;) {
424		u_long old_attr = *attrp;
425		if ((old_attr & clear_attributes) == 0)
426			return false;
427		u_long new_attr = old_attr & ~clear_attributes;
428		if (old_attr == atomic_cas_ulong(attrp, old_attr, new_attr))
429			return true;
430	}
431#else
432	u_long old_attr = *attrp;
433	if ((old_attr & clear_attributes) == 0)
434		return false;
435	*attrp &= ~clear_attributes;
436	return true;
437#endif
438}
439
440void
441pmap_page_set_attributes(struct vm_page_md *mdpg, u_long set_attributes)
442{
443#ifdef MULTIPROCESSOR
444	atomic_or_ulong(&mdpg->mdpg_attrs, set_attributes);
445#else
446	mdpg->mdpg_attrs |= set_attributes;
447#endif
448}
449
450static void
451pmap_page_syncicache(struct vm_page *pg)
452{
453	UVMHIST_FUNC(__func__);
454	UVMHIST_CALLED(pmaphist);
455#ifndef MULTIPROCESSOR
456	struct pmap * const curpmap = curlwp->l_proc->p_vmspace->vm_map.pmap;
457#endif
458	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
459	pv_entry_t pv = &mdpg->mdpg_first;
460	kcpuset_t *onproc;
461#ifdef MULTIPROCESSOR
462	kcpuset_create(&onproc, true);
463	KASSERT(onproc != NULL);
464#else
465	onproc = NULL;
466#endif
467	VM_PAGEMD_PVLIST_READLOCK(mdpg);
468	pmap_pvlist_check(mdpg);
469
470	UVMHIST_LOG(pmaphist, "pv %#jx pv_pmap %#jx", (uintptr_t)pv,
471	    (uintptr_t)pv->pv_pmap, 0, 0);
472
473	if (pv->pv_pmap != NULL) {
474		for (; pv != NULL; pv = pv->pv_next) {
475#ifdef MULTIPROCESSOR
476			UVMHIST_LOG(pmaphist, "pv %#jx pv_pmap %#jx",
477			    (uintptr_t)pv, (uintptr_t)pv->pv_pmap, 0, 0);
478			kcpuset_merge(onproc, pv->pv_pmap->pm_onproc);
479			if (kcpuset_match(onproc, kcpuset_running)) {
480				break;
481			}
482#else
483			if (pv->pv_pmap == curpmap) {
484				onproc = curcpu()->ci_kcpuset;
485				break;
486			}
487#endif
488		}
489	}
490	pmap_pvlist_check(mdpg);
491	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
492	kpreempt_disable();
493	pmap_md_page_syncicache(mdpg, onproc);
494	kpreempt_enable();
495#ifdef MULTIPROCESSOR
496	kcpuset_destroy(onproc);
497#endif
498}
499
500/*
501 * Define the initial bounds of the kernel virtual address space.
502 */
503void
504pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
505{
506	*vstartp = pmap_limits.virtual_start;
507	*vendp = pmap_limits.virtual_end;
508}
509
510vaddr_t
511pmap_growkernel(vaddr_t maxkvaddr)
512{
513	UVMHIST_FUNC(__func__);
514	UVMHIST_CALLARGS(pmaphist, "maxkvaddr=%#jx (%#jx)", maxkvaddr,
515	    pmap_curmaxkvaddr, 0, 0);
516
517	vaddr_t virtual_end = pmap_curmaxkvaddr;
518	maxkvaddr = pmap_round_seg(maxkvaddr) - 1;
519
520	/*
521	 * Don't exceed VM_MAX_KERNEL_ADDRESS!
522	 */
523	if (maxkvaddr == 0 || maxkvaddr > VM_MAX_KERNEL_ADDRESS)
524		maxkvaddr = VM_MAX_KERNEL_ADDRESS;
525
526	/*
527	 * Reserve PTEs for the new KVA space.
528	 */
529	for (; virtual_end < maxkvaddr; virtual_end += NBSEG) {
530		pmap_pte_reserve(pmap_kernel(), virtual_end, 0);
531	}
532
533	kasan_shadow_map((void *)pmap_curmaxkvaddr,
534	    (size_t)(virtual_end - pmap_curmaxkvaddr));
535
536	/*
537	 * Update new end.
538	 */
539	pmap_curmaxkvaddr = virtual_end;
540
541	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
542
543	return virtual_end;
544}
545
546/*
547 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
548 * This function allows for early dynamic memory allocation until the virtual
549 * memory system has been bootstrapped.  After that point, either kmem_alloc
550 * or malloc should be used.  This function works by stealing pages from the
551 * (to be) managed page pool, then implicitly mapping the pages (by using
552 * their direct mapped addresses) and zeroing them.
553 *
554 * It may be used once the physical memory segments have been pre-loaded
555 * into the vm_physmem[] array.  Early memory allocation MUST use this
556 * interface!  This cannot be used after vm_page_startup(), and will
557 * generate a panic if tried.
558 *
559 * Note that this memory will never be freed, and in essence it is wired
560 * down.
561 *
562 * We must adjust *vstartp and/or *vendp iff we use address space
563 * from the kernel virtual address range defined by pmap_virtual_space().
564 */
565vaddr_t
566pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
567{
568	size_t npgs;
569	paddr_t pa;
570	vaddr_t va;
571
572	uvm_physseg_t maybe_bank = UVM_PHYSSEG_TYPE_INVALID;
573
574	size = round_page(size);
575	npgs = atop(size);
576
577	DPRINTF("%s: need %zu pages\n", __func__, npgs);
578
579	for (uvm_physseg_t bank = uvm_physseg_get_first();
580	     uvm_physseg_valid_p(bank);
581	     bank = uvm_physseg_get_next(bank)) {
582
583		if (uvm.page_init_done == true)
584			panic("pmap_steal_memory: called _after_ bootstrap");
585
586		DPRINTF("%s: seg %"PRIxPHYSSEG": %#"PRIxPADDR" %#"PRIxPADDR" %#"PRIxPADDR" %#"PRIxPADDR"\n",
587		    __func__, bank,
588		    uvm_physseg_get_avail_start(bank), uvm_physseg_get_start(bank),
589		    uvm_physseg_get_avail_end(bank), uvm_physseg_get_end(bank));
590
591		if (uvm_physseg_get_avail_start(bank) != uvm_physseg_get_start(bank)
592		    || uvm_physseg_get_avail_start(bank) >= uvm_physseg_get_avail_end(bank)) {
593			DPRINTF("%s: seg %"PRIxPHYSSEG": bad start\n", __func__, bank);
594			continue;
595		}
596
597		if (uvm_physseg_get_avail_end(bank) - uvm_physseg_get_avail_start(bank) < npgs) {
598			DPRINTF("%s: seg %"PRIxPHYSSEG": too small for %zu pages\n",
599			    __func__, bank, npgs);
600			continue;
601		}
602
603		if (!pmap_md_ok_to_steal_p(bank, npgs)) {
604			continue;
605		}
606
607		/*
608		 * Always try to allocate from the segment with the least
609		 * amount of space left.
610		 */
611#define VM_PHYSMEM_SPACE(b)	((uvm_physseg_get_avail_end(b)) - (uvm_physseg_get_avail_start(b)))
612		if (uvm_physseg_valid_p(maybe_bank) == false
613		    || VM_PHYSMEM_SPACE(bank) < VM_PHYSMEM_SPACE(maybe_bank)) {
614			maybe_bank = bank;
615		}
616	}
617
618	if (uvm_physseg_valid_p(maybe_bank)) {
619		const uvm_physseg_t bank = maybe_bank;
620
621		/*
622		 * There are enough pages here; steal them!
623		 */
624		pa = ptoa(uvm_physseg_get_start(bank));
625		uvm_physseg_unplug(atop(pa), npgs);
626
627		DPRINTF("%s: seg %"PRIxPHYSSEG": %zu pages stolen (%#"PRIxPADDR" left)\n",
628		    __func__, bank, npgs, VM_PHYSMEM_SPACE(bank));
629
630		va = pmap_md_map_poolpage(pa, size);
631		memset((void *)va, 0, size);
632		return va;
633	}
634
635	/*
636	 * If we got here, there was no memory left.
637	 */
638	panic("pmap_steal_memory: no memory to steal %zu pages", npgs);
639}
640
641/*
642 *	Bootstrap the system enough to run with virtual memory.
643 *	(Common routine called by machine-dependent bootstrap code.)
644 */
645void
646pmap_bootstrap_common(void)
647{
648	UVMHIST_LINK_STATIC(pmapexechist);
649	UVMHIST_LINK_STATIC(pmaphist);
650	UVMHIST_LINK_STATIC(pmapxtabhist);
651
652	static const struct uvm_pagerops pmap_pager = {
653		/* nothing */
654	};
655
656	pmap_t pm = pmap_kernel();
657
658	rw_init(&pm->pm_obj_lock);
659	uvm_obj_init(&pm->pm_uobject, &pmap_pager, false, 1);
660	uvm_obj_setlock(&pm->pm_uobject, &pm->pm_obj_lock);
661
662	TAILQ_INIT(&pm->pm_ppg_list);
663
664#if defined(PMAP_HWPAGEWALKER)
665	TAILQ_INIT(&pm->pm_pdetab_list);
666#endif
667#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE)
668	TAILQ_INIT(&pm->pm_segtab_list);
669#endif
670
671#if defined(EFI_RUNTIME)
672
673	const pmap_t efipm = pmap_efirt();
674	struct pmap_asid_info * const efipai = PMAP_PAI(efipm, cpu_tlb_info(ci));
675
676	rw_init(&efipm->pm_obj_lock);
677	uvm_obj_init(&efipm->pm_uobject, &pmap_pager, false, 1);
678	uvm_obj_setlock(&efipm->pm_uobject, &efipm->pm_obj_lock);
679
680	efipai->pai_asid = KERNEL_PID;
681
682	TAILQ_INIT(&efipm->pm_ppg_list);
683
684#if defined(PMAP_HWPAGEWALKER)
685	TAILQ_INIT(&efipm->pm_pdetab_list);
686#endif
687#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE)
688	TAILQ_INIT(&efipm->pm_segtab_list);
689#endif
690
691#endif
692
693	/*
694	 * Initialize the segtab lock.
695	 */
696	mutex_init(&pmap_segtab_lock, MUTEX_DEFAULT, IPL_HIGH);
697
698	pmap_tlb_miss_lock_init();
699}
700
701/*
702 *	Initialize the pmap module.
703 *	Called by vm_init, to initialize any structures that the pmap
704 *	system needs to map virtual memory.
705 */
706void
707pmap_init(void)
708{
709	UVMHIST_FUNC(__func__);
710	UVMHIST_CALLED(pmaphist);
711
712	/*
713	 * Set a low water mark on the pv_entry pool, so that we are
714	 * more likely to have these around even in extreme memory
715	 * starvation.
716	 */
717	pool_setlowat(&pmap_pv_pool, pmap_pv_lowat);
718
719	/*
720	 * Set the page colormask but allow pmap_md_init to override it.
721	 */
722	pmap_page_colormask = ptoa(uvmexp.colormask);
723
724	pmap_md_init();
725
726	/*
727	 * Now it is safe to enable pv entry recording.
728	 */
729	pmap_initialized = true;
730}
731
732/*
733 *	Create and return a physical map.
734 *
735 *	If the size specified for the map
736 *	is zero, the map is an actual physical
737 *	map, and may be referenced by the
738 *	hardware.
739 *
740 *	If the size specified is non-zero,
741 *	the map will be used in software only, and
742 *	is bounded by that size.
743 */
744pmap_t
745pmap_create(void)
746{
747	UVMHIST_FUNC(__func__);
748	UVMHIST_CALLED(pmaphist);
749	PMAP_COUNT(create);
750
751	static const struct uvm_pagerops pmap_pager = {
752		/* nothing */
753	};
754
755	pmap_t pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
756	memset(pmap, 0, PMAP_SIZE);
757
758	KASSERT(pmap->pm_pai[0].pai_link.le_prev == NULL);
759
760	pmap->pm_refcnt = 1;
761	pmap->pm_minaddr = VM_MIN_ADDRESS;
762	pmap->pm_maxaddr = VM_MAXUSER_ADDRESS;
763
764	rw_init(&pmap->pm_obj_lock);
765	uvm_obj_init(&pmap->pm_uobject, &pmap_pager, false, 1);
766	uvm_obj_setlock(&pmap->pm_uobject, &pmap->pm_obj_lock);
767
768	TAILQ_INIT(&pmap->pm_ppg_list);
769#if defined(PMAP_HWPAGEWALKER)
770	TAILQ_INIT(&pmap->pm_pdetab_list);
771#endif
772#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE)
773	TAILQ_INIT(&pmap->pm_segtab_list);
774#endif
775
776	pmap_segtab_init(pmap);
777
778#ifdef MULTIPROCESSOR
779	kcpuset_create(&pmap->pm_active, true);
780	kcpuset_create(&pmap->pm_onproc, true);
781	KASSERT(pmap->pm_active != NULL);
782	KASSERT(pmap->pm_onproc != NULL);
783#endif
784
785	UVMHIST_LOG(pmaphist, " <-- done (pmap=%#jx)", (uintptr_t)pmap,
786	    0, 0, 0);
787
788	return pmap;
789}
790
791/*
792 *	Retire the given physical map from service.
793 *	Should only be called if the map contains
794 *	no valid mappings.
795 */
796void
797pmap_destroy(pmap_t pmap)
798{
799	UVMHIST_FUNC(__func__);
800	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx)", (uintptr_t)pmap, 0, 0, 0);
801	UVMHIST_CALLARGS(pmapxtabhist, "(pmap=%#jx)", (uintptr_t)pmap, 0, 0, 0);
802
803	membar_release();
804	if (atomic_dec_uint_nv(&pmap->pm_refcnt) > 0) {
805		PMAP_COUNT(dereference);
806		UVMHIST_LOG(pmaphist, " <-- done (deref)", 0, 0, 0, 0);
807		UVMHIST_LOG(pmapxtabhist, " <-- done (deref)", 0, 0, 0, 0);
808		return;
809	}
810	membar_acquire();
811
812	PMAP_COUNT(destroy);
813	KASSERT(pmap->pm_refcnt == 0);
814	kpreempt_disable();
815	pmap_tlb_miss_lock_enter();
816	pmap_tlb_asid_release_all(pmap);
817	pmap_tlb_miss_lock_exit();
818	pmap_segtab_destroy(pmap, NULL, 0);
819
820	KASSERT(TAILQ_EMPTY(&pmap->pm_ppg_list));
821
822#ifdef _LP64
823#if defined(PMAP_HWPAGEWALKER)
824	KASSERT(TAILQ_EMPTY(&pmap->pm_pdetab_list));
825#endif
826#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE)
827	KASSERT(TAILQ_EMPTY(&pmap->pm_segtab_list));
828#endif
829#endif
830	KASSERT(pmap->pm_uobject.uo_npages == 0);
831
832	uvm_obj_destroy(&pmap->pm_uobject, false);
833	rw_destroy(&pmap->pm_obj_lock);
834
835#ifdef MULTIPROCESSOR
836	kcpuset_destroy(pmap->pm_active);
837	kcpuset_destroy(pmap->pm_onproc);
838	pmap->pm_active = NULL;
839	pmap->pm_onproc = NULL;
840#endif
841
842	pool_put(&pmap_pmap_pool, pmap);
843	kpreempt_enable();
844
845	UVMHIST_LOG(pmaphist, " <-- done (freed)", 0, 0, 0, 0);
846	UVMHIST_LOG(pmapxtabhist, " <-- done (freed)", 0, 0, 0, 0);
847}
848
849/*
850 *	Add a reference to the specified pmap.
851 */
852void
853pmap_reference(pmap_t pmap)
854{
855	UVMHIST_FUNC(__func__);
856	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx)", (uintptr_t)pmap, 0, 0, 0);
857	PMAP_COUNT(reference);
858
859	if (pmap != NULL) {
860		atomic_inc_uint(&pmap->pm_refcnt);
861	}
862
863	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
864}
865
866/*
867 *	Make a new pmap (vmspace) active for the given process.
868 */
869void
870pmap_activate(struct lwp *l)
871{
872	pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
873
874	UVMHIST_FUNC(__func__);
875	UVMHIST_CALLARGS(pmaphist, "(l=%#jx pmap=%#jx)", (uintptr_t)l,
876	    (uintptr_t)pmap, 0, 0);
877	PMAP_COUNT(activate);
878
879	kpreempt_disable();
880	pmap_tlb_miss_lock_enter();
881	pmap_tlb_asid_acquire(pmap, l);
882	pmap_segtab_activate(pmap, l);
883	pmap_tlb_miss_lock_exit();
884	kpreempt_enable();
885
886	UVMHIST_LOG(pmaphist, " <-- done (%ju:%ju)", l->l_proc->p_pid,
887	    l->l_lid, 0, 0);
888}
889
890/*
891 * Remove this page from all physical maps in which it resides.
892 * Reflects back modify bits to the pager.
893 */
894void
895pmap_page_remove(struct vm_page_md *mdpg)
896{
897	kpreempt_disable();
898	VM_PAGEMD_PVLIST_LOCK(mdpg);
899	pmap_pvlist_check(mdpg);
900
901	struct vm_page * const pg =
902	    VM_PAGEMD_VMPAGE_P(mdpg) ? VM_MD_TO_PAGE(mdpg) : NULL;
903
904	UVMHIST_FUNC(__func__);
905	if (pg) {
906		UVMHIST_CALLARGS(pmaphist, "mdpg %#jx pg %#jx (pa %#jx): "
907		    "execpage cleared", (uintptr_t)mdpg, (uintptr_t)pg,
908		    VM_PAGE_TO_PHYS(pg), 0);
909	} else {
910		UVMHIST_CALLARGS(pmaphist, "mdpg %#jx", (uintptr_t)mdpg, 0,
911		    0, 0);
912	}
913
914#ifdef PMAP_VIRTUAL_CACHE_ALIASES
915	pmap_page_clear_attributes(mdpg, VM_PAGEMD_EXECPAGE | VM_PAGEMD_UNCACHED);
916#else
917	pmap_page_clear_attributes(mdpg, VM_PAGEMD_EXECPAGE);
918#endif
919	PMAP_COUNT(exec_uncached_remove);
920
921	pv_entry_t pv = &mdpg->mdpg_first;
922	if (pv->pv_pmap == NULL) {
923		VM_PAGEMD_PVLIST_UNLOCK(mdpg);
924		kpreempt_enable();
925		UVMHIST_LOG(pmaphist, " <-- done (empty)", 0, 0, 0, 0);
926		return;
927	}
928
929	pv_entry_t npv;
930	pv_entry_t pvp = NULL;
931
932	for (; pv != NULL; pv = npv) {
933		npv = pv->pv_next;
934#ifdef PMAP_VIRTUAL_CACHE_ALIASES
935		if (PV_ISKENTER_P(pv)) {
936			UVMHIST_LOG(pmaphist, " pv %#jx pmap %#jx va %#jx"
937			    " skip", (uintptr_t)pv, (uintptr_t)pv->pv_pmap,
938			    pv->pv_va, 0);
939
940			KASSERT(pv->pv_pmap == pmap_kernel());
941
942			/* Assume no more - it'll get fixed if there are */
943			pv->pv_next = NULL;
944
945			/*
946			 * pvp is non-null when we already have a PV_KENTER
947			 * pv in pvh_first; otherwise we haven't seen a
948			 * PV_KENTER pv and we need to copy this one to
949			 * pvh_first
950			 */
951			if (pvp) {
952				/*
953				 * The previous PV_KENTER pv needs to point to
954				 * this PV_KENTER pv
955				 */
956				pvp->pv_next = pv;
957			} else {
958				pv_entry_t fpv = &mdpg->mdpg_first;
959				*fpv = *pv;
960				KASSERT(fpv->pv_pmap == pmap_kernel());
961			}
962			pvp = pv;
963			continue;
964		}
965#endif
966		const pmap_t pmap = pv->pv_pmap;
967		vaddr_t va = trunc_page(pv->pv_va);
968		pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
969		KASSERTMSG(ptep != NULL, "%#"PRIxVADDR " %#"PRIxVADDR, va,
970		    pmap_limits.virtual_end);
971		pt_entry_t pte = *ptep;
972		UVMHIST_LOG(pmaphist, " pv %#jx pmap %#jx va %#jx"
973		    " pte %#jx", (uintptr_t)pv, (uintptr_t)pmap, va,
974		    pte_value(pte));
975		if (!pte_valid_p(pte))
976			continue;
977		const bool is_kernel_pmap_p = (pmap == pmap_kernel());
978		if (is_kernel_pmap_p) {
979			PMAP_COUNT(remove_kernel_pages);
980		} else {
981			PMAP_COUNT(remove_user_pages);
982		}
983		if (pte_wired_p(pte))
984			pmap->pm_stats.wired_count--;
985		pmap->pm_stats.resident_count--;
986
987		pmap_tlb_miss_lock_enter();
988		const pt_entry_t npte = pte_nv_entry(is_kernel_pmap_p);
989		pte_set(ptep, npte);
990		if (__predict_true(!(pmap->pm_flags & PMAP_DEFERRED_ACTIVATE))) {
991			/*
992			 * Flush the TLB for the given address.
993			 */
994			pmap_tlb_invalidate_addr(pmap, va);
995		}
996		pmap_tlb_miss_lock_exit();
997
998		/*
999		 * non-null means this is a non-pvh_first pv, so we should
1000		 * free it.
1001		 */
1002		if (pvp) {
1003			KASSERT(pvp->pv_pmap == pmap_kernel());
1004			KASSERT(pvp->pv_next == NULL);
1005			pmap_pv_free(pv);
1006		} else {
1007			pv->pv_pmap = NULL;
1008			pv->pv_next = NULL;
1009		}
1010	}
1011
1012	pmap_pvlist_check(mdpg);
1013	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
1014	kpreempt_enable();
1015
1016	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1017}
1018
1019#ifdef __HAVE_PMAP_PV_TRACK
1020/*
1021 * pmap_pv_protect: change protection of an unmanaged pv-tracked page from
1022 * all pmaps that map it
1023 */
1024void
1025pmap_pv_protect(paddr_t pa, vm_prot_t prot)
1026{
1027
1028	/* the only case is remove at the moment */
1029	KASSERT(prot == VM_PROT_NONE);
1030	struct pmap_page *pp;
1031
1032	pp = pmap_pv_tracked(pa);
1033	if (pp == NULL)
1034		panic("pmap_pv_protect: page not pv-tracked: 0x%"PRIxPADDR,
1035		    pa);
1036
1037	struct vm_page_md *mdpg = PMAP_PAGE_TO_MD(pp);
1038	pmap_page_remove(mdpg);
1039}
1040#endif
1041
1042/*
1043 *	Make a previously active pmap (vmspace) inactive.
1044 */
1045void
1046pmap_deactivate(struct lwp *l)
1047{
1048	pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
1049
1050	UVMHIST_FUNC(__func__);
1051	UVMHIST_CALLARGS(pmaphist, "(l=%#jx pmap=%#jx)", (uintptr_t)l,
1052	    (uintptr_t)pmap, 0, 0);
1053	PMAP_COUNT(deactivate);
1054
1055	kpreempt_disable();
1056	KASSERT(l == curlwp || l->l_cpu == curlwp->l_cpu);
1057	pmap_tlb_miss_lock_enter();
1058	pmap_tlb_asid_deactivate(pmap);
1059	pmap_segtab_deactivate(pmap);
1060	pmap_tlb_miss_lock_exit();
1061	kpreempt_enable();
1062
1063	UVMHIST_LOG(pmaphist, " <-- done (%ju:%ju)", l->l_proc->p_pid,
1064	    l->l_lid, 0, 0);
1065}
1066
1067void
1068pmap_update(struct pmap *pmap)
1069{
1070	UVMHIST_FUNC(__func__);
1071	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx)", (uintptr_t)pmap, 0, 0, 0);
1072	PMAP_COUNT(update);
1073
1074	kpreempt_disable();
1075#if defined(MULTIPROCESSOR) && defined(PMAP_TLB_NEED_SHOOTDOWN)
1076	u_int pending = atomic_swap_uint(&pmap->pm_shootdown_pending, 0);
1077	if (pending && pmap_tlb_shootdown_bystanders(pmap))
1078		PMAP_COUNT(shootdown_ipis);
1079#endif
1080	pmap_tlb_miss_lock_enter();
1081#if defined(DEBUG) && !defined(MULTIPROCESSOR)
1082	pmap_tlb_check(pmap, pmap_md_tlb_check_entry);
1083#endif /* DEBUG */
1084
1085	/*
1086	 * If pmap_remove_all was called, we deactivated ourselves and nuked
1087	 * our ASID.  Now we have to reactivate ourselves.
1088	 */
1089	if (__predict_false(pmap->pm_flags & PMAP_DEFERRED_ACTIVATE)) {
1090		pmap->pm_flags ^= PMAP_DEFERRED_ACTIVATE;
1091		pmap_tlb_asid_acquire(pmap, curlwp);
1092		pmap_segtab_activate(pmap, curlwp);
1093	}
1094	pmap_tlb_miss_lock_exit();
1095	kpreempt_enable();
1096
1097	UVMHIST_LOG(pmaphist, " <-- done (kernel=%jd)",
1098		    (pmap == pmap_kernel() ? 1 : 0), 0, 0, 0);
1099}
1100
1101/*
1102 *	Remove the given range of addresses from the specified map.
1103 *
1104 *	It is assumed that the start and end are properly
1105 *	rounded to the page size.
1106 */
1107
1108static bool
1109pmap_pte_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, pt_entry_t *ptep,
1110    uintptr_t flags)
1111{
1112	const pt_entry_t npte = flags;
1113	const bool is_kernel_pmap_p = (pmap == pmap_kernel());
1114
1115	UVMHIST_FUNC(__func__);
1116	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx kernel=%jd va=%#jx..%#jx)",
1117	    (uintptr_t)pmap, (is_kernel_pmap_p ? 1 : 0), sva, eva);
1118	UVMHIST_LOG(pmaphist, "ptep=%#jx, flags(npte)=%#jx)",
1119	    (uintptr_t)ptep, flags, 0, 0);
1120
1121	KASSERT(kpreempt_disabled());
1122
1123	for (; sva < eva; sva += NBPG, ptep++) {
1124		const pt_entry_t pte = *ptep;
1125		if (!pte_valid_p(pte))
1126			continue;
1127		if (is_kernel_pmap_p) {
1128			PMAP_COUNT(remove_kernel_pages);
1129		} else {
1130			PMAP_COUNT(remove_user_pages);
1131		}
1132		if (pte_wired_p(pte))
1133			pmap->pm_stats.wired_count--;
1134		pmap->pm_stats.resident_count--;
1135		struct vm_page * const pg = PHYS_TO_VM_PAGE(pte_to_paddr(pte));
1136		if (__predict_true(pg != NULL)) {
1137			pmap_remove_pv(pmap, sva, pg, pte_modified_p(pte));
1138		}
1139		pmap_tlb_miss_lock_enter();
1140		pte_set(ptep, npte);
1141		if (__predict_true(!(pmap->pm_flags & PMAP_DEFERRED_ACTIVATE))) {
1142			/*
1143			 * Flush the TLB for the given address.
1144			 */
1145			pmap_tlb_invalidate_addr(pmap, sva);
1146		}
1147		pmap_tlb_miss_lock_exit();
1148	}
1149
1150	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1151
1152	return false;
1153}
1154
1155void
1156pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
1157{
1158	const bool is_kernel_pmap_p = (pmap == pmap_kernel());
1159	const pt_entry_t npte = pte_nv_entry(is_kernel_pmap_p);
1160
1161	UVMHIST_FUNC(__func__);
1162	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx, va=%#jx..%#jx)",
1163	    (uintptr_t)pmap, sva, eva, 0);
1164
1165	if (is_kernel_pmap_p) {
1166		PMAP_COUNT(remove_kernel_calls);
1167	} else {
1168		PMAP_COUNT(remove_user_calls);
1169	}
1170#ifdef PMAP_FAULTINFO
1171	curpcb->pcb_faultinfo.pfi_faultaddr = 0;
1172	curpcb->pcb_faultinfo.pfi_repeats = 0;
1173	curpcb->pcb_faultinfo.pfi_faultptep = NULL;
1174#endif
1175	kpreempt_disable();
1176	pmap_addr_range_check(pmap, sva, eva, __func__);
1177	pmap_pte_process(pmap, sva, eva, pmap_pte_remove, npte);
1178	kpreempt_enable();
1179
1180	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1181}
1182
1183/*
1184 *	pmap_page_protect:
1185 *
1186 *	Lower the permission for all mappings to a given page.
1187 */
1188void
1189pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1190{
1191	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
1192	pv_entry_t pv;
1193	vaddr_t va;
1194
1195	UVMHIST_FUNC(__func__);
1196	UVMHIST_CALLARGS(pmaphist, "(pg=%#jx (pa %#jx) prot=%#jx)",
1197	    (uintptr_t)pg, VM_PAGE_TO_PHYS(pg), prot, 0);
1198	PMAP_COUNT(page_protect);
1199
1200	switch (prot) {
1201	case VM_PROT_READ | VM_PROT_WRITE:
1202	case VM_PROT_ALL:
1203		break;
1204
1205	/* copy_on_write */
1206	case VM_PROT_READ:
1207	case VM_PROT_READ | VM_PROT_EXECUTE:
1208		pv = &mdpg->mdpg_first;
1209		kpreempt_disable();
1210		VM_PAGEMD_PVLIST_READLOCK(mdpg);
1211		pmap_pvlist_check(mdpg);
1212		/*
1213		 * Loop over all current mappings setting/clearing as
1214		 * appropriate.
1215		 */
1216		if (pv->pv_pmap != NULL) {
1217			while (pv != NULL) {
1218#ifdef PMAP_VIRTUAL_CACHE_ALIASES
1219				if (PV_ISKENTER_P(pv)) {
1220					pv = pv->pv_next;
1221					continue;
1222				}
1223#endif
1224				const pmap_t pmap = pv->pv_pmap;
1225				va = trunc_page(pv->pv_va);
1226				const uintptr_t gen =
1227				    VM_PAGEMD_PVLIST_UNLOCK(mdpg);
1228				pmap_protect(pmap, va, va + PAGE_SIZE, prot);
1229				KASSERT(pv->pv_pmap == pmap);
1230				pmap_update(pmap);
1231				if (gen != VM_PAGEMD_PVLIST_READLOCK(mdpg)) {
1232					pv = &mdpg->mdpg_first;
1233				} else {
1234					pv = pv->pv_next;
1235				}
1236				pmap_pvlist_check(mdpg);
1237			}
1238		}
1239		pmap_pvlist_check(mdpg);
1240		VM_PAGEMD_PVLIST_UNLOCK(mdpg);
1241		kpreempt_enable();
1242		break;
1243
1244	/* remove_all */
1245	default:
1246		pmap_page_remove(mdpg);
1247	}
1248
1249	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1250}
1251
1252static bool
1253pmap_pte_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, pt_entry_t *ptep,
1254	uintptr_t flags)
1255{
1256	const vm_prot_t prot = (flags & VM_PROT_ALL);
1257
1258	UVMHIST_FUNC(__func__);
1259	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx kernel=%jd va=%#jx..%#jx)",
1260	    (uintptr_t)pmap, (pmap == pmap_kernel() ? 1 : 0), sva, eva);
1261	UVMHIST_LOG(pmaphist, "ptep=%#jx, flags(npte)=%#jx)",
1262	    (uintptr_t)ptep, flags, 0, 0);
1263
1264	KASSERT(kpreempt_disabled());
1265	/*
1266	 * Change protection on every valid mapping within this segment.
1267	 */
1268	for (; sva < eva; sva += NBPG, ptep++) {
1269		pt_entry_t pte = *ptep;
1270		if (!pte_valid_p(pte))
1271			continue;
1272		struct vm_page * const pg = PHYS_TO_VM_PAGE(pte_to_paddr(pte));
1273		if (pg != NULL && pte_modified_p(pte)) {
1274			struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
1275			if (VM_PAGEMD_EXECPAGE_P(mdpg)) {
1276				KASSERT(!VM_PAGEMD_PVLIST_EMPTY_P(mdpg));
1277#ifdef PMAP_VIRTUAL_CACHE_ALIASES
1278				if (VM_PAGEMD_CACHED_P(mdpg)) {
1279#endif
1280					UVMHIST_LOG(pmapexechist,
1281					    "pg %#jx (pa %#jx): "
1282					    "syncicached performed",
1283					    (uintptr_t)pg, VM_PAGE_TO_PHYS(pg),
1284					    0, 0);
1285					pmap_page_syncicache(pg);
1286					PMAP_COUNT(exec_synced_protect);
1287#ifdef PMAP_VIRTUAL_CACHE_ALIASES
1288				}
1289#endif
1290			}
1291		}
1292		pte = pte_prot_downgrade(pte, prot);
1293		if (*ptep != pte) {
1294			pmap_tlb_miss_lock_enter();
1295			pte_set(ptep, pte);
1296			/*
1297			 * Update the TLB if needed.
1298			 */
1299			pmap_tlb_update_addr(pmap, sva, pte, PMAP_TLB_NEED_IPI);
1300			pmap_tlb_miss_lock_exit();
1301		}
1302	}
1303
1304	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1305
1306	return false;
1307}
1308
1309/*
1310 *	Set the physical protection on the
1311 *	specified range of this map as requested.
1312 */
1313void
1314pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1315{
1316	UVMHIST_FUNC(__func__);
1317	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx, va=%#jx..%#jx, prot=%ju)",
1318	    (uintptr_t)pmap, sva, eva, prot);
1319	PMAP_COUNT(protect);
1320
1321	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1322		pmap_remove(pmap, sva, eva);
1323		UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1324		return;
1325	}
1326
1327	/*
1328	 * Change protection on every valid mapping within this segment.
1329	 */
1330	kpreempt_disable();
1331	pmap_addr_range_check(pmap, sva, eva, __func__);
1332	pmap_pte_process(pmap, sva, eva, pmap_pte_protect, prot);
1333	kpreempt_enable();
1334
1335	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1336}
1337
1338#if defined(PMAP_VIRTUAL_CACHE_ALIASES) && !defined(PMAP_NO_PV_UNCACHED)
1339/*
1340 *	pmap_page_cache:
1341 *
1342 *	Change all mappings of a managed page to cached/uncached.
1343 */
1344void
1345pmap_page_cache(struct vm_page_md *mdpg, bool cached)
1346{
1347#ifdef UVMHIST
1348	const bool vmpage_p = VM_PAGEMD_VMPAGE_P(mdpg);
1349	struct vm_page * const pg = vmpage_p ? VM_MD_TO_PAGE(mdpg) : NULL;
1350#endif
1351
1352	UVMHIST_FUNC(__func__);
1353	UVMHIST_CALLARGS(pmaphist, "(mdpg=%#jx (pa %#jx) cached=%jd vmpage %jd)",
1354	    (uintptr_t)mdpg, pg ? VM_PAGE_TO_PHYS(pg) : 0, cached, vmpage_p);
1355
1356	KASSERT(kpreempt_disabled());
1357	KASSERT(VM_PAGEMD_PVLIST_LOCKED_P(mdpg));
1358
1359	if (cached) {
1360		pmap_page_clear_attributes(mdpg, VM_PAGEMD_UNCACHED);
1361		PMAP_COUNT(page_cache_restorations);
1362	} else {
1363		pmap_page_set_attributes(mdpg, VM_PAGEMD_UNCACHED);
1364		PMAP_COUNT(page_cache_evictions);
1365	}
1366
1367	for (pv_entry_t pv = &mdpg->mdpg_first; pv != NULL; pv = pv->pv_next) {
1368		pmap_t pmap = pv->pv_pmap;
1369		vaddr_t va = trunc_page(pv->pv_va);
1370
1371		KASSERT(pmap != NULL);
1372		KASSERT(pmap != pmap_kernel() || !pmap_md_direct_mapped_vaddr_p(va));
1373		pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
1374		if (ptep == NULL)
1375			continue;
1376		pt_entry_t pte = *ptep;
1377		if (pte_valid_p(pte)) {
1378			pte = pte_cached_change(pte, cached);
1379			pmap_tlb_miss_lock_enter();
1380			pte_set(ptep, pte);
1381			pmap_tlb_update_addr(pmap, va, pte, PMAP_TLB_NEED_IPI);
1382			pmap_tlb_miss_lock_exit();
1383		}
1384	}
1385
1386	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1387}
1388#endif	/* PMAP_VIRTUAL_CACHE_ALIASES && !PMAP_NO_PV_UNCACHED */
1389
1390/*
1391 *	Insert the given physical page (p) at
1392 *	the specified virtual address (v) in the
1393 *	target physical map with the protection requested.
1394 *
1395 *	If specified, the page will be wired down, meaning
1396 *	that the related pte can not be reclaimed.
1397 *
1398 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1399 *	or lose information.  That is, this routine must actually
1400 *	insert this page into the given map NOW.
1401 */
1402int
1403pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1404{
1405	const bool wired = (flags & PMAP_WIRED) != 0;
1406	const bool is_kernel_pmap_p = (pmap == pmap_kernel());
1407#if defined(EFI_RUNTIME)
1408	const bool is_efirt_pmap_p = (pmap == pmap_efirt());
1409#else
1410	const bool is_efirt_pmap_p = false;
1411#endif
1412	u_int update_flags = (flags & VM_PROT_ALL) != 0 ? PMAP_TLB_INSERT : 0;
1413#ifdef UVMHIST
1414	struct kern_history * const histp =
1415	    ((prot & VM_PROT_EXECUTE) ? &pmapexechist : &pmaphist);
1416#endif
1417
1418	UVMHIST_FUNC(__func__);
1419	UVMHIST_CALLARGS(*histp, "(pmap=%#jx, va=%#jx, pa=%#jx",
1420	    (uintptr_t)pmap, va, pa, 0);
1421	UVMHIST_LOG(*histp, "prot=%#jx flags=%#jx)", prot, flags, 0, 0);
1422
1423	const bool good_color = PMAP_PAGE_COLOROK_P(pa, va);
1424	if (is_kernel_pmap_p) {
1425		PMAP_COUNT(kernel_mappings);
1426		if (!good_color)
1427			PMAP_COUNT(kernel_mappings_bad);
1428	} else {
1429		PMAP_COUNT(user_mappings);
1430		if (!good_color)
1431			PMAP_COUNT(user_mappings_bad);
1432	}
1433	pmap_addr_range_check(pmap, va, va, __func__);
1434
1435	KASSERTMSG(prot & VM_PROT_READ, "no READ (%#x) in prot %#x",
1436	    VM_PROT_READ, prot);
1437
1438	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
1439	struct vm_page_md * const mdpg = (pg ? VM_PAGE_TO_MD(pg) : NULL);
1440
1441	struct vm_page_md *mdpp = NULL;
1442#ifdef __HAVE_PMAP_PV_TRACK
1443	struct pmap_page *pp = pmap_pv_tracked(pa);
1444	mdpp = pp ? PMAP_PAGE_TO_MD(pp) : NULL;
1445#endif
1446
1447	if (mdpg) {
1448		/* Set page referenced/modified status based on flags */
1449		if (flags & VM_PROT_WRITE) {
1450			pmap_page_set_attributes(mdpg, VM_PAGEMD_MODIFIED | VM_PAGEMD_REFERENCED);
1451		} else if (flags & VM_PROT_ALL) {
1452			pmap_page_set_attributes(mdpg, VM_PAGEMD_REFERENCED);
1453		}
1454
1455#ifdef PMAP_VIRTUAL_CACHE_ALIASES
1456		if (!VM_PAGEMD_CACHED_P(mdpg)) {
1457			flags |= PMAP_NOCACHE;
1458			PMAP_COUNT(uncached_mappings);
1459		}
1460#endif
1461
1462		PMAP_COUNT(managed_mappings);
1463	} else if (mdpp) {
1464#ifdef __HAVE_PMAP_PV_TRACK
1465		pmap_page_set_attributes(mdpg, VM_PAGEMD_REFERENCED);
1466
1467		PMAP_COUNT(pvtracked_mappings);
1468#endif
1469	} else if (is_efirt_pmap_p) {
1470		PMAP_COUNT(efirt_mappings);
1471	} else {
1472		/*
1473		 * Assumption: if it is not part of our managed memory
1474		 * then it must be device memory which may be volatile.
1475		 */
1476		if ((flags & PMAP_CACHE_MASK) == 0)
1477			flags |= PMAP_NOCACHE;
1478		PMAP_COUNT(unmanaged_mappings);
1479	}
1480
1481	KASSERTMSG(mdpg == NULL || mdpp == NULL || is_efirt_pmap_p,
1482	    "mdpg %p mdpp %p efirt %s", mdpg, mdpp,
1483	    is_efirt_pmap_p ? "true" : "false");
1484
1485	struct vm_page_md *md = (mdpg != NULL) ? mdpg : mdpp;
1486	pt_entry_t npte = is_efirt_pmap_p ?
1487	    pte_make_enter_efirt(pa, prot, flags) :
1488	    pte_make_enter(pa, md, prot, flags, is_kernel_pmap_p);
1489
1490	kpreempt_disable();
1491
1492	pt_entry_t * const ptep = pmap_pte_reserve(pmap, va, flags);
1493	if (__predict_false(ptep == NULL)) {
1494		kpreempt_enable();
1495		UVMHIST_LOG(*histp, " <-- ENOMEM", 0, 0, 0, 0);
1496		return ENOMEM;
1497	}
1498	const pt_entry_t opte = *ptep;
1499	const bool resident = pte_valid_p(opte);
1500	bool remap = false;
1501	if (resident) {
1502		if (pte_to_paddr(opte) != pa) {
1503			KASSERT(!is_kernel_pmap_p);
1504			const pt_entry_t rpte = pte_nv_entry(false);
1505
1506			pmap_addr_range_check(pmap, va, va + NBPG, __func__);
1507			pmap_pte_process(pmap, va, va + NBPG, pmap_pte_remove,
1508			    rpte);
1509			PMAP_COUNT(user_mappings_changed);
1510			remap = true;
1511		}
1512		update_flags |= PMAP_TLB_NEED_IPI;
1513	}
1514
1515	if (!resident || remap) {
1516		pmap->pm_stats.resident_count++;
1517	}
1518
1519	/* Done after case that may sleep/return. */
1520	if (md)
1521		pmap_enter_pv(pmap, va, pa, md, &npte, 0);
1522
1523	/*
1524	 * Now validate mapping with desired protection/wiring.
1525	 */
1526	if (wired) {
1527		pmap->pm_stats.wired_count++;
1528		npte = pte_wire_entry(npte);
1529	}
1530
1531	UVMHIST_LOG(*histp, "new pte %#jx (pa %#jx)",
1532	    pte_value(npte), pa, 0, 0);
1533
1534	KASSERT(pte_valid_p(npte));
1535
1536	pmap_tlb_miss_lock_enter();
1537	pte_set(ptep, npte);
1538	pmap_tlb_update_addr(pmap, va, npte, update_flags);
1539	pmap_tlb_miss_lock_exit();
1540	kpreempt_enable();
1541
1542	if (pg != NULL && (prot == (VM_PROT_READ | VM_PROT_EXECUTE))) {
1543		KASSERT(mdpg != NULL);
1544		PMAP_COUNT(exec_mappings);
1545		if (!VM_PAGEMD_EXECPAGE_P(mdpg) && pte_cached_p(npte)) {
1546			if (!pte_deferred_exec_p(npte)) {
1547				UVMHIST_LOG(*histp, "va=%#jx pg %#jx: "
1548				    "immediate syncicache",
1549				    va, (uintptr_t)pg, 0, 0);
1550				pmap_page_syncicache(pg);
1551				pmap_page_set_attributes(mdpg,
1552				    VM_PAGEMD_EXECPAGE);
1553				PMAP_COUNT(exec_synced_mappings);
1554			} else {
1555				UVMHIST_LOG(*histp, "va=%#jx pg %#jx: defer "
1556				    "syncicache: pte %#jx",
1557				    va, (uintptr_t)pg, npte, 0);
1558			}
1559		} else {
1560			UVMHIST_LOG(*histp,
1561			    "va=%#jx pg %#jx: no syncicache cached %jd",
1562			    va, (uintptr_t)pg, pte_cached_p(npte), 0);
1563		}
1564	} else if (pg != NULL && (prot & VM_PROT_EXECUTE)) {
1565		KASSERT(mdpg != NULL);
1566		KASSERT(prot & VM_PROT_WRITE);
1567		PMAP_COUNT(exec_mappings);
1568		pmap_page_syncicache(pg);
1569		pmap_page_clear_attributes(mdpg, VM_PAGEMD_EXECPAGE);
1570		UVMHIST_LOG(*histp,
1571		    "va=%#jx pg %#jx: immediate syncicache (writeable)",
1572		    va, (uintptr_t)pg, 0, 0);
1573	}
1574
1575	UVMHIST_LOG(*histp, " <-- 0 (OK)", 0, 0, 0, 0);
1576	return 0;
1577}
1578
1579void
1580pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1581{
1582	pmap_t pmap = pmap_kernel();
1583	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
1584	struct vm_page_md * const mdpg = (pg ? VM_PAGE_TO_MD(pg) : NULL);
1585
1586	UVMHIST_FUNC(__func__);
1587	UVMHIST_CALLARGS(pmaphist, "(va=%#jx pa=%#jx prot=%ju, flags=%#jx)",
1588	    va, pa, prot, flags);
1589	PMAP_COUNT(kenter_pa);
1590
1591	if (mdpg == NULL) {
1592		PMAP_COUNT(kenter_pa_unmanaged);
1593		if ((flags & PMAP_CACHE_MASK) == 0)
1594			flags |= PMAP_NOCACHE;
1595	} else {
1596		if ((flags & PMAP_NOCACHE) == 0 && !PMAP_PAGE_COLOROK_P(pa, va))
1597			PMAP_COUNT(kenter_pa_bad);
1598	}
1599
1600	pt_entry_t npte = pte_make_kenter_pa(pa, mdpg, prot, flags);
1601	kpreempt_disable();
1602	pt_entry_t * const ptep = pmap_pte_reserve(pmap, va, 0);
1603
1604	KASSERTMSG(ptep != NULL, "%#"PRIxVADDR " %#"PRIxVADDR, va,
1605	    pmap_limits.virtual_end);
1606	KASSERT(!pte_valid_p(*ptep));
1607
1608	/*
1609	 * No need to track non-managed pages or PMAP_KMPAGEs pages for aliases
1610	 */
1611#ifdef PMAP_VIRTUAL_CACHE_ALIASES
1612	if (pg != NULL && (flags & PMAP_KMPAGE) == 0
1613	    && pmap_md_virtual_cache_aliasing_p()) {
1614		pmap_enter_pv(pmap, va, pa, mdpg, &npte, PV_KENTER);
1615	}
1616#endif
1617
1618	/*
1619	 * We have the option to force this mapping into the TLB but we
1620	 * don't.  Instead let the next reference to the page do it.
1621	 */
1622	pmap_tlb_miss_lock_enter();
1623	pte_set(ptep, npte);
1624	pmap_tlb_update_addr(pmap_kernel(), va, npte, 0);
1625	pmap_tlb_miss_lock_exit();
1626	kpreempt_enable();
1627#if DEBUG > 1
1628	for (u_int i = 0; i < PAGE_SIZE / sizeof(long); i++) {
1629		if (((long *)va)[i] != ((long *)pa)[i])
1630			panic("%s: contents (%lx) of va %#"PRIxVADDR
1631			    " != contents (%lx) of pa %#"PRIxPADDR, __func__,
1632			    ((long *)va)[i], va, ((long *)pa)[i], pa);
1633	}
1634#endif
1635
1636	UVMHIST_LOG(pmaphist, " <-- done (ptep=%#jx)", (uintptr_t)ptep, 0, 0,
1637	    0);
1638}
1639
1640/*
1641 *	Remove the given range of addresses from the kernel map.
1642 *
1643 *	It is assumed that the start and end are properly
1644 *	rounded to the page size.
1645 */
1646
1647static bool
1648pmap_pte_kremove(pmap_t pmap, vaddr_t sva, vaddr_t eva, pt_entry_t *ptep,
1649	uintptr_t flags)
1650{
1651	const pt_entry_t new_pte = pte_nv_entry(true);
1652
1653	UVMHIST_FUNC(__func__);
1654	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx, sva=%#jx eva=%#jx ptep=%#jx)",
1655	    (uintptr_t)pmap, sva, eva, (uintptr_t)ptep);
1656
1657	KASSERT(kpreempt_disabled());
1658
1659	for (; sva < eva; sva += NBPG, ptep++) {
1660		pt_entry_t pte = *ptep;
1661		if (!pte_valid_p(pte))
1662			continue;
1663
1664		PMAP_COUNT(kremove_pages);
1665#ifdef PMAP_VIRTUAL_CACHE_ALIASES
1666		struct vm_page * const pg = PHYS_TO_VM_PAGE(pte_to_paddr(pte));
1667		if (pg != NULL && pmap_md_virtual_cache_aliasing_p()) {
1668			pmap_remove_pv(pmap, sva, pg, !pte_readonly_p(pte));
1669		}
1670#endif
1671
1672		pmap_tlb_miss_lock_enter();
1673		pte_set(ptep, new_pte);
1674		pmap_tlb_invalidate_addr(pmap, sva);
1675		pmap_tlb_miss_lock_exit();
1676	}
1677
1678	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1679
1680	return false;
1681}
1682
1683void
1684pmap_kremove(vaddr_t va, vsize_t len)
1685{
1686	const vaddr_t sva = trunc_page(va);
1687	const vaddr_t eva = round_page(va + len);
1688
1689	UVMHIST_FUNC(__func__);
1690	UVMHIST_CALLARGS(pmaphist, "(va=%#jx len=%#jx)", va, len, 0, 0);
1691
1692	kpreempt_disable();
1693	pmap_pte_process(pmap_kernel(), sva, eva, pmap_pte_kremove, 0);
1694	kpreempt_enable();
1695
1696	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1697}
1698
1699bool
1700pmap_remove_all(struct pmap *pmap)
1701{
1702	UVMHIST_FUNC(__func__);
1703	UVMHIST_CALLARGS(pmaphist, "(pm=%#jx)", (uintptr_t)pmap, 0, 0, 0);
1704
1705	KASSERT(pmap != pmap_kernel());
1706
1707	kpreempt_disable();
1708	/*
1709	 * Free all of our ASIDs which means we can skip doing all the
1710	 * tlb_invalidate_addrs().
1711	 */
1712	pmap_tlb_miss_lock_enter();
1713#ifdef MULTIPROCESSOR
1714	// This should be the last CPU with this pmap onproc
1715	KASSERT(!kcpuset_isotherset(pmap->pm_onproc, cpu_index(curcpu())));
1716	if (kcpuset_isset(pmap->pm_onproc, cpu_index(curcpu())))
1717#endif
1718		pmap_tlb_asid_deactivate(pmap);
1719#ifdef MULTIPROCESSOR
1720	KASSERT(kcpuset_iszero(pmap->pm_onproc));
1721#endif
1722	pmap_tlb_asid_release_all(pmap);
1723	pmap_tlb_miss_lock_exit();
1724	pmap->pm_flags |= PMAP_DEFERRED_ACTIVATE;
1725
1726#ifdef PMAP_FAULTINFO
1727	curpcb->pcb_faultinfo.pfi_faultaddr = 0;
1728	curpcb->pcb_faultinfo.pfi_repeats = 0;
1729	curpcb->pcb_faultinfo.pfi_faultptep = NULL;
1730#endif
1731	kpreempt_enable();
1732
1733	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1734	return false;
1735}
1736
1737/*
1738 *	Routine:	pmap_unwire
1739 *	Function:	Clear the wired attribute for a map/virtual-address
1740 *			pair.
1741 *	In/out conditions:
1742 *			The mapping must already exist in the pmap.
1743 */
1744void
1745pmap_unwire(pmap_t pmap, vaddr_t va)
1746{
1747	UVMHIST_FUNC(__func__);
1748	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx, va=%#jx)", (uintptr_t)pmap, va,
1749	    0, 0);
1750	PMAP_COUNT(unwire);
1751
1752	/*
1753	 * Don't need to flush the TLB since PG_WIRED is only in software.
1754	 */
1755	kpreempt_disable();
1756	pmap_addr_range_check(pmap, va, va, __func__);
1757	pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
1758	KASSERTMSG(ptep != NULL, "pmap %p va %#"PRIxVADDR" invalid STE",
1759	    pmap, va);
1760	pt_entry_t pte = *ptep;
1761	KASSERTMSG(pte_valid_p(pte),
1762	    "pmap %p va %#" PRIxVADDR " invalid PTE %#" PRIxPTE " @ %p",
1763	    pmap, va, pte_value(pte), ptep);
1764
1765	if (pte_wired_p(pte)) {
1766		pmap_tlb_miss_lock_enter();
1767		pte_set(ptep, pte_unwire_entry(pte));
1768		pmap_tlb_miss_lock_exit();
1769		pmap->pm_stats.wired_count--;
1770	}
1771#ifdef DIAGNOSTIC
1772	else {
1773		printf("%s: wiring for pmap %p va %#"PRIxVADDR" unchanged!\n",
1774		    __func__, pmap, va);
1775	}
1776#endif
1777	kpreempt_enable();
1778
1779	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
1780}
1781
1782/*
1783 *	Routine:	pmap_extract
1784 *	Function:
1785 *		Extract the physical page address associated
1786 *		with the given map/virtual_address pair.
1787 */
1788bool
1789pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
1790{
1791	paddr_t pa;
1792
1793	if (pmap == pmap_kernel()) {
1794		if (pmap_md_direct_mapped_vaddr_p(va)) {
1795			pa = pmap_md_direct_mapped_vaddr_to_paddr(va);
1796			goto done;
1797		}
1798		if (pmap_md_io_vaddr_p(va))
1799			panic("pmap_extract: io address %#"PRIxVADDR"", va);
1800
1801		if (va >= pmap_limits.virtual_end)
1802			panic("%s: illegal kernel mapped address %#"PRIxVADDR,
1803			    __func__, va);
1804	}
1805	kpreempt_disable();
1806	const pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
1807	if (ptep == NULL || !pte_valid_p(*ptep)) {
1808		kpreempt_enable();
1809		return false;
1810	}
1811	pa = pte_to_paddr(*ptep) | (va & PGOFSET);
1812	kpreempt_enable();
1813done:
1814	if (pap != NULL) {
1815		*pap = pa;
1816	}
1817	return true;
1818}
1819
1820/*
1821 *	Copy the range specified by src_addr/len
1822 *	from the source map to the range dst_addr/len
1823 *	in the destination map.
1824 *
1825 *	This routine is only advisory and need not do anything.
1826 */
1827void
1828pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vaddr_t dst_addr, vsize_t len,
1829    vaddr_t src_addr)
1830{
1831	UVMHIST_FUNC(__func__);
1832	UVMHIST_CALLARGS(pmaphist, "(dpm=#%jx spm=%#jx dva=%#jx sva=%#jx",
1833	    (uintptr_t)dst_pmap, (uintptr_t)src_pmap, dst_addr, src_addr);
1834	UVMHIST_LOG(pmaphist, "... len=%#jx)", len, 0, 0, 0);
1835	PMAP_COUNT(copy);
1836}
1837
1838/*
1839 *	pmap_clear_reference:
1840 *
1841 *	Clear the reference bit on the specified physical page.
1842 */
1843bool
1844pmap_clear_reference(struct vm_page *pg)
1845{
1846	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
1847
1848	UVMHIST_FUNC(__func__);
1849	UVMHIST_CALLARGS(pmaphist, "(pg=%#jx (pa %#jx))",
1850	    (uintptr_t)pg, VM_PAGE_TO_PHYS(pg), 0, 0);
1851
1852	bool rv = pmap_page_clear_attributes(mdpg, VM_PAGEMD_REFERENCED);
1853
1854	UVMHIST_LOG(pmaphist, " <-- wasref %ju", rv, 0, 0, 0);
1855
1856	return rv;
1857}
1858
1859/*
1860 *	pmap_is_referenced:
1861 *
1862 *	Return whether or not the specified physical page is referenced
1863 *	by any physical maps.
1864 */
1865bool
1866pmap_is_referenced(struct vm_page *pg)
1867{
1868	return VM_PAGEMD_REFERENCED_P(VM_PAGE_TO_MD(pg));
1869}
1870
1871/*
1872 *	Clear the modify bits on the specified physical page.
1873 */
1874bool
1875pmap_clear_modify(struct vm_page *pg)
1876{
1877	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
1878	pv_entry_t pv = &mdpg->mdpg_first;
1879	pv_entry_t pv_next;
1880
1881	UVMHIST_FUNC(__func__);
1882	UVMHIST_CALLARGS(pmaphist, "(pg=%#jx (%#jx))",
1883	    (uintptr_t)pg, VM_PAGE_TO_PHYS(pg), 0, 0);
1884	PMAP_COUNT(clear_modify);
1885
1886	if (VM_PAGEMD_EXECPAGE_P(mdpg)) {
1887		if (pv->pv_pmap == NULL) {
1888			UVMHIST_LOG(pmapexechist,
1889			    "pg %#jx (pa %#jx): execpage cleared",
1890			    (uintptr_t)pg, VM_PAGE_TO_PHYS(pg), 0, 0);
1891			pmap_page_clear_attributes(mdpg, VM_PAGEMD_EXECPAGE);
1892			PMAP_COUNT(exec_uncached_clear_modify);
1893		} else {
1894			UVMHIST_LOG(pmapexechist,
1895			    "pg %#jx (pa %#jx): syncicache performed",
1896			    (uintptr_t)pg, VM_PAGE_TO_PHYS(pg), 0, 0);
1897			pmap_page_syncicache(pg);
1898			PMAP_COUNT(exec_synced_clear_modify);
1899		}
1900	}
1901	if (!pmap_page_clear_attributes(mdpg, VM_PAGEMD_MODIFIED)) {
1902		UVMHIST_LOG(pmaphist, " <-- false", 0, 0, 0, 0);
1903		return false;
1904	}
1905	if (pv->pv_pmap == NULL) {
1906		UVMHIST_LOG(pmaphist, " <-- true (no mappings)", 0, 0, 0, 0);
1907		return true;
1908	}
1909
1910	/*
1911	 * Remove write access from any pages that are dirty so we can
1912	 * tell if they are written to again later.  Flush the VAC first
1913	 * if there is one.
1914	 */
1915	kpreempt_disable();
1916	VM_PAGEMD_PVLIST_READLOCK(mdpg);
1917	pmap_pvlist_check(mdpg);
1918	for (; pv != NULL; pv = pv_next) {
1919		pmap_t pmap = pv->pv_pmap;
1920		vaddr_t va = trunc_page(pv->pv_va);
1921
1922		pv_next = pv->pv_next;
1923#ifdef PMAP_VIRTUAL_CACHE_ALIASES
1924		if (PV_ISKENTER_P(pv))
1925			continue;
1926#endif
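		/*
		 * Compute a write-protected copy of the PTE; if it is
		 * unchanged, this mapping is already read-only and can
		 * be skipped.
		 */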
1927		pt_entry_t * const ptep = pmap_pte_lookup(pmap, va);
1928		KASSERT(ptep);
1929		pt_entry_t pte = pte_prot_nowrite(*ptep);
1930		if (*ptep == pte) {
1931			continue;
1932		}
1933		KASSERT(pte_valid_p(pte));
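		/*
		 * Drop the pvlist lock while the PTE is rewritten and the
		 * TLB entry is shot down; the saved generation lets us
		 * detect any change to the list while it was unlocked.
		 */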
1934		const uintptr_t gen = VM_PAGEMD_PVLIST_UNLOCK(mdpg);
1935		pmap_tlb_miss_lock_enter();
1936		pte_set(ptep, pte);
1937		pmap_tlb_invalidate_addr(pmap, va);
1938		pmap_tlb_miss_lock_exit();
1939		pmap_update(pmap);
1940		if (__predict_false(gen != VM_PAGEMD_PVLIST_READLOCK(mdpg))) {
1941			/*
1942			 * The list changed!  So restart from the beginning.
1943			 */
1944			pv_next = &mdpg->mdpg_first;
1945			pmap_pvlist_check(mdpg);
1946		}
1947	}
1948	pmap_pvlist_check(mdpg);
1949	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
1950	kpreempt_enable();
1951
1952	UVMHIST_LOG(pmaphist, " <-- true (mappings changed)", 0, 0, 0, 0);
1953	return true;
1954}
1955
1956/*
1957 *	pmap_is_modified:
1958 *
1959 *	Return whether or not the specified physical page is modified
1960 *	by any physical maps.
1961 */
1962bool
1963pmap_is_modified(struct vm_page *pg)
1964{
1965	return VM_PAGEMD_MODIFIED_P(VM_PAGE_TO_MD(pg));
1966}
1967
1968/*
1969 *	pmap_set_modified:
1970 *
1971 *	Sets the page modified reference bit for the specified page.
1972 */
1973void
1974pmap_set_modified(paddr_t pa)
1975{
1976	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
1977	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
1978	pmap_page_set_attributes(mdpg, VM_PAGEMD_MODIFIED | VM_PAGEMD_REFERENCED);
1979}
1980
1981/******************** pv_entry management ********************/
1982
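/*
 * pmap_pvlist_check:
 *
 *	(DEBUG only) Sanity-check the pv list of a page: no direct-mapped
 *	kernel VAs and, where virtual cache aliases matter, no multiple
 *	colors on a cached page.
 */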
1983static void
1984pmap_pvlist_check(struct vm_page_md *mdpg)
1985{
1986#ifdef DEBUG
1987	pv_entry_t pv = &mdpg->mdpg_first;
1988	if (pv->pv_pmap != NULL) {
1989#ifdef PMAP_VIRTUAL_CACHE_ALIASES
1990		const u_int colormask = uvmexp.colormask;
1991		u_int colors = 0;
1992#endif
1993		for (; pv != NULL; pv = pv->pv_next) {
1994			KASSERT(pv->pv_pmap != pmap_kernel() || !pmap_md_direct_mapped_vaddr_p(pv->pv_va));
1995#ifdef PMAP_VIRTUAL_CACHE_ALIASES
1996			colors |= __BIT(atop(pv->pv_va) & colormask);
1997#endif
1998		}
1999#ifdef PMAP_VIRTUAL_CACHE_ALIASES
2000		// Assert that if more than one color is mapped, the page
2001		// is uncached.
2002		KASSERTMSG(!pmap_md_virtual_cache_aliasing_p()
2003		    || colors == 0 || (colors & (colors-1)) == 0
2004		    || VM_PAGEMD_UNCACHED_P(mdpg), "colors=%#x uncached=%u",
2005		    colors, VM_PAGEMD_UNCACHED_P(mdpg));
2006#endif
2007	} else {
2008		KASSERT(pv->pv_next == NULL);
2009	}
2010#endif /* DEBUG */
2011}
2012
2013/*
2014 * Enter the pmap and virtual address into the
2015 * physical to virtual map table.
2016 */
2017void
2018pmap_enter_pv(pmap_t pmap, vaddr_t va, paddr_t pa, struct vm_page_md *mdpg,
2019    pt_entry_t *nptep, u_int flags)
2020{
2021	pv_entry_t pv, npv, apv;
2022#ifdef UVMHIST
2023	bool first = false;
2024	struct vm_page *pg = VM_PAGEMD_VMPAGE_P(mdpg) ? VM_MD_TO_PAGE(mdpg) :
2025	    NULL;
2026#endif
2027
2028	UVMHIST_FUNC(__func__);
2029	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx va=%#jx pg=%#jx (%#jx)",
2030	    (uintptr_t)pmap, va, (uintptr_t)pg, pa);
2031	UVMHIST_LOG(pmaphist, "nptep=%#jx (%#jx))",
2032	    (uintptr_t)nptep, pte_value(*nptep), 0, 0);
2033
2034	KASSERT(kpreempt_disabled());
2035	KASSERT(pmap != pmap_kernel() || !pmap_md_direct_mapped_vaddr_p(va));
2036	KASSERTMSG(pmap != pmap_kernel() || !pmap_md_io_vaddr_p(va),
2037	    "va %#"PRIxVADDR, va);
2038
2039	apv = NULL;
2040	VM_PAGEMD_PVLIST_LOCK(mdpg);
2041again:
2042	pv = &mdpg->mdpg_first;
2043	pmap_pvlist_check(mdpg);
2044	if (pv->pv_pmap == NULL) {
2045		KASSERT(pv->pv_next == NULL);
2046		/*
2047		 * No entries yet, use header as the first entry
2048		 */
2049		PMAP_COUNT(primary_mappings);
2050		PMAP_COUNT(mappings);
2051#ifdef UVMHIST
2052		first = true;
2053#endif
2054#ifdef PMAP_VIRTUAL_CACHE_ALIASES
2055		KASSERT(VM_PAGEMD_CACHED_P(mdpg));
2056		// If the new mapping has an incompatible color with the last
2057		// mapping of this page, clean the page before using it.
2058		if (!PMAP_PAGE_COLOROK_P(va, pv->pv_va)) {
2059			pmap_md_vca_clean(mdpg, PMAP_WBINV);
2060		}
2061#endif
2062		pv->pv_pmap = pmap;
2063		pv->pv_va = va | flags;
2064	} else {
2065#ifdef PMAP_VIRTUAL_CACHE_ALIASES
2066		if (pmap_md_vca_add(mdpg, va, nptep)) {
2067			goto again;
2068		}
2069#endif
2070
2071		/*
2072		 * There is at least one other VA mapping this page.
2073		 * Place this entry after the header.
2074		 *
2075		 * Note: the entry may already be in the table if
2076		 * we are only changing the protection bits.
2077		 */
2078
2079		for (npv = pv; npv; npv = npv->pv_next) {
2080			if (pmap == npv->pv_pmap
2081			    && va == trunc_page(npv->pv_va)) {
2082#ifdef PARANOIADIAG
2083				pt_entry_t *ptep = pmap_pte_lookup(pmap, va);
2084				pt_entry_t pte = (ptep != NULL) ? *ptep : 0;
2085				if (!pte_valid_p(pte) || pte_to_paddr(pte) != pa)
2086					printf("%s: found va %#"PRIxVADDR
2087					    " pa %#"PRIxPADDR
2088					    " in pv_table but != %#"PRIxPTE"\n",
2089					    __func__, va, pa, pte_value(pte));
2090#endif
2091				PMAP_COUNT(remappings);
2092				VM_PAGEMD_PVLIST_UNLOCK(mdpg);
2093				if (__predict_false(apv != NULL))
2094					pmap_pv_free(apv);
2095
2096				UVMHIST_LOG(pmaphist,
2097				    " <-- done pv=%#jx (reused)",
2098				    (uintptr_t)pv, 0, 0, 0);
2099				return;
2100			}
2101		}
2102		if (__predict_true(apv == NULL)) {
2103			/*
2104			 * To allocate a PV we have to release the PVLIST
2105			 * lock, so record the page generation first.  We
2106			 * allocate the PV and then reacquire the lock.
2107			 */
2108			pmap_pvlist_check(mdpg);
2109			const uintptr_t gen = VM_PAGEMD_PVLIST_UNLOCK(mdpg);
2110
2111			apv = (pv_entry_t)pmap_pv_alloc();
2112			if (apv == NULL)
2113				panic("pmap_enter_pv: pmap_pv_alloc() failed");
2114
2115			/*
2116			 * If the generation has changed, then someone else
2117			 * tinkered with this page so we should start over.
2118			 */
2119			if (gen != VM_PAGEMD_PVLIST_LOCK(mdpg))
2120				goto again;
2121		}
2122		npv = apv;
2123		apv = NULL;
2124#ifdef PMAP_VIRTUAL_CACHE_ALIASES
2125		/*
2126		 * If we need to deal with virtual cache aliases, keep
2127		 * mappings in the kernel pmap at the head of the list.
2128		 * This allows the VCA code to easily use them for cache
2129		 * operations if present.
2130		 */
2131		pmap_t kpmap = pmap_kernel();
2132		if (pmap != kpmap) {
2133			while (pv->pv_pmap == kpmap && pv->pv_next != NULL) {
2134				pv = pv->pv_next;
2135			}
2136		}
2137#endif
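		/*
		 * Fill in the new entry and link it into the list after pv.
		 */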
2138		npv->pv_va = va | flags;
2139		npv->pv_pmap = pmap;
2140		npv->pv_next = pv->pv_next;
2141		pv->pv_next = npv;
2142		PMAP_COUNT(mappings);
2143	}
2144	pmap_pvlist_check(mdpg);
2145	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
2146	if (__predict_false(apv != NULL))
2147		pmap_pv_free(apv);
2148
2149	UVMHIST_LOG(pmaphist, " <-- done pv=%#jx (first %ju)", (uintptr_t)pv,
2150	    first, 0, 0);
2151}
2152
2153/*
2154 * Remove a physical to virtual address translation.
2155 * If cache was inhibited on this page, and there are no more cache
2156 * conflicts, restore caching.
2157 * Flush the cache if the last page is removed (should always be cached
2158 * at this point).
2159 */
2160void
2161pmap_remove_pv(pmap_t pmap, vaddr_t va, struct vm_page *pg, bool dirty)
2162{
2163	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
2164	pv_entry_t pv, npv;
2165	bool last;
2166
2167	UVMHIST_FUNC(__func__);
2168	UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx, va=%#jx, pg=%#jx (pa %#jx)",
2169	    (uintptr_t)pmap, va, (uintptr_t)pg, VM_PAGE_TO_PHYS(pg));
2170	UVMHIST_LOG(pmaphist, "dirty=%ju)", dirty, 0, 0, 0);
2171
2172	KASSERT(kpreempt_disabled());
2173	KASSERT((va & PAGE_MASK) == 0);
2174	pv = &mdpg->mdpg_first;
2175
2176	VM_PAGEMD_PVLIST_LOCK(mdpg);
2177	pmap_pvlist_check(mdpg);
2178
2179	/*
2180	 * If it is the first entry on the list, it is actually
2181	 * in the header and we must copy the following entry up
2182	 * to the header.  Otherwise we must search the list for
2183	 * the entry.  In either case we free the now unused entry.
2184	 */
2185
2186	last = false;
2187	if (pmap == pv->pv_pmap && va == trunc_page(pv->pv_va)) {
2188		npv = pv->pv_next;
2189		if (npv) {
2190			*pv = *npv;
2191			KASSERT(pv->pv_pmap != NULL);
2192		} else {
2193#ifdef PMAP_VIRTUAL_CACHE_ALIASES
2194			pmap_page_clear_attributes(mdpg, VM_PAGEMD_UNCACHED);
2195#endif
2196			pv->pv_pmap = NULL;
2197			last = true;	/* Last mapping removed */
2198		}
2199		PMAP_COUNT(remove_pvfirst);
2200	} else {
2201		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
2202			PMAP_COUNT(remove_pvsearch);
2203			if (pmap == npv->pv_pmap && va == trunc_page(npv->pv_va))
2204				break;
2205		}
2206		if (npv) {
2207			pv->pv_next = npv->pv_next;
2208		}
2209	}
2210
2211	pmap_pvlist_check(mdpg);
2212	VM_PAGEMD_PVLIST_UNLOCK(mdpg);
2213
2214#ifdef PMAP_VIRTUAL_CACHE_ALIASES
2215	pmap_md_vca_remove(pg, va, dirty, last);
2216#endif
2217
2218	/*
2219	 * Free the pv_entry if needed.
2220	 */
2221	if (npv)
2222		pmap_pv_free(npv);
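	/*
	 * If the page was mapped for execution and the removed mapping
	 * may have dirtied it, keep the instruction cache coherent:
	 * forget the exec attribute if this was the last mapping, or
	 * sync the icache if other mappings remain.
	 */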
2223	if (VM_PAGEMD_EXECPAGE_P(mdpg) && dirty) {
2224		if (last) {
2225			/*
2226			 * If this was the page's last mapping, we no longer
2227			 * care about its execness.
2228			 */
2229			UVMHIST_LOG(pmapexechist,
2230			    "pg %#jx (pa %#jx) last %ju: execpage cleared",
2231			    (uintptr_t)pg, VM_PAGE_TO_PHYS(pg), last, 0);
2232			pmap_page_clear_attributes(mdpg, VM_PAGEMD_EXECPAGE);
2233			PMAP_COUNT(exec_uncached_remove);
2234		} else {
2235			/*
2236			 * Someone still has it mapped as an executable page
2237			 * so we must sync it.
2238			 */
2239			UVMHIST_LOG(pmapexechist,
2240			    "pg %#jx (pa %#jx) last %ju: performed syncicache",
2241			    (uintptr_t)pg, VM_PAGE_TO_PHYS(pg), last, 0);
2242			pmap_page_syncicache(pg);
2243			PMAP_COUNT(exec_synced_remove);
2244		}
2245	}
2246
2247	UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0);
2248}
2249
2250#if defined(MULTIPROCESSOR)
2251struct pmap_pvlist_info {
2252	kmutex_t *pli_locks[PAGE_SIZE / 32];
2253	volatile u_int pli_lock_refs[PAGE_SIZE / 32];
2254	volatile u_int pli_lock_index;
2255	u_int pli_lock_mask;
2256} pmap_pvlist_info;
2257
2258void
2259pmap_pvlist_lock_init(size_t cache_line_size)
2260{
2261	struct pmap_pvlist_info * const pli = &pmap_pvlist_info;
2262	const vaddr_t lock_page = uvm_pageboot_alloc(PAGE_SIZE);
2263	vaddr_t lock_va = lock_page;
2264	if (sizeof(kmutex_t) > cache_line_size) {
2265		cache_line_size = roundup2(sizeof(kmutex_t), cache_line_size);
2266	}
2267	const size_t nlocks = PAGE_SIZE / cache_line_size;
2268	KASSERT((nlocks & (nlocks - 1)) == 0);
2269	/*
2270	 * Now divide the page into a number of mutexes, one per cacheline.
2271	 */
2272	for (size_t i = 0; i < nlocks; lock_va += cache_line_size, i++) {
2273		kmutex_t * const lock = (kmutex_t *)lock_va;
2274		mutex_init(lock, MUTEX_DEFAULT, IPL_HIGH);
2275		pli->pli_locks[i] = lock;
2276	}
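	/*
	 * nlocks is a power of two (asserted above), so a simple mask
	 * is enough to pick a lock from a hash value.
	 */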
2277	pli->pli_lock_mask = nlocks - 1;
2278}
2279
2280kmutex_t *
2281pmap_pvlist_lock_addr(struct vm_page_md *mdpg)
2282{
2283	struct pmap_pvlist_info * const pli = &pmap_pvlist_info;
2284	kmutex_t *lock = mdpg->mdpg_lock;
2285
2286	/*
2287	 * Allocate a lock on an as-needed basis.  This will hopefully give
2288	 * us a semi-random distribution that is not based on page color.
2289	 */
2290	if (__predict_false(lock == NULL)) {
2291		size_t locknum = atomic_add_int_nv(&pli->pli_lock_index, 37);
2292		size_t lockid = locknum & pli->pli_lock_mask;
2293		kmutex_t * const new_lock = pli->pli_locks[lockid];
2294		/*
2295		 * Set the lock.  If some other thread already did, just use
2296		 * the one they assigned.
2297		 */
2298		lock = atomic_cas_ptr(&mdpg->mdpg_lock, NULL, new_lock);
2299		if (lock == NULL) {
2300			lock = new_lock;
2301			atomic_inc_uint(&pli->pli_lock_refs[lockid]);
2302		}
2303	}
2304
2305	/*
2306	 * Now finally provide the lock.
2307	 */
2308	return lock;
2309}
2310#else /* !MULTIPROCESSOR */
2311void
2312pmap_pvlist_lock_init(size_t cache_line_size)
2313{
2314	mutex_init(&pmap_pvlist_mutex, MUTEX_DEFAULT, IPL_HIGH);
2315}
2316
2317#ifdef MODULAR
2318kmutex_t *
2319pmap_pvlist_lock_addr(struct vm_page_md *mdpg)
2320{
2321	/*
2322	 * We just use a global lock.
2323	 */
2324	if (__predict_false(mdpg->mdpg_lock == NULL)) {
2325		mdpg->mdpg_lock = &pmap_pvlist_mutex;
2326	}
2327
2328	/*
2329	 * Now finally provide the lock.
2330	 */
2331	return mdpg->mdpg_lock;
2332}
2333#endif /* MODULAR */
2334#endif /* !MULTIPROCESSOR */
2335
2336/*
2337 * pmap_pv_page_alloc:
2338 *
2339 *	Allocate a page for the pv_entry pool.
2340 */
2341void *
2342pmap_pv_page_alloc(struct pool *pp, int flags)
2343{
2344	struct vm_page * const pg = pmap_md_alloc_poolpage(UVM_PGA_USERESERVE);
2345	if (pg == NULL)
2346		return NULL;
2347
2348	return (void *)pmap_md_map_poolpage(VM_PAGE_TO_PHYS(pg), PAGE_SIZE);
2349}
2350
2351/*
2352 * pmap_pv_page_free:
2353 *
2354 *	Free a pv_entry pool page.
2355 */
2356void
2357pmap_pv_page_free(struct pool *pp, void *v)
2358{
2359	vaddr_t va = (vaddr_t)v;
2360
2361	KASSERT(pmap_md_direct_mapped_vaddr_p(va));
2362	const paddr_t pa = pmap_md_direct_mapped_vaddr_to_paddr(va);
2363	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
2364	KASSERT(pg != NULL);
2365#ifdef PMAP_VIRTUAL_CACHE_ALIASES
2366	kpreempt_disable();
2367	pmap_md_vca_remove(pg, va, true, true);
2368	kpreempt_enable();
2369#endif
2370	pmap_page_clear_attributes(VM_PAGE_TO_MD(pg), VM_PAGEMD_POOLPAGE);
2371	KASSERT(!VM_PAGEMD_EXECPAGE_P(VM_PAGE_TO_MD(pg)));
2372	uvm_pagefree(pg);
2373}
2374
2375#ifdef PMAP_PREFER
2376/*
2377 * Find first virtual address >= *vap that doesn't cause
2378 * a cache alias conflict.
2379 */
2380void
2381pmap_prefer(vaddr_t foff, vaddr_t *vap, vsize_t sz, int td)
2382{
2383	vsize_t prefer_mask = ptoa(uvmexp.colormask);
2384
2385	PMAP_COUNT(prefer_requests);
2386
2387	prefer_mask |= pmap_md_cache_prefer_mask();
2388
2389	if (prefer_mask) {
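		/*
		 * Compute how far va would have to move to give it the
		 * same cache color as foff, then round the hint up (or
		 * down for top-down allocations) by that amount.
		 */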
2390		vaddr_t	va = *vap;
2391		vsize_t d = (foff - va) & prefer_mask;
2392		if (d) {
2393			if (td)
2394				*vap = trunc_page(va - ((-d) & prefer_mask));
2395			else
2396				*vap = round_page(va + d);
2397			PMAP_COUNT(prefer_adjustments);
2398		}
2399	}
2400}
2401#endif /* PMAP_PREFER */
2402
2403#ifdef PMAP_MAP_POOLPAGE
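/*
 * pmap_map_poolpage:
 *
 *	Map a physical page for use by the pool subsystem and mark it
 *	as a pool page.
 */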
2404vaddr_t
2405pmap_map_poolpage(paddr_t pa)
2406{
2407	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
2408	KASSERT(pg);
2409
2410	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
2411	KASSERT(!VM_PAGEMD_EXECPAGE_P(mdpg));
2412
2413	pmap_page_set_attributes(mdpg, VM_PAGEMD_POOLPAGE);
2414
2415	return pmap_md_map_poolpage(pa, NBPG);
2416}
2417
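/*
 * pmap_unmap_poolpage:
 *
 *	Undo pmap_map_poolpage() and return the page's physical address.
 */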
2418paddr_t
2419pmap_unmap_poolpage(vaddr_t va)
2420{
2421	KASSERT(pmap_md_direct_mapped_vaddr_p(va));
2422	paddr_t pa = pmap_md_direct_mapped_vaddr_to_paddr(va);
2423
2424	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
2425	KASSERT(pg != NULL);
2426	KASSERT(!VM_PAGEMD_EXECPAGE_P(VM_PAGE_TO_MD(pg)));
2427
2428	pmap_page_clear_attributes(VM_PAGE_TO_MD(pg), VM_PAGEMD_POOLPAGE);
2429	pmap_md_unmap_poolpage(va, NBPG);
2430
2431	return pa;
2432}
2433#endif /* PMAP_MAP_POOLPAGE */
2434
2435#ifdef DDB
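/*
 * pmap_db_mdpg_print:
 *
 *	DDB helper: print the attributes and pv list of a page.
 */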
2436void
2437pmap_db_mdpg_print(struct vm_page *pg,
    void (*pr)(const char *, ...) __printflike(1, 2))
2438{
2439	struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg);
2440	pv_entry_t pv = &mdpg->mdpg_first;
2441
2442	if (pv->pv_pmap == NULL) {
2443		pr(" no mappings\n");
2444		return;
2445	}
2446
2447	int lcount = 0;
2448	if (VM_PAGEMD_VMPAGE_P(mdpg)) {
2449		pr(" vmpage");
2450		lcount++;
2451	}
2452	if (VM_PAGEMD_POOLPAGE_P(mdpg)) {
2453		if (lcount != 0)
2454			pr(",");
2455		pr(" pool");
2456		lcount++;
2457	}
2458#ifdef PMAP_VIRTUAL_CACHE_ALIASES
2459	if (VM_PAGEMD_UNCACHED_P(mdpg)) {
2460		if (lcount != 0)
2461			pr(",");
2462		pr(" uncached");
2463	}
2464#endif
2465	pr("\n");
2466
2467	lcount = 0;
2468	if (VM_PAGEMD_REFERENCED_P(mdpg)) {
2469		pr(" referenced");
2470		lcount++;
2471	}
2472	if (VM_PAGEMD_MODIFIED_P(mdpg)) {
2473		if (lcount != 0)
2474			pr(",");
2475		pr(" modified");
2476		lcount++;
2477	}
2478	if (VM_PAGEMD_EXECPAGE_P(mdpg)) {
2479		if (lcount != 0)
2480			pr(",");
2481		pr(" exec");
2482		lcount++;
2483	}
2484	pr("\n");
2485
2486	for (size_t i = 0; pv != NULL; pv = pv->pv_next) {
2487		pr("  pv[%zu] pv=%p\n", i, pv);
2488		pr("    pv[%zu].pv_pmap = %p\n", i, pv->pv_pmap);
2489		pr("    pv[%zu].pv_va   = %" PRIxVADDR " (kenter=%s)\n",
2490		    i, trunc_page(pv->pv_va), PV_ISKENTER_P(pv) ? "true" : "false");
2491		i++;
2492	}
2493}
2494
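/*
 * pmap_db_pmap_print:
 *
 *	DDB helper: print a pmap's top-level structures and its TLB state.
 */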
2495void
2496pmap_db_pmap_print(struct pmap *pm,
2497    void (*pr)(const char *, ...) __printflike(1, 2))
2498{
2499#if defined(PMAP_HWPAGEWALKER)
2500	pr(" pm_pdetab     = %p\n", pm->pm_pdetab);
2501#endif
2502#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE)
2503	pr(" pm_segtab     = %p\n", pm->pm_segtab);
2504#endif
2505
2506	pmap_db_tlb_print(pm, pr);
2507}
2508#endif /* DDB */
2509