/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 * Copyright (c) 2014-2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * This software was developed by Andrew Turner under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/arm64/arm64/pmap.c 338484 2018-09-05 21:28:33Z kib $");

/*
 *	Manages physical address maps.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures that
 *	make virtual-to-physical map invalidations expensive,
 *	this module may delay invalidation or reduced-protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and when physical maps must be made correct.
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/bitstring.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/_unrhdr.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

#define	NL0PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL1PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL2PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL3PG		(PAGE_SIZE/(sizeof (pt_entry_t)))

#define	NUL0E		L0_ENTRIES
#define	NUL1E		(NUL0E * NL1PG)
#define	NUL2E		(NUL1E * NL2PG)

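/*
 * Illustrative note (added for clarity; assumes the 4KB translation granule
 * used by this port, i.e. PAGE_SIZE == 4096 and 8-byte table entries):
 *
 *	NL0PG..NL3PG = 4096 / 8 = 512 entries per table page
 *	NUL0E = L0_ENTRIES            = 512
 *	NUL1E = NUL0E * NL1PG         = 262144
 *	NUL2E = NUL1E * NL2PG         = 134217728
 *
 * These totals are used below to carve up the pindex space of page table
 * pages (L3 table pages first, then L2, then L1).
 */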
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

/*
 * Memory attribute indices.  These index the attributes programmed into the
 * MAIR_EL1 register, which is set up in locore.S.
 */
#define	DEVICE_MEMORY	0
#define	UNCACHED_MEMORY	1
#define	CACHED_MEMORY	2
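/*
 * Illustrative sketch (added for clarity, not from the original source): a
 * leaf PTE selects one of these attribute indices through its AttrIndx
 * field, for example:
 *
 *	pte = (pa & ~ATTR_MASK) | ATTR_DEFAULT |
 *	    ATTR_IDX(CACHED_MEMORY) | L3_PAGE;
 *
 * The index is only meaningful relative to the MAIR_EL1 layout programmed in
 * locore.S, so these constants must stay in sync with it.
 */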


#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
#define	pa_to_pvh(pa)		(&pv_table[pmap_l2_pindex(pa)])

#define	NPV_LIST_LOCKS	MAXCPU

#define	PHYS_TO_PV_LIST_LOCK(pa)	\
			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])

#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
	struct rwlock **_lockp = (lockp);		\
	struct rwlock *_new_lock;			\
							\
	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
	if (_new_lock != *_lockp) {			\
		if (*_lockp != NULL)			\
			rw_wunlock(*_lockp);		\
		*_lockp = _new_lock;			\
		rw_wlock(*_lockp);			\
	}						\
} while (0)

#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))

#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
	struct rwlock **_lockp = (lockp);		\
							\
	if (*_lockp != NULL) {				\
		rw_wunlock(*_lockp);			\
		*_lockp = NULL;				\
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
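/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * callers that walk several pages hold at most one pv list lock at a time,
 * lazily switching it to the lock covering the current page:
 *
 *	struct rwlock *lock = NULL;
 *
 *	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 *	... manipulate m's pv list ...
 *	RELEASE_PV_LIST_LOCK(&lock);
 *
 * Switching to a page that hashes to the already-held lock is a no-op, which
 * keeps lock churn low on sequential scans.
 */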

struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

struct msgbuf *msgbufp = NULL;

/*
 * Data for the pv entry allocation mechanism.
 * Updates to pv_invl_gen are protected by the pv_list_locks[]
 * elements, but reads are not.
 */
static struct md_page *pv_table;
static struct md_page pv_dummy;

vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
vm_paddr_t dmap_phys_max;	/* The limit of the dmap region */
vm_offset_t dmap_max_addr;	/* The virtual address limit of the dmap */

/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS  & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS  & ~L0_OFFSET) == DMAP_MAX_ADDRESS);

#define	DMAP_TABLES	((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
extern pt_entry_t pagetable_dmap[];

static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

static int superpages_enabled = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
    "Are large page mappings enabled?");

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);

static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode);
static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
    vm_offset_t va, struct rwlock **lockp);
static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m, struct rwlock **lockp);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
		struct rwlock **lockp);

static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define	pmap_set(table, mask) atomic_set_64(table, mask)
#define	pmap_load_clear(table) atomic_swap_64(table, 0)
#define	pmap_load(table) (*table)
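/*
 * Illustrative sketch (hypothetical example, not part of this file): a
 * typical entry update swaps the new value in atomically and then cleans the
 * entry to the point of coherency for any table walker that is not cache
 * coherent:
 *
 *	pt_entry_t old;
 *
 *	old = pmap_load_store(l3, newpte);	(atomic 64-bit swap)
 *	PTE_SYNC(l3);				(dcache clean of the entry)
 *
 * A plain non-atomic store could let the System MMU observe a torn entry.
 */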

/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

	memcpy(d, s, PAGE_SIZE);
}

#define	pmap_l0_index(va)	(((va) >> L0_SHIFT) & L0_ADDR_MASK)
#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l0[pmap_l0_index(va)]);
}

static __inline pd_entry_t *
pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
	return (&l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l0;

	l0 = pmap_l0(pmap, va);
	if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
		return (NULL);

	return (pmap_l0_to_l1(l0, va));
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
	return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
	return (&l3[pmap_l3_index(va)]);
}

/*
 * Returns the lowest valid pde for a given virtual address.
 * The next level may or may not point to a valid page or block.
 */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l0, *l1, *l2, desc;

	l0 = pmap_l0(pmap, va);
	desc = pmap_load(l0) & ATTR_DESCR_MASK;
	if (desc != L0_TABLE) {
		*level = -1;
		return (NULL);
	}

	l1 = pmap_l0_to_l1(l0, va);
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc != L1_TABLE) {
		*level = 0;
		return (l0);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc != L2_TABLE) {
		*level = 1;
		return (l1);
	}

	*level = 2;
	return (l2);
}

/*
 * Returns the lowest valid pte block or table entry for a given virtual
 * address. If there are no valid entries return NULL and set the level to
 * the first invalid level.
 */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l1, *l2, desc;
	pt_entry_t *l3;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL) {
		*level = 0;
		return (NULL);
	}
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc == L1_BLOCK) {
		*level = 1;
		return (l1);
	}

	if (desc != L1_TABLE) {
		*level = 1;
		return (NULL);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc == L2_BLOCK) {
		*level = 2;
		return (l2);
	}

	if (desc != L2_TABLE) {
		*level = 2;
		return (NULL);
	}

	*level = 3;
	l3 = pmap_l2_to_l3(l2, va);
	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
		return (NULL);

	return (l3);
}
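/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * callers dispatch on the returned level to know how much of the VA is an
 * offset within the mapping:
 *
 *	int lvl;
 *	pt_entry_t *pte;
 *
 *	pte = pmap_pte(pmap, va, &lvl);
 *	if (pte != NULL) {
 *		pa = pmap_load(pte) & ~ATTR_MASK;
 *		if (lvl == 1)
 *			pa |= va & L1_OFFSET;	(1GB block)
 *		else if (lvl == 2)
 *			pa |= va & L2_OFFSET;	(2MB block)
 *		else
 *			pa |= va & L3_OFFSET;	(4KB page)
 *	}
 *
 * pmap_extract() and pmap_kextract() below follow exactly this pattern.
 */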

static inline bool
pmap_superpages_enabled(void)
{

	return (superpages_enabled != 0);
}

bool
pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
    pd_entry_t **l2, pt_entry_t **l3)
{
	pd_entry_t *l0p, *l1p, *l2p;

	if (pmap->pm_l0 == NULL)
		return (false);

	l0p = pmap_l0(pmap, va);
	*l0 = l0p;

	if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
		return (false);

	l1p = pmap_l0_to_l1(l0p, va);
	*l1 = l1p;

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
		*l2 = NULL;
		*l3 = NULL;
		return (true);
	}

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
		return (false);

	l2p = pmap_l1_to_l2(l1p, va);
	*l2 = l2p;

	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
		*l3 = NULL;
		return (true);
	}

	*l3 = pmap_l2_to_l3(l2p, va);

	return (true);
}

static __inline int
pmap_is_current(pmap_t pmap)
{

	return ((pmap == pmap_kernel()) ||
	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}


/* Is a level 1 or 2 entry a valid cacheable block? */
CTASSERT(L1_BLOCK == L2_BLOCK);
static __inline int
pmap_pte_valid_cacheable(pt_entry_t pte)
{

	return (((pte & ATTR_DESCR_MASK) == L1_BLOCK) &&
	    ((pte & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

	return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
	    ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))

/*
 * Checks if the page is dirty.  We currently lack proper tracking of this on
 * arm64, so for now assume that any page mapped read/write that has been
 * accessed is dirty.
 */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
}
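/*
 * Illustrative note (added for clarity): the test above treats a mapping as
 * dirty when the Access Flag is set and the AP bits allow writes, i.e.
 *
 *	(pte & (ATTR_AF | ATTR_AP_RW_BIT)) == (ATTR_AF | ATTR_AP(ATTR_AP_RW))
 *
 * so any accessed, writeable page is conservatively reported as modified.
 */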

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
	pt_entry_t *l2;
	pd_entry_t *l1;

	l1 = (pd_entry_t *)l1pt;
	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

	/* Check locore has used a table L1 map */
	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
	   ("Invalid bootstrap L1 table"));
	/* Find the address of the L2 table */
	l2 = (pt_entry_t *)init_pt_va;
	*l2_slot = pmap_l2_index(va);

	return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
	u_int l1_slot, l2_slot;
	pt_entry_t *l2;

	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
}

static void
pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
{
	vm_offset_t va;
	vm_paddr_t pa;
	u_int l1_slot;

	pa = dmap_phys_base = min_pa & ~L1_OFFSET;
	va = DMAP_MIN_ADDRESS;
	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
		l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);

		pmap_load_store(&pagetable_dmap[l1_slot],
		    (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_XN |
		    ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
	}

	/* Set the upper limit of the DMAP region */
	dmap_phys_max = pa;
	dmap_max_addr = va;

	cpu_dcache_wb_range((vm_offset_t)pagetable_dmap,
	    PAGE_SIZE * DMAP_TABLES);
	cpu_tlb_flushID();
}
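/*
 * Illustrative sketch (hypothetical numbers, not part of this file): the
 * DMAP built above is a linear, offset-based mapping made of L1 blocks, so
 * translation between its VAs and physical addresses is pure arithmetic:
 *
 *	PHYS_TO_DMAP(pa) == (pa - dmap_phys_base) + DMAP_MIN_ADDRESS
 *	DMAP_TO_PHYS(va) == (va - DMAP_MIN_ADDRESS) + dmap_phys_base
 *
 * e.g. with dmap_phys_base == 0x80000000, physical 0x80042000 is reachable
 * at DMAP_MIN_ADDRESS + 0x42000.
 */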

static vm_offset_t
pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
{
	vm_offset_t l2pt;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;

	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));

	l1 = (pd_entry_t *)l1pt;
	l1_slot = pmap_l1_index(va);
	l2pt = l2_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		pa = pmap_early_vtophys(l1pt, l2pt);
		pmap_load_store(&l1[l1_slot],
		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
		l2pt += PAGE_SIZE;
	}

	/* Clean the L2 page table */
	memset((void *)l2_start, 0, l2pt - l2_start);
	cpu_dcache_wb_range(l2_start, l2pt - l2_start);

	/* Flush the l1 table to ram */
	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);

	return l2pt;
}

static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
	vm_offset_t l2pt, l3pt;
	vm_paddr_t pa;
	pd_entry_t *l2;
	u_int l2_slot;

	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

	l2 = pmap_l2(kernel_pmap, va);
	l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
	l2pt = (vm_offset_t)l2;
	l2_slot = pmap_l2_index(va);
	l3pt = l3_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

		pa = pmap_early_vtophys(l1pt, l3pt);
		pmap_load_store(&l2[l2_slot],
		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
		l3pt += PAGE_SIZE;
	}

	/* Clean the L3 page tables */
	memset((void *)l3_start, 0, l3pt - l3_start);
	cpu_dcache_wb_range(l3_start, l3pt - l3_start);

	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

	return l3pt;
}

/*
 *	Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
    vm_size_t kernlen)
{
	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
	uint64_t kern_delta;
	pt_entry_t *l2;
	vm_offset_t va, freemempos;
	vm_offset_t dpcpu, msgbufpv;
	vm_paddr_t pa, max_pa, min_pa;
	int i;

	kern_delta = KERNBASE - kernstart;
	physmem = 0;

	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
	printf("%lx\n", l1pt);
	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

	/* Set this early so we can use the pagetable walking functions */
	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
	PMAP_LOCK_INIT(kernel_pmap);

	/* Assume the address we were loaded to is a valid physical address */
	min_pa = max_pa = KERNBASE - kern_delta;

	/*
	 * Find the minimum physical address. physmap is sorted,
	 * but may contain empty ranges.
	 */
	for (i = 0; i < (physmap_idx * 2); i += 2) {
		if (physmap[i] == physmap[i + 1])
			continue;
		if (physmap[i] <= min_pa)
			min_pa = physmap[i];
		if (physmap[i + 1] > max_pa)
			max_pa = physmap[i + 1];
	}

	/* Create a direct map region early so we can use it for pa -> va */
	pmap_bootstrap_dmap(l1pt, min_pa, max_pa);

	va = KERNBASE;
	pa = KERNBASE - kern_delta;

	/*
	 * Start to initialise phys_avail by copying from physmap
	 * up to the physical address KERNBASE points at.
	 */
	map_slot = avail_slot = 0;
	for (; map_slot < (physmap_idx * 2) &&
	    avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		if (physmap[map_slot] <= pa &&
		    physmap[map_slot + 1] > pa)
			break;

		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}

	/* Add the memory before the kernel */
	if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) {
		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = pa;
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}
	used_map_slot = map_slot;

	/*
	 * Read the page table to find out what is already mapped.
	 * This assumes we have mapped a block of memory from KERNBASE
	 * using a single L1 entry.
	 */
	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);

	/* Sanity check the index, KERNBASE should be the first VA */
	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));

	/* Find how many pages we have mapped */
	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
			break;

		/* Check locore used L2 blocks */
		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
		    ("Invalid bootstrap L2 table"));
		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
		    ("Incorrect PA in L2 table"));

		va += L2_SIZE;
		pa += L2_SIZE;
	}

	va = roundup2(va, L1_SIZE);

	freemempos = KERNBASE + kernlen;
	freemempos = roundup2(freemempos, PAGE_SIZE);
	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
	/* And the l3 tables for the early devmap */
	freemempos = pmap_bootstrap_l3(l1pt,
	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);

	cpu_tlb_flushID();

#define alloc_pages(var, np)						\
	(var) = freemempos;						\
	freemempos += (np * PAGE_SIZE);					\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));

	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;

	virtual_avail = roundup2(freemempos, L1_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	/* Finish initialising physmap */
	map_slot = used_map_slot;
	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
	    map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		/* Have we used the current range? */
		if (physmap[map_slot + 1] <= pa)
			continue;

		/* Do we need to split the entry? */
		if (physmap[map_slot] < pa) {
			phys_avail[avail_slot] = pa;
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		} else {
			phys_avail[avail_slot] = physmap[map_slot];
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		}
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;

		avail_slot += 2;
	}
	phys_avail[avail_slot] = 0;
	phys_avail[avail_slot + 1] = 0;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = atop(phys_avail[avail_slot - 1]);

	cpu_tlb_flushID();
}

/*
 *	Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	vm_size_t s;
	int i, pv_npg;

	/*
	 * Are large page mappings enabled?
	 */
	TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);

	/*
	 * Initialize the pv chunk list mutex.
	 */
	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

	/*
	 * Initialize the pool of pv list locks.
	 */
	for (i = 0; i < NPV_LIST_LOCKS; i++)
		rw_init(&pv_list_locks[i], "pmap pv list");

	/*
	 * Calculate the size of the pv head table for superpages.
	 */
	pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);

	/*
	 * Allocate memory for the pv head table for superpages.
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);
	TAILQ_INIT(&pv_dummy.pv_list);
}
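/*
 * Illustrative sizing example (hypothetical numbers, not part of this file):
 * pv_npg above is one md_page per potential 2MB (L2) superpage frame, so for
 * a machine whose highest physical segment ends at 4GB:
 *
 *	pv_npg = howmany(0x100000000, L2_SIZE) = 0x100000000 / 0x200000 = 2048
 *	s      = round_page(2048 * sizeof(struct md_page))
 *
 * i.e. the superpage pv head table stays small relative to the memory it
 * describes.
 */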

static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0,
    "2MB page mapping counters");

static u_long pmap_l2_demotions;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
    &pmap_l2_demotions, 0, "2MB page demotions");

static u_long pmap_l2_p_failures;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
    &pmap_l2_p_failures, 0, "2MB page promotion failures");

static u_long pmap_l2_promotions;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
    &pmap_l2_promotions, 0, "2MB page promotions");

/*
 * Invalidate a single TLB entry.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vaae1is, %0	\n"
	    "dsb  ish		\n"
	    "isb		\n"
	    : : "r"(va >> PAGE_SHIFT));
	sched_unpin();
}
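/*
 * Note on the sequence above (descriptive comment added for clarity): the
 * leading "dsb ishst" makes prior PTE stores visible before the broadcast
 * "tlbi vaae1is" invalidates the VA in every inner-shareable TLB; the
 * trailing "dsb ish" waits for the invalidation to complete and the "isb"
 * resynchronizes the local context before any dependent access.
 */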

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	sched_pin();
	dsb(ishst);
	for (addr = sva; addr < eva; addr += PAGE_SIZE) {
		__asm __volatile(
		    "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
	}
	__asm __volatile(
	    "dsb  ish	\n"
	    "isb	\n");
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vmalle1is	\n"
	    "dsb  ish		\n"
	    "isb		\n");
	sched_unpin();
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	pa = 0;
	PMAP_LOCK(pmap);
	/*
	 * Find the block or page map for this virtual address. pmap_pte
	 * will return either a valid block/page entry, or NULL.
	 */
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);
		pa = tpte & ~ATTR_MASK;
		switch(lvl) {
		case 1:
			KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
			    ("pmap_extract: Invalid L1 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L1_OFFSET);
			break;
		case 2:
			KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
			    ("pmap_extract: Invalid L2 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L2_OFFSET);
			break;
		case 3:
			KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
			    ("pmap_extract: Invalid L3 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L3_OFFSET);
			break;
		}
	}
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t *pte, tpte;
	vm_offset_t off;
	vm_paddr_t pa;
	vm_page_t m;
	int lvl;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);

		KASSERT(lvl > 0 && lvl <= 3,
		    ("pmap_extract_and_hold: Invalid level %d", lvl));
		CTASSERT(L1_BLOCK == L2_BLOCK);
		KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
		    (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
		    ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
		     tpte & ATTR_DESCR_MASK));
		if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
		    ((prot & VM_PROT_WRITE) == 0)) {
			switch(lvl) {
			case 1:
				off = va & L1_OFFSET;
				break;
			case 2:
				off = va & L2_OFFSET;
				break;
			case 3:
			default:
				off = 0;
			}
			if (vm_page_pa_tryrelock(pmap,
			    (tpte & ~ATTR_MASK) | off, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		pa = 0;
		pte = pmap_pte(kernel_pmap, va, &lvl);
		if (pte != NULL) {
			tpte = pmap_load(pte);
			pa = tpte & ~ATTR_MASK;
			switch(lvl) {
			case 1:
				KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
				    ("pmap_kextract: Invalid L1 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L1_OFFSET);
				break;
			case 2:
				KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
				    ("pmap_kextract: Invalid L2 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L2_OFFSET);
				break;
			case 3:
				KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
				    ("pmap_kextract: Invalid L3 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L3_OFFSET);
				break;
			}
		}
	}
	return (pa);
}
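/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 *
 *	vm_paddr_t pa;
 *
 *	pa = pmap_kextract((vm_offset_t)buf);
 *
 * DMAP addresses are translated arithmetically; anything else walks the
 * kernel page tables.  Note that 0 is returned for an unmapped VA, so
 * callers must already know that their address is mapped.
 */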

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

static void
pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
{
	pd_entry_t *pde;
	pt_entry_t *pte, attr;
	vm_offset_t va;
	int lvl;

	KASSERT((pa & L3_OFFSET) == 0,
	   ("pmap_kenter: Invalid physical address"));
	KASSERT((sva & L3_OFFSET) == 0,
	   ("pmap_kenter: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kenter: Mapping is not page-sized"));

	attr = ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE;
	if (mode == DEVICE_MEMORY)
		attr |= ATTR_XN;

	va = sva;
	while (size != 0) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));

		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, (pa & ~L3_OFFSET) | attr);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{

	pmap_kenter(sva, size, pa, DEVICE_MEMORY);
}

/*
 * Remove a page from the kernel pagetables.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;
	int lvl;

	pte = pmap_pte(kernel_pmap, va, &lvl);
	KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
	KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));

	if (pmap_l3_valid_cacheable(pmap_load(pte)))
		cpu_dcache_wb_range(va, L3_SIZE);
	pmap_load_clear(pte);
	PTE_SYNC(pte);
	pmap_invalidate_page(kernel_pmap, va);
}

void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT((sva & L3_OFFSET) == 0,
	   ("pmap_kremove_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kremove_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		pmap_load_clear(pte);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	The value passed in '*virt' is a suggested virtual address for
 *	the mapping. Architectures which can support a direct-mapped
 *	physical to virtual region can return the appropriate address
 *	within that region, leaving '*virt' unchanged. Other
 *	architectures should map the pages starting at '*virt' and
 *	update '*virt' with the first usable address after the mapped
 *	region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	return PHYS_TO_DMAP(start);
}


/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged TLB invalidation.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pd_entry_t *pde;
	pt_entry_t *pte, pa;
	vm_offset_t va;
	vm_page_t m;
	int i, lvl;

	va = sva;
	for (i = 0; i < count; i++) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2,
		    ("pmap_qenter: Invalid level %d", lvl));

		m = ma[i];
		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
		    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
		if (m->md.pv_memattr == DEVICE_MEMORY)
			pa |= ATTR_XN;
		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, pa);
		PTE_SYNC(pte);

		va += L3_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}
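/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * pmap_qenter() and pmap_qremove() are paired around short-lived mappings of
 * already-wired pages into caller-owned KVA:
 *
 *	pmap_qenter(kva, pages, npages);
 *	... access the pages through kva ...
 *	pmap_qremove(kva, npages);
 *
 * These routines only install and tear down the L3 entries; allocating and
 * freeing the KVA range itself is the caller's responsibility.
 */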
1277281494Sandrew
1278281494Sandrew/*
1279281494Sandrew * This routine tears out page mappings from the
1280281494Sandrew * kernel -- it is meant only for temporary mappings.
1281281494Sandrew */
1282281494Sandrewvoid
1283281494Sandrewpmap_qremove(vm_offset_t sva, int count)
1284281494Sandrew{
1285297446Sandrew	pt_entry_t *pte;
1286281494Sandrew	vm_offset_t va;
1287297446Sandrew	int lvl;
1288281494Sandrew
1289285212Sandrew	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
1290285212Sandrew
1291281494Sandrew	va = sva;
1292281494Sandrew	while (count-- > 0) {
1293297446Sandrew		pte = pmap_pte(kernel_pmap, va, &lvl);
1294297446Sandrew		KASSERT(lvl == 3,
1295297446Sandrew		    ("Invalid device pagetable level: %d != 3", lvl));
1296297446Sandrew		if (pte != NULL) {
1297297446Sandrew			if (pmap_l3_valid_cacheable(pmap_load(pte)))
1298297446Sandrew				cpu_dcache_wb_range(va, L3_SIZE);
1299297446Sandrew			pmap_load_clear(pte);
1300297446Sandrew			PTE_SYNC(pte);
1301297446Sandrew		}
1302285212Sandrew
1303281494Sandrew		va += PAGE_SIZE;
1304281494Sandrew	}
1305281494Sandrew	pmap_invalidate_range(kernel_pmap, sva, va);
1306281494Sandrew}
1307281494Sandrew
1308281494Sandrew/***************************************************
1309281494Sandrew * Page table page management routines.....
1310281494Sandrew ***************************************************/
1311281494Sandrewstatic __inline void
1312281494Sandrewpmap_free_zero_pages(struct spglist *free)
1313281494Sandrew{
1314281494Sandrew	vm_page_t m;
1315281494Sandrew
1316281494Sandrew	while ((m = SLIST_FIRST(free)) != NULL) {
1317281494Sandrew		SLIST_REMOVE_HEAD(free, plinks.s.ss);
1318281494Sandrew		/* Preserve the page's PG_ZERO setting. */
1319281494Sandrew		vm_page_free_toq(m);
1320281494Sandrew	}
1321281494Sandrew}
1322281494Sandrew
1323281494Sandrew/*
1324281494Sandrew * Schedule the specified unused page table page to be freed.  Specifically,
1325281494Sandrew * add the page to the specified list of pages that will be released to the
1326281494Sandrew * physical memory manager after the TLB has been updated.
1327281494Sandrew */
1328281494Sandrewstatic __inline void
1329281494Sandrewpmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
1330281494Sandrew    boolean_t set_PG_ZERO)
1331281494Sandrew{
1332281494Sandrew
1333281494Sandrew	if (set_PG_ZERO)
1334281494Sandrew		m->flags |= PG_ZERO;
1335281494Sandrew	else
1336281494Sandrew		m->flags &= ~PG_ZERO;
1337281494Sandrew	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
1338281494Sandrew}
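/*
 * A sketch of the calling convention followed by users of the delayed
 * free list in this file (a summary of existing usage, not new policy):
 * pages are collected on a caller-local list, the TLB is invalidated,
 * and only then are the pages returned to the allocator:
 *
 *	struct spglist free;
 *
 *	SLIST_INIT(&free);
 *	... pmap_remove_l3()/pmap_unuse_l3() may add pages to "free" ...
 *	pmap_invalidate_page(pmap, va);	(or a ranged/full invalidation)
 *	pmap_free_zero_pages(&free);
 */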
1339305531Sandrew
1340281494Sandrew/*
1341281494Sandrew * Decrements a page table page's wire count, which is used to record the
1342281494Sandrew * number of valid page table entries within the page.  If the wire count
1343281494Sandrew * drops to zero, then the page table page is unmapped.  Returns TRUE if the
1344281494Sandrew * page table page was unmapped and FALSE otherwise.
1345281494Sandrew */
1346281494Sandrewstatic inline boolean_t
1347281494Sandrewpmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1348281494Sandrew{
1349281494Sandrew
1350281494Sandrew	--m->wire_count;
1351281494Sandrew	if (m->wire_count == 0) {
1352281494Sandrew		_pmap_unwire_l3(pmap, va, m, free);
1353281494Sandrew		return (TRUE);
1354281494Sandrew	} else
1355281494Sandrew		return (FALSE);
1356281494Sandrew}
1357281494Sandrew
1358281494Sandrewstatic void
1359281494Sandrew_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1360281494Sandrew{
1361281494Sandrew
1362281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1363281494Sandrew	/*
1364281494Sandrew	 * unmap the page table page
1365281494Sandrew	 */
1366297446Sandrew	if (m->pindex >= (NUL2E + NUL1E)) {
1367297446Sandrew		/* l1 page */
1368297446Sandrew		pd_entry_t *l0;
1369297446Sandrew
1370297446Sandrew		l0 = pmap_l0(pmap, va);
1371297446Sandrew		pmap_load_clear(l0);
1372297446Sandrew		PTE_SYNC(l0);
1373297446Sandrew	} else if (m->pindex >= NUL2E) {
1374297446Sandrew		/* l2 page */
1375281494Sandrew		pd_entry_t *l1;
1376297446Sandrew
1377281494Sandrew		l1 = pmap_l1(pmap, va);
1378281494Sandrew		pmap_load_clear(l1);
1379281494Sandrew		PTE_SYNC(l1);
1380281494Sandrew	} else {
1381297446Sandrew		/* l3 page */
1382281494Sandrew		pd_entry_t *l2;
1383297446Sandrew
1384281494Sandrew		l2 = pmap_l2(pmap, va);
1385281494Sandrew		pmap_load_clear(l2);
1386281494Sandrew		PTE_SYNC(l2);
1387281494Sandrew	}
1388281494Sandrew	pmap_resident_count_dec(pmap, 1);
1389297446Sandrew	if (m->pindex < NUL2E) {
1390297446Sandrew		/* We just released an l3, unhold the matching l2 */
1391297446Sandrew		pd_entry_t *l1, tl1;
1392297446Sandrew		vm_page_t l2pg;
1393281494Sandrew
1394297446Sandrew		l1 = pmap_l1(pmap, va);
1395297446Sandrew		tl1 = pmap_load(l1);
1396297446Sandrew		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
1397297446Sandrew		pmap_unwire_l3(pmap, va, l2pg, free);
1398297446Sandrew	} else if (m->pindex < (NUL2E + NUL1E)) {
1399297446Sandrew		/* We just released an l2, unhold the matching l1 */
1400297446Sandrew		pd_entry_t *l0, tl0;
1401297446Sandrew		vm_page_t l1pg;
1402297446Sandrew
1403297446Sandrew		l0 = pmap_l0(pmap, va);
1404297446Sandrew		tl0 = pmap_load(l0);
1405297446Sandrew		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
1406297446Sandrew		pmap_unwire_l3(pmap, va, l1pg, free);
1407281494Sandrew	}
1408285212Sandrew	pmap_invalidate_page(pmap, va);
1409281494Sandrew
1410281494Sandrew	/*
1411281494Sandrew	 * This is a release store so that the ordinary store unmapping
1412281494Sandrew	 * the page table page is globally performed before TLB shoot-
1413281494Sandrew	 * down is begun.
1414281494Sandrew	 */
1415281494Sandrew	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
1416281494Sandrew
1417305531Sandrew	/*
1418281494Sandrew	 * Put page on a list so that it is released after
1419281494Sandrew	 * *ALL* TLB shootdown is done
1420281494Sandrew	 */
1421281494Sandrew	pmap_add_delayed_free_list(m, free, TRUE);
1422281494Sandrew}
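/*
 * Page table page pindex ranges assumed by the checks above (derived from
 * the comparisons against NUL2E and NUL1E): pindexes in [0, NUL2E) name
 * level 3 table pages, pindexes in [NUL2E, NUL2E + NUL1E) name level 2
 * table pages, and pindexes at or above NUL2E + NUL1E name level 1 table
 * pages.  Freeing a page from one range therefore drops a reference on
 * the page one level up, as done above.
 */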
1423281494Sandrew
1424281494Sandrew/*
1425281494Sandrew * After removing an l3 entry, this routine is used to
1426281494Sandrew * conditionally free the page, and manage the hold/wire counts.
1427281494Sandrew */
1428281494Sandrewstatic int
1429281494Sandrewpmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
1430281494Sandrew    struct spglist *free)
1431281494Sandrew{
1432281494Sandrew	vm_page_t mpte;
1433281494Sandrew
1434281494Sandrew	if (va >= VM_MAXUSER_ADDRESS)
1435281494Sandrew		return (0);
1436281494Sandrew	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
1437281494Sandrew	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
1438281494Sandrew	return (pmap_unwire_l3(pmap, va, mpte, free));
1439281494Sandrew}
1440281494Sandrew
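/*
 * Initialize the pmap for process 0.  Unlike pmap_pinit(), no level 0
 * table is allocated here; the kernel pmap's level 0 table is reused.
 */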
1441281494Sandrewvoid
1442281494Sandrewpmap_pinit0(pmap_t pmap)
1443281494Sandrew{
1444281494Sandrew
1445281494Sandrew	PMAP_LOCK_INIT(pmap);
1446281494Sandrew	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1447297446Sandrew	pmap->pm_l0 = kernel_pmap->pm_l0;
1448305882Sandrew	pmap->pm_root.rt_root = 0;
1449281494Sandrew}
1450281494Sandrew
1451281494Sandrewint
1452281494Sandrewpmap_pinit(pmap_t pmap)
1453281494Sandrew{
1454297446Sandrew	vm_paddr_t l0phys;
1455297446Sandrew	vm_page_t l0pt;
1456281494Sandrew
1457281494Sandrew	/*
1458297446Sandrew	 * allocate the l0 page
1459281494Sandrew	 */
1460297446Sandrew	while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
1461281494Sandrew	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
1462281494Sandrew		VM_WAIT;
1463281494Sandrew
1464297446Sandrew	l0phys = VM_PAGE_TO_PHYS(l0pt);
1465297446Sandrew	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);
1466281494Sandrew
1467297446Sandrew	if ((l0pt->flags & PG_ZERO) == 0)
1468297446Sandrew		pagezero(pmap->pm_l0);
1469281494Sandrew
1470305882Sandrew	pmap->pm_root.rt_root = 0;
1471281494Sandrew	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1472281494Sandrew
1473281494Sandrew	return (1);
1474281494Sandrew}
1475281494Sandrew
1476281494Sandrew/*
1477281494Sandrew * This routine is called if the desired page table page does not exist.
1478281494Sandrew *
1479281494Sandrew * If page table page allocation fails, this routine may sleep before
1480281494Sandrew * returning NULL.  It sleeps only if a lock pointer was given.
1481281494Sandrew *
1482281494Sandrew * Note: If a page allocation fails at page table level two or three,
1483281494Sandrew * one or two pages may be held during the wait, only to be released
1484281494Sandrew * afterwards.  This conservative approach makes it easy to argue that
1485281494Sandrew * no race conditions can occur.
1486281494Sandrew */
1487281494Sandrewstatic vm_page_t
1488281494Sandrew_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
1489281494Sandrew{
1490297446Sandrew	vm_page_t m, l1pg, l2pg;
1491281494Sandrew
1492281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1493281494Sandrew
1494281494Sandrew	/*
1495281494Sandrew	 * Allocate a page table page.
1496281494Sandrew	 */
1497281494Sandrew	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
1498281494Sandrew	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
1499281494Sandrew		if (lockp != NULL) {
1500281494Sandrew			RELEASE_PV_LIST_LOCK(lockp);
1501281494Sandrew			PMAP_UNLOCK(pmap);
1502281494Sandrew			VM_WAIT;
1503281494Sandrew			PMAP_LOCK(pmap);
1504281494Sandrew		}
1505281494Sandrew
1506281494Sandrew		/*
1507281494Sandrew		 * Indicate the need to retry.  While waiting, the page table
1508281494Sandrew		 * page may have been allocated.
1509281494Sandrew		 */
1510281494Sandrew		return (NULL);
1511281494Sandrew	}
1512281494Sandrew	if ((m->flags & PG_ZERO) == 0)
1513281494Sandrew		pmap_zero_page(m);
1514281494Sandrew
1515281494Sandrew	/*
1516281494Sandrew	 * Map the pagetable page into the process address space, if
1517281494Sandrew	 * it isn't already there.
1518281494Sandrew	 */
1519281494Sandrew
1520297446Sandrew	if (ptepindex >= (NUL2E + NUL1E)) {
1521297446Sandrew		pd_entry_t *l0;
1522297446Sandrew		vm_pindex_t l0index;
1523281494Sandrew
1524297446Sandrew		l0index = ptepindex - (NUL2E + NUL1E);
1525297446Sandrew		l0 = &pmap->pm_l0[l0index];
1526297446Sandrew		pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
1527297446Sandrew		PTE_SYNC(l0);
1528297446Sandrew	} else if (ptepindex >= NUL2E) {
1529297446Sandrew		vm_pindex_t l0index, l1index;
1530297446Sandrew		pd_entry_t *l0, *l1;
1531297446Sandrew		pd_entry_t tl0;
1532297446Sandrew
1533297446Sandrew		l1index = ptepindex - NUL2E;
1534297446Sandrew		l0index = l1index >> L0_ENTRIES_SHIFT;
1535297446Sandrew
1536297446Sandrew		l0 = &pmap->pm_l0[l0index];
1537297446Sandrew		tl0 = pmap_load(l0);
1538297446Sandrew		if (tl0 == 0) {
1539297446Sandrew			/* recurse for allocating page dir */
1540297446Sandrew			if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
1541297446Sandrew			    lockp) == NULL) {
1542297446Sandrew				--m->wire_count;
1543297446Sandrew				/* XXX: release mem barrier? */
1544297446Sandrew				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1545297446Sandrew				vm_page_free_zero(m);
1546297446Sandrew				return (NULL);
1547297446Sandrew			}
1548297446Sandrew		} else {
1549297446Sandrew			l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
1550297446Sandrew			l1pg->wire_count++;
1551297446Sandrew		}
1552297446Sandrew
1553297446Sandrew		l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
1554297446Sandrew		l1 = &l1[ptepindex & Ln_ADDR_MASK];
1555281494Sandrew		pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
1556281494Sandrew		PTE_SYNC(l1);
1557281494Sandrew	} else {
1558297446Sandrew		vm_pindex_t l0index, l1index;
1559297446Sandrew		pd_entry_t *l0, *l1, *l2;
1560297446Sandrew		pd_entry_t tl0, tl1;
1561281494Sandrew
1562297446Sandrew		l1index = ptepindex >> Ln_ENTRIES_SHIFT;
1563297446Sandrew		l0index = l1index >> L0_ENTRIES_SHIFT;
1564297446Sandrew
1565297446Sandrew		l0 = &pmap->pm_l0[l0index];
1566297446Sandrew		tl0 = pmap_load(l0);
1567297446Sandrew		if (tl0 == 0) {
1568281494Sandrew			/* recurse for allocating page dir */
1569297446Sandrew			if (_pmap_alloc_l3(pmap, NUL2E + l1index,
1570281494Sandrew			    lockp) == NULL) {
1571281494Sandrew				--m->wire_count;
1572281494Sandrew				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1573281494Sandrew				vm_page_free_zero(m);
1574281494Sandrew				return (NULL);
1575281494Sandrew			}
1576297446Sandrew			tl0 = pmap_load(l0);
1577297446Sandrew			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
1578297446Sandrew			l1 = &l1[l1index & Ln_ADDR_MASK];
1579281494Sandrew		} else {
1580297446Sandrew			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
1581297446Sandrew			l1 = &l1[l1index & Ln_ADDR_MASK];
1582297446Sandrew			tl1 = pmap_load(l1);
1583297446Sandrew			if (tl1 == 0) {
1584297446Sandrew				/* recurse for allocating page dir */
1585297446Sandrew				if (_pmap_alloc_l3(pmap, NUL2E + l1index,
1586297446Sandrew				    lockp) == NULL) {
1587297446Sandrew					--m->wire_count;
1588297446Sandrew					/* XXX: release mem barrier? */
1589297446Sandrew					atomic_subtract_int(
1590297446Sandrew					    &vm_cnt.v_wire_count, 1);
1591297446Sandrew					vm_page_free_zero(m);
1592297446Sandrew					return (NULL);
1593297446Sandrew				}
1594297446Sandrew			} else {
1595297446Sandrew				l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
1596297446Sandrew				l2pg->wire_count++;
1597297446Sandrew			}
1598281494Sandrew		}
1599281494Sandrew
1600288445Sandrew		l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
1601281494Sandrew		l2 = &l2[ptepindex & Ln_ADDR_MASK];
1602285537Sandrew		pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
1603281494Sandrew		PTE_SYNC(l2);
1604281494Sandrew	}
1605281494Sandrew
1606281494Sandrew	pmap_resident_count_inc(pmap, 1);
1607281494Sandrew
1608281494Sandrew	return (m);
1609281494Sandrew}
1610281494Sandrew
1611281494Sandrewstatic vm_page_t
1612281494Sandrewpmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
1613281494Sandrew{
1614281494Sandrew	vm_pindex_t ptepindex;
1615297446Sandrew	pd_entry_t *pde, tpde;
1616305882Sandrew#ifdef INVARIANTS
1617305882Sandrew	pt_entry_t *pte;
1618305882Sandrew#endif
1619281494Sandrew	vm_page_t m;
1620297446Sandrew	int lvl;
1621281494Sandrew
1622281494Sandrew	/*
1623281494Sandrew	 * Calculate pagetable page index
1624281494Sandrew	 */
1625281494Sandrew	ptepindex = pmap_l2_pindex(va);
1626281494Sandrewretry:
1627281494Sandrew	/*
1628281494Sandrew	 * Get the page directory entry
1629281494Sandrew	 */
1630297446Sandrew	pde = pmap_pde(pmap, va, &lvl);
1631281494Sandrew
1632281494Sandrew	/*
1633297446Sandrew	 * If the page table page is mapped, we just increment the wire count,
1634297446Sandrew	 * and activate it.  If we get a level 2 pde it will point to a level 3
1635297446Sandrew	 * table.
1636281494Sandrew	 */
1637305882Sandrew	switch (lvl) {
1638305882Sandrew	case -1:
1639305882Sandrew		break;
1640305882Sandrew	case 0:
1641305882Sandrew#ifdef INVARIANTS
1642305882Sandrew		pte = pmap_l0_to_l1(pde, va);
1643305882Sandrew		KASSERT(pmap_load(pte) == 0,
1644305882Sandrew		    ("pmap_alloc_l3: TODO: l0 superpages"));
1645305882Sandrew#endif
1646305882Sandrew		break;
1647305882Sandrew	case 1:
1648305882Sandrew#ifdef INVARIANTS
1649305882Sandrew		pte = pmap_l1_to_l2(pde, va);
1650305882Sandrew		KASSERT(pmap_load(pte) == 0,
1651305882Sandrew		    ("pmap_alloc_l3: TODO: l1 superpages"));
1652305882Sandrew#endif
1653305882Sandrew		break;
1654305882Sandrew	case 2:
1655297446Sandrew		tpde = pmap_load(pde);
1656297446Sandrew		if (tpde != 0) {
1657297446Sandrew			m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
1658297446Sandrew			m->wire_count++;
1659297446Sandrew			return (m);
1660297446Sandrew		}
1661305882Sandrew		break;
1662305882Sandrew	default:
1663305882Sandrew		panic("pmap_alloc_l3: Invalid level %d", lvl);
1664281494Sandrew	}
1665297446Sandrew
1666297446Sandrew	/*
1667297446Sandrew	 * The page table page is unmapped or was deallocated; allocate one.
1668297446Sandrew	 */
1669297446Sandrew	m = _pmap_alloc_l3(pmap, ptepindex, lockp);
1670297446Sandrew	if (m == NULL && lockp != NULL)
1671297446Sandrew		goto retry;
1672297446Sandrew
1673281494Sandrew	return (m);
1674281494Sandrew}
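/*
 * Note on the return contract above: when a lock pointer is supplied,
 * _pmap_alloc_l3() may sleep and signal a retry by returning NULL, and
 * pmap_alloc_l3() loops until the allocation succeeds.  A NULL return
 * therefore reaches only callers that passed lockp == NULL, i.e. those
 * that cannot afford to sleep.
 */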
1675281494Sandrew
1676281494Sandrew
1677281494Sandrew/***************************************************
1678281494Sandrew * Pmap allocation/deallocation routines.
1679281494Sandrew ***************************************************/
1680281494Sandrew
1681281494Sandrew/*
1682281494Sandrew * Release any resources held by the given physical map.
1683281494Sandrew * Called when a pmap initialized by pmap_pinit is being released.
1684281494Sandrew * Should only be called if the map contains no valid mappings.
1685281494Sandrew */
1686281494Sandrewvoid
1687281494Sandrewpmap_release(pmap_t pmap)
1688281494Sandrew{
1689281494Sandrew	vm_page_t m;
1690281494Sandrew
1691281494Sandrew	KASSERT(pmap->pm_stats.resident_count == 0,
1692281494Sandrew	    ("pmap_release: pmap resident count %ld != 0",
1693281494Sandrew	    pmap->pm_stats.resident_count));
1694305882Sandrew	KASSERT(vm_radix_is_empty(&pmap->pm_root),
1695305882Sandrew	    ("pmap_release: pmap has reserved page table page(s)"));
1696281494Sandrew
1697297446Sandrew	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));
1698281494Sandrew
1699281494Sandrew	m->wire_count--;
1700281494Sandrew	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1701281494Sandrew	vm_page_free_zero(m);
1702281494Sandrew}
1703281494Sandrew
1704281494Sandrewstatic int
1705281494Sandrewkvm_size(SYSCTL_HANDLER_ARGS)
1706281494Sandrew{
1707281494Sandrew	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
1708281494Sandrew
1709281494Sandrew	return sysctl_handle_long(oidp, &ksize, 0, req);
1710281494Sandrew}
1711305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
1712281494Sandrew    0, 0, kvm_size, "LU", "Size of KVM");
1713281494Sandrew
1714281494Sandrewstatic int
1715281494Sandrewkvm_free(SYSCTL_HANDLER_ARGS)
1716281494Sandrew{
1717281494Sandrew	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
1718281494Sandrew
1719281494Sandrew	return sysctl_handle_long(oidp, &kfree, 0, req);
1720281494Sandrew}
1721305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
1722281494Sandrew    0, 0, kvm_free, "LU", "Amount of KVM free");
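/*
 * Both sysctls are read-only; from userland they can be inspected with,
 * for example, "sysctl vm.kvm_size vm.kvm_free".
 */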
1723281494Sandrew
1724281494Sandrew/*
1725281494Sandrew * grow the number of kernel page table entries, if needed
1726281494Sandrew */
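/*
 * Growth proceeds in L2_SIZE (2MB) steps.  For each step, a missing
 * level 1 entry is populated with a freshly zeroed level 2 table page,
 * and a missing level 2 entry is populated with a zeroed level 3 table
 * page; failure to allocate either page is fatal.
 */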
1727281494Sandrewvoid
1728281494Sandrewpmap_growkernel(vm_offset_t addr)
1729281494Sandrew{
1730281494Sandrew	vm_paddr_t paddr;
1731281494Sandrew	vm_page_t nkpg;
1732297446Sandrew	pd_entry_t *l0, *l1, *l2;
1733281494Sandrew
1734281494Sandrew	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1735281494Sandrew
1736281494Sandrew	addr = roundup2(addr, L2_SIZE);
1737338484Skib	if (addr - 1 >= vm_map_max(kernel_map))
1738338484Skib		addr = vm_map_max(kernel_map);
1739281494Sandrew	while (kernel_vm_end < addr) {
1740297446Sandrew		l0 = pmap_l0(kernel_pmap, kernel_vm_end);
1741297446Sandrew		KASSERT(pmap_load(l0) != 0,
1742297446Sandrew		    ("pmap_growkernel: No level 0 kernel entry"));
1743297446Sandrew
1744297446Sandrew		l1 = pmap_l0_to_l1(l0, kernel_vm_end);
1745285045Sandrew		if (pmap_load(l1) == 0) {
1746281494Sandrew			/* We need a new L1 table entry */
1747281494Sandrew			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
1748281494Sandrew			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
1749281494Sandrew			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1750281494Sandrew			if (nkpg == NULL)
1751281494Sandrew				panic("pmap_growkernel: no memory to grow kernel");
1752281494Sandrew			if ((nkpg->flags & PG_ZERO) == 0)
1753281494Sandrew				pmap_zero_page(nkpg);
1754281494Sandrew			paddr = VM_PAGE_TO_PHYS(nkpg);
1755281494Sandrew			pmap_load_store(l1, paddr | L1_TABLE);
1756281494Sandrew			PTE_SYNC(l1);
1757281494Sandrew			continue; /* try again */
1758281494Sandrew		}
1759281494Sandrew		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
1760285045Sandrew		if ((pmap_load(l2) & ATTR_AF) != 0) {
1761281494Sandrew			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1762338484Skib			if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
1763338484Skib				kernel_vm_end = vm_map_max(kernel_map);
1764305531Sandrew				break;
1765281494Sandrew			}
1766281494Sandrew			continue;
1767281494Sandrew		}
1768281494Sandrew
1769281494Sandrew		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
1770281494Sandrew		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
1771281494Sandrew		    VM_ALLOC_ZERO);
1772281494Sandrew		if (nkpg == NULL)
1773281494Sandrew			panic("pmap_growkernel: no memory to grow kernel");
1774281494Sandrew		if ((nkpg->flags & PG_ZERO) == 0)
1775281494Sandrew			pmap_zero_page(nkpg);
1776281494Sandrew		paddr = VM_PAGE_TO_PHYS(nkpg);
1777281494Sandrew		pmap_load_store(l2, paddr | L2_TABLE);
1778281494Sandrew		PTE_SYNC(l2);
1779285212Sandrew		pmap_invalidate_page(kernel_pmap, kernel_vm_end);
1780281494Sandrew
1781281494Sandrew		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1782338484Skib		if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
1783338484Skib			kernel_vm_end = vm_map_max(kernel_map);
1784305531Sandrew			break;
1785281494Sandrew		}
1786281494Sandrew	}
1787281494Sandrew}
1788281494Sandrew
1789281494Sandrew
1790281494Sandrew/***************************************************
1791281494Sandrew * page management routines.
1792281494Sandrew ***************************************************/
1793281494Sandrew
1794281494SandrewCTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1795281494SandrewCTASSERT(_NPCM == 3);
1796281494SandrewCTASSERT(_NPCPV == 168);
1797281494Sandrew
1798281494Sandrewstatic __inline struct pv_chunk *
1799281494Sandrewpv_to_chunk(pv_entry_t pv)
1800281494Sandrew{
1801281494Sandrew
1802281494Sandrew	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1803281494Sandrew}
1804281494Sandrew
1805281494Sandrew#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1806281494Sandrew
1807281494Sandrew#define	PC_FREE0	0xfffffffffffffffful
1808281494Sandrew#define	PC_FREE1	0xfffffffffffffffful
1809281494Sandrew#define	PC_FREE2	0x000000fffffffffful
1810281494Sandrew
1811281494Sandrewstatic const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
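/*
 * A worked example of the constants above: each pv_chunk tracks
 * _NPCPV == 168 pv entries in three 64-bit bitmap words, so the first
 * two words are all ones and the third has only its low
 * 168 - 2 * 64 == 40 bits set, which is the value of PC_FREE2.
 */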
1812281494Sandrew
1813281494Sandrew#if 0
1814281494Sandrew#ifdef PV_STATS
1815281494Sandrewstatic int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1816281494Sandrew
1817281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1818281494Sandrew	"Current number of pv entry chunks");
1819281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1820281494Sandrew	"Current number of pv entry chunks allocated");
1821281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1822281494Sandrew	"Current number of pv entry chunks frees");
1823281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1824281494Sandrew	"Number of times tried to get a chunk page but failed.");
1825281494Sandrew
1826281494Sandrewstatic long pv_entry_frees, pv_entry_allocs, pv_entry_count;
1827281494Sandrewstatic int pv_entry_spare;
1828281494Sandrew
1829281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1830281494Sandrew	"Current number of pv entry frees");
1831281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1832281494Sandrew	"Current number of pv entry allocs");
1833281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1834281494Sandrew	"Current number of pv entries");
1835281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1836281494Sandrew	"Current number of spare pv entries");
1837281494Sandrew#endif
1838281494Sandrew#endif /* 0 */
1839281494Sandrew
1840281494Sandrew/*
1841281494Sandrew * We are in a serious low memory condition.  Resort to
1842281494Sandrew * drastic measures to free some pages so we can allocate
1843281494Sandrew * another pv entry chunk.
1844281494Sandrew *
1845281494Sandrew * Returns NULL if PV entries were reclaimed from the specified pmap.
1846281494Sandrew *
1847281494Sandrew * We do not, however, unmap 2mpages because subsequent accesses will
1848281494Sandrew * allocate per-page pv entries until repromotion occurs, thereby
1849281494Sandrew * exacerbating the shortage of free pv entries.
1850281494Sandrew */
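/*
 * Implementation note (describing the code below): the scan keeps two
 * marker entries, pc_marker and pc_marker_end, on the pc_lru list so
 * that concurrent reclaimers can rotate and resume the scan without
 * skipping or revisiting chunks.  A chunk whose pc_pmap is NULL is such
 * a marker and carries no pv entries.
 */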
1851281494Sandrewstatic vm_page_t
1852281494Sandrewreclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
1853281494Sandrew{
1854336711Smarkj	struct pv_chunk *pc, *pc_marker, *pc_marker_end;
1855336711Smarkj	struct pv_chunk_header pc_marker_b, pc_marker_end_b;
1856319210Sandrew	struct md_page *pvh;
1857319210Sandrew	pd_entry_t *pde;
1858336711Smarkj	pmap_t next_pmap, pmap;
1859319210Sandrew	pt_entry_t *pte, tpte;
1860319210Sandrew	pv_entry_t pv;
1861319210Sandrew	vm_offset_t va;
1862319210Sandrew	vm_page_t m, m_pc;
1863319210Sandrew	struct spglist free;
1864319210Sandrew	uint64_t inuse;
1865319210Sandrew	int bit, field, freed, lvl;
1866336711Smarkj	static int active_reclaims = 0;
1867281494Sandrew
1868319210Sandrew	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
1869319210Sandrew	KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
1870336711Smarkj
1871319210Sandrew	pmap = NULL;
1872319210Sandrew	m_pc = NULL;
1873319210Sandrew	SLIST_INIT(&free);
1874336711Smarkj	bzero(&pc_marker_b, sizeof(pc_marker_b));
1875336711Smarkj	bzero(&pc_marker_end_b, sizeof(pc_marker_end_b));
1876336711Smarkj	pc_marker = (struct pv_chunk *)&pc_marker_b;
1877336711Smarkj	pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;
1878336711Smarkj
1879319210Sandrew	mtx_lock(&pv_chunks_mutex);
1880336711Smarkj	active_reclaims++;
1881336711Smarkj	TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
1882336711Smarkj	TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru);
1883336711Smarkj	while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
1884336711Smarkj	    SLIST_EMPTY(&free)) {
1885336711Smarkj		next_pmap = pc->pc_pmap;
1886336711Smarkj		if (next_pmap == NULL) {
1887336711Smarkj			/*
1888336711Smarkj			 * The next chunk is a marker.  However, it is
1889336711Smarkj			 * not our marker, so active_reclaims must be
1890336711Smarkj			 * > 1.  Consequently, the next_chunk code
1891336711Smarkj			 * will not rotate the pv_chunks list.
1892336711Smarkj			 */
1893336711Smarkj			goto next_chunk;
1894336711Smarkj		}
1895319210Sandrew		mtx_unlock(&pv_chunks_mutex);
1896336711Smarkj
1897336711Smarkj		/*
1898336711Smarkj		 * A pv_chunk can only be removed from the pc_lru list
1899336711Smarkj		 * when both pv_chunks_mutex is owned and the
1900336711Smarkj		 * corresponding pmap is locked.
1901336711Smarkj		 */
1902336711Smarkj		if (pmap != next_pmap) {
1903319210Sandrew			if (pmap != NULL && pmap != locked_pmap)
1904319210Sandrew				PMAP_UNLOCK(pmap);
1905336711Smarkj			pmap = next_pmap;
1906319210Sandrew			/* Avoid deadlock and lock recursion. */
1907319210Sandrew			if (pmap > locked_pmap) {
1908319210Sandrew				RELEASE_PV_LIST_LOCK(lockp);
1909319210Sandrew				PMAP_LOCK(pmap);
1910319210Sandrew				mtx_lock(&pv_chunks_mutex);
1911319210Sandrew				continue;
1912336711Smarkj			} else if (pmap != locked_pmap) {
1913336711Smarkj				if (PMAP_TRYLOCK(pmap)) {
1914336711Smarkj					mtx_lock(&pv_chunks_mutex);
1915336711Smarkj					continue;
1916336711Smarkj				} else {
1917336711Smarkj					pmap = NULL; /* pmap is not locked */
1918336711Smarkj					mtx_lock(&pv_chunks_mutex);
1919336711Smarkj					pc = TAILQ_NEXT(pc_marker, pc_lru);
1920336711Smarkj					if (pc == NULL ||
1921336711Smarkj					    pc->pc_pmap != next_pmap)
1922336711Smarkj						continue;
1923336711Smarkj					goto next_chunk;
1924336711Smarkj				}
1925319210Sandrew			}
1926319210Sandrew		}
1927319210Sandrew
1928319210Sandrew		/*
1929319210Sandrew		 * Destroy every non-wired, 4 KB page mapping in the chunk.
1930319210Sandrew		 */
1931319210Sandrew		freed = 0;
1932319210Sandrew		for (field = 0; field < _NPCM; field++) {
1933319210Sandrew			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
1934319210Sandrew			    inuse != 0; inuse &= ~(1UL << bit)) {
1935319210Sandrew				bit = ffsl(inuse) - 1;
1936319210Sandrew				pv = &pc->pc_pventry[field * 64 + bit];
1937319210Sandrew				va = pv->pv_va;
1938319210Sandrew				pde = pmap_pde(pmap, va, &lvl);
1939319210Sandrew				if (lvl != 2)
1940319210Sandrew					continue;
1941319210Sandrew				pte = pmap_l2_to_l3(pde, va);
1942319210Sandrew				tpte = pmap_load(pte);
1943319210Sandrew				if ((tpte & ATTR_SW_WIRED) != 0)
1944319210Sandrew					continue;
1945319210Sandrew				tpte = pmap_load_clear(pte);
1946319210Sandrew				PTE_SYNC(pte);
1947319210Sandrew				pmap_invalidate_page(pmap, va);
1948319210Sandrew				m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
1949319210Sandrew				if (pmap_page_dirty(tpte))
1950319210Sandrew					vm_page_dirty(m);
1951319210Sandrew				if ((tpte & ATTR_AF) != 0)
1952319210Sandrew					vm_page_aflag_set(m, PGA_REFERENCED);
1953319210Sandrew				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1954319210Sandrew				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
1955319210Sandrew				m->md.pv_gen++;
1956319210Sandrew				if (TAILQ_EMPTY(&m->md.pv_list) &&
1957319210Sandrew				    (m->flags & PG_FICTITIOUS) == 0) {
1958319210Sandrew					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
1959319210Sandrew					if (TAILQ_EMPTY(&pvh->pv_list)) {
1960319210Sandrew						vm_page_aflag_clear(m,
1961319210Sandrew						    PGA_WRITEABLE);
1962319210Sandrew					}
1963319210Sandrew				}
1964319210Sandrew				pc->pc_map[field] |= 1UL << bit;
1965319210Sandrew				pmap_unuse_l3(pmap, va, pmap_load(pde), &free);
1966319210Sandrew				freed++;
1967319210Sandrew			}
1968319210Sandrew		}
1969319210Sandrew		if (freed == 0) {
1970319210Sandrew			mtx_lock(&pv_chunks_mutex);
1971336711Smarkj			goto next_chunk;
1972319210Sandrew		}
1973319210Sandrew		/* Every freed mapping is for a 4 KB page. */
1974319210Sandrew		pmap_resident_count_dec(pmap, freed);
1975319210Sandrew		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
1976319210Sandrew		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
1977319210Sandrew		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
1978319210Sandrew		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1979319210Sandrew		if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
1980319210Sandrew		    pc->pc_map[2] == PC_FREE2) {
1981319210Sandrew			PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
1982319210Sandrew			PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
1983319210Sandrew			PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
1984319210Sandrew			/* Entire chunk is free; return it. */
1985319210Sandrew			m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
1986319210Sandrew			dump_drop_page(m_pc->phys_addr);
1987319210Sandrew			mtx_lock(&pv_chunks_mutex);
1988336711Smarkj			TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1989319210Sandrew			break;
1990319210Sandrew		}
1991319210Sandrew		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1992319210Sandrew		mtx_lock(&pv_chunks_mutex);
1993319210Sandrew		/* One freed pv entry in locked_pmap is sufficient. */
1994319210Sandrew		if (pmap == locked_pmap)
1995319210Sandrew			break;
1996336711Smarkj
1997336711Smarkjnext_chunk:
1998336711Smarkj		TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
1999336711Smarkj		TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
2000336711Smarkj		if (active_reclaims == 1 && pmap != NULL) {
2001336711Smarkj			/*
2002336711Smarkj			 * Rotate the pv chunks list so that we do not
2003336711Smarkj			 * scan the same pv chunks that could not be
2004336711Smarkj			 * freed (because they contained a wired
2005336711Smarkj			 * and/or superpage mapping) on every
2006336711Smarkj			 * invocation of reclaim_pv_chunk().
2007336711Smarkj			 */
2008336711Smarkj			while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) {
2009336711Smarkj				MPASS(pc->pc_pmap != NULL);
2010336711Smarkj				TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
2011336711Smarkj				TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
2012336711Smarkj			}
2013336711Smarkj		}
2014319210Sandrew	}
2015336711Smarkj	TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
2016336711Smarkj	TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
2017336711Smarkj	active_reclaims--;
2018319210Sandrew	mtx_unlock(&pv_chunks_mutex);
2019319210Sandrew	if (pmap != NULL && pmap != locked_pmap)
2020319210Sandrew		PMAP_UNLOCK(pmap);
2021319210Sandrew	if (m_pc == NULL && !SLIST_EMPTY(&free)) {
2022319210Sandrew		m_pc = SLIST_FIRST(&free);
2023319210Sandrew		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
2024319210Sandrew		/* Recycle a freed page table page. */
2025319210Sandrew		m_pc->wire_count = 1;
2026319210Sandrew		atomic_add_int(&vm_cnt.v_wire_count, 1);
2027319210Sandrew	}
2028319210Sandrew	pmap_free_zero_pages(&free);
2029319210Sandrew	return (m_pc);
2030281494Sandrew}
2031281494Sandrew
2032281494Sandrew/*
2033281494Sandrew * free the pv_entry back to the free list
2034281494Sandrew */
2035281494Sandrewstatic void
2036281494Sandrewfree_pv_entry(pmap_t pmap, pv_entry_t pv)
2037281494Sandrew{
2038281494Sandrew	struct pv_chunk *pc;
2039281494Sandrew	int idx, field, bit;
2040281494Sandrew
2041281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2042281494Sandrew	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
2043281494Sandrew	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
2044281494Sandrew	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
2045281494Sandrew	pc = pv_to_chunk(pv);
2046281494Sandrew	idx = pv - &pc->pc_pventry[0];
2047281494Sandrew	field = idx / 64;
2048281494Sandrew	bit = idx % 64;
2049281494Sandrew	pc->pc_map[field] |= 1ul << bit;
2050281494Sandrew	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
2051281494Sandrew	    pc->pc_map[2] != PC_FREE2) {
2052281494Sandrew		/* 98% of the time, pc is already at the head of the list. */
2053281494Sandrew		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
2054281494Sandrew			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2055281494Sandrew			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
2056281494Sandrew		}
2057281494Sandrew		return;
2058281494Sandrew	}
2059281494Sandrew	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2060281494Sandrew	free_pv_chunk(pc);
2061281494Sandrew}
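/*
 * Index arithmetic used above, by example: pv entry 100 within a chunk
 * maps to bitmap word 100 / 64 == 1 and bit 100 % 64 == 36 of pc_map[1].
 */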
2062281494Sandrew
2063281494Sandrewstatic void
2064281494Sandrewfree_pv_chunk(struct pv_chunk *pc)
2065281494Sandrew{
2066281494Sandrew	vm_page_t m;
2067281494Sandrew
2068281494Sandrew	mtx_lock(&pv_chunks_mutex);
2069281494Sandrew 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
2070281494Sandrew	mtx_unlock(&pv_chunks_mutex);
2071281494Sandrew	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
2072281494Sandrew	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
2073281494Sandrew	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
2074281494Sandrew	/* entire chunk is free, return it */
2075281494Sandrew	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
2076281494Sandrew	dump_drop_page(m->phys_addr);
2077288256Salc	vm_page_unwire(m, PQ_NONE);
2078281494Sandrew	vm_page_free(m);
2079281494Sandrew}
2080281494Sandrew
2081281494Sandrew/*
2082281494Sandrew * Returns a new PV entry, allocating a new PV chunk from the system when
2083281494Sandrew * needed.  If this PV chunk allocation fails and a PV list lock pointer was
2084281494Sandrew * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
2085281494Sandrew * returned.
2086281494Sandrew *
2087281494Sandrew * The given PV list lock may be released.
2088281494Sandrew */
2089281494Sandrewstatic pv_entry_t
2090281494Sandrewget_pv_entry(pmap_t pmap, struct rwlock **lockp)
2091281494Sandrew{
2092281494Sandrew	int bit, field;
2093281494Sandrew	pv_entry_t pv;
2094281494Sandrew	struct pv_chunk *pc;
2095281494Sandrew	vm_page_t m;
2096281494Sandrew
2097281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2098281494Sandrew	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
2099281494Sandrewretry:
2100281494Sandrew	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
2101281494Sandrew	if (pc != NULL) {
2102281494Sandrew		for (field = 0; field < _NPCM; field++) {
2103281494Sandrew			if (pc->pc_map[field]) {
2104281494Sandrew				bit = ffsl(pc->pc_map[field]) - 1;
2105281494Sandrew				break;
2106281494Sandrew			}
2107281494Sandrew		}
2108281494Sandrew		if (field < _NPCM) {
2109281494Sandrew			pv = &pc->pc_pventry[field * 64 + bit];
2110281494Sandrew			pc->pc_map[field] &= ~(1ul << bit);
2111281494Sandrew			/* If this was the last free entry, move pc to tail */
2112281494Sandrew			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
2113281494Sandrew			    pc->pc_map[2] == 0) {
2114281494Sandrew				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2115281494Sandrew				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
2116281494Sandrew				    pc_list);
2117281494Sandrew			}
2118281494Sandrew			PV_STAT(atomic_add_long(&pv_entry_count, 1));
2119281494Sandrew			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
2120281494Sandrew			return (pv);
2121281494Sandrew		}
2122281494Sandrew	}
2123281494Sandrew	/* No free items, allocate another chunk */
2124281494Sandrew	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
2125281494Sandrew	    VM_ALLOC_WIRED);
2126281494Sandrew	if (m == NULL) {
2127281494Sandrew		if (lockp == NULL) {
2128281494Sandrew			PV_STAT(pc_chunk_tryfail++);
2129281494Sandrew			return (NULL);
2130281494Sandrew		}
2131281494Sandrew		m = reclaim_pv_chunk(pmap, lockp);
2132281494Sandrew		if (m == NULL)
2133281494Sandrew			goto retry;
2134281494Sandrew	}
2135281494Sandrew	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
2136281494Sandrew	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
2137281494Sandrew	dump_add_page(m->phys_addr);
2138281494Sandrew	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
2139281494Sandrew	pc->pc_pmap = pmap;
2140281494Sandrew	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
2141281494Sandrew	pc->pc_map[1] = PC_FREE1;
2142281494Sandrew	pc->pc_map[2] = PC_FREE2;
2143281494Sandrew	mtx_lock(&pv_chunks_mutex);
2144281494Sandrew	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
2145281494Sandrew	mtx_unlock(&pv_chunks_mutex);
2146281494Sandrew	pv = &pc->pc_pventry[0];
2147281494Sandrew	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
2148281494Sandrew	PV_STAT(atomic_add_long(&pv_entry_count, 1));
2149281494Sandrew	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
2150281494Sandrew	return (pv);
2151281494Sandrew}
2152281494Sandrew
2153281494Sandrew/*
2154305882Sandrew * Ensure that the number of spare PV entries in the specified pmap meets or
2155305882Sandrew * exceeds the given count, "needed".
2156305882Sandrew *
2157305882Sandrew * The given PV list lock may be released.
2158305882Sandrew */
2159305882Sandrewstatic void
2160305882Sandrewreserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
2161305882Sandrew{
2162305882Sandrew	struct pch new_tail;
2163305882Sandrew	struct pv_chunk *pc;
2164336071Smarkj	vm_page_t m;
2165305882Sandrew	int avail, free;
2166336071Smarkj	bool reclaimed;
2167305882Sandrew
2168305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2169305882Sandrew	KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
2170305882Sandrew
2171305882Sandrew	/*
2172305882Sandrew	 * Newly allocated PV chunks must be stored in a private list until
2173305882Sandrew	 * the required number of PV chunks have been allocated.  Otherwise,
2174305882Sandrew	 * reclaim_pv_chunk() could recycle one of these chunks.  In
2175305882Sandrew	 * contrast, these chunks must be added to the pmap upon allocation.
2176305882Sandrew	 */
2177305882Sandrew	TAILQ_INIT(&new_tail);
2178305882Sandrewretry:
2179305882Sandrew	avail = 0;
2180305882Sandrew	TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
2181305882Sandrew		bit_count((bitstr_t *)pc->pc_map, 0,
2182305882Sandrew		    sizeof(pc->pc_map) * NBBY, &free);
2183305882Sandrew		if (free == 0)
2184305882Sandrew			break;
2185305882Sandrew		avail += free;
2186305882Sandrew		if (avail >= needed)
2187305882Sandrew			break;
2188305882Sandrew	}
2189336071Smarkj	for (reclaimed = false; avail < needed; avail += _NPCPV) {
2190305882Sandrew		m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
2191305882Sandrew		    VM_ALLOC_WIRED);
2192305882Sandrew		if (m == NULL) {
2193305882Sandrew			m = reclaim_pv_chunk(pmap, lockp);
2194305882Sandrew			if (m == NULL)
2195305882Sandrew				goto retry;
2196336071Smarkj			reclaimed = true;
2197305882Sandrew		}
2198305882Sandrew		PV_STAT(atomic_add_int(&pc_chunk_count, 1));
2199305882Sandrew		PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
2200305882Sandrew		dump_add_page(m->phys_addr);
2201305882Sandrew		pc = (void *)PHYS_TO_DMAP(m->phys_addr);
2202305882Sandrew		pc->pc_pmap = pmap;
2203305882Sandrew		pc->pc_map[0] = PC_FREE0;
2204305882Sandrew		pc->pc_map[1] = PC_FREE1;
2205305882Sandrew		pc->pc_map[2] = PC_FREE2;
2206305882Sandrew		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
2207305882Sandrew		TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
2208305882Sandrew		PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
2209336071Smarkj
2210336071Smarkj		/*
2211336071Smarkj		 * The reclaim might have freed a chunk from the current pmap.
2212336071Smarkj		 * If that chunk contained available entries, we need to
2213336071Smarkj		 * re-count the number of available entries.
2214336071Smarkj		 */
2215336071Smarkj		if (reclaimed)
2216336071Smarkj			goto retry;
2217305882Sandrew	}
2218305882Sandrew	if (!TAILQ_EMPTY(&new_tail)) {
2219305882Sandrew		mtx_lock(&pv_chunks_mutex);
2220305882Sandrew		TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
2221305882Sandrew		mtx_unlock(&pv_chunks_mutex);
2222305882Sandrew	}
2223305882Sandrew}
2224305882Sandrew
2225305882Sandrew/*
2226281494Sandrew * First find and then remove the pv entry for the specified pmap and virtual
2227281494Sandrew * address from the specified pv list.  Returns the pv entry if found and NULL
2228281494Sandrew * otherwise.  This operation can be performed on pv lists for either 4KB or
2229281494Sandrew * 2MB page mappings.
2230281494Sandrew */
2231281494Sandrewstatic __inline pv_entry_t
2232281494Sandrewpmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
2233281494Sandrew{
2234281494Sandrew	pv_entry_t pv;
2235281494Sandrew
2236281494Sandrew	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
2237281494Sandrew		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
2238281494Sandrew			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
2239281494Sandrew			pvh->pv_gen++;
2240281494Sandrew			break;
2241281494Sandrew		}
2242281494Sandrew	}
2243281494Sandrew	return (pv);
2244281494Sandrew}
2245281494Sandrew
2246281494Sandrew/*
2247305882Sandrew * After demotion from a 2MB page mapping to 512 4KB page mappings,
2248305882Sandrew * destroy the pv entry for the 2MB page mapping and reinstantiate the pv
2249305882Sandrew * entries for each of the 4KB page mappings.
2250305882Sandrew */
2251305882Sandrewstatic void
2252305882Sandrewpmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
2253305882Sandrew    struct rwlock **lockp)
2254305882Sandrew{
2255305882Sandrew	struct md_page *pvh;
2256305882Sandrew	struct pv_chunk *pc;
2257305882Sandrew	pv_entry_t pv;
2258305882Sandrew	vm_offset_t va_last;
2259305882Sandrew	vm_page_t m;
2260305882Sandrew	int bit, field;
2261305882Sandrew
2262305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2263305882Sandrew	KASSERT((pa & L2_OFFSET) == 0,
2264305882Sandrew	    ("pmap_pv_demote_l2: pa is not 2mpage aligned"));
2265305882Sandrew	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
2266305882Sandrew
2267305882Sandrew	/*
2268305882Sandrew	 * Transfer the 2mpage's pv entry for this mapping to the first
2269305882Sandrew	 * page's pv list.  Once this transfer begins, the pv list lock
2270305882Sandrew	 * must not be released until the last pv entry is reinstantiated.
2271305882Sandrew	 */
2272305882Sandrew	pvh = pa_to_pvh(pa);
2273305882Sandrew	va = va & ~L2_OFFSET;
2274305882Sandrew	pv = pmap_pvh_remove(pvh, pmap, va);
2275305882Sandrew	KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
2276305882Sandrew	m = PHYS_TO_VM_PAGE(pa);
2277305882Sandrew	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2278305882Sandrew	m->md.pv_gen++;
2279305882Sandrew	/* Instantiate the remaining Ln_ENTRIES - 1 pv entries. */
2280305882Sandrew	PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1));
2281305882Sandrew	va_last = va + L2_SIZE - PAGE_SIZE;
2282305882Sandrew	for (;;) {
2283305882Sandrew		pc = TAILQ_FIRST(&pmap->pm_pvchunk);
2284305882Sandrew		KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
2285305882Sandrew		    pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare"));
2286305882Sandrew		for (field = 0; field < _NPCM; field++) {
2287305882Sandrew			while (pc->pc_map[field]) {
2288305882Sandrew				bit = ffsl(pc->pc_map[field]) - 1;
2289305882Sandrew				pc->pc_map[field] &= ~(1ul << bit);
2290305882Sandrew				pv = &pc->pc_pventry[field * 64 + bit];
2291305882Sandrew				va += PAGE_SIZE;
2292305882Sandrew				pv->pv_va = va;
2293305882Sandrew				m++;
2294305882Sandrew				KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2295305882Sandrew			    ("pmap_pv_demote_l2: page %p is not managed", m));
2296305882Sandrew				TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2297305882Sandrew				m->md.pv_gen++;
2298305882Sandrew				if (va == va_last)
2299305882Sandrew					goto out;
2300305882Sandrew			}
2301305882Sandrew		}
2302305882Sandrew		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2303305882Sandrew		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
2304305882Sandrew	}
2305305882Sandrewout:
2306305882Sandrew	if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
2307305882Sandrew		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2308305882Sandrew		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
2309305882Sandrew	}
2310305882Sandrew	PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1));
2311305882Sandrew	PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1));
2312305882Sandrew}
2313305882Sandrew
2314305882Sandrew/*
2315281494Sandrew * First find and then destroy the pv entry for the specified pmap and virtual
2316281494Sandrew * address.  This operation can be performed on pv lists for either 4KB or 2MB
2317281494Sandrew * page mappings.
2318281494Sandrew */
2319281494Sandrewstatic void
2320281494Sandrewpmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
2321281494Sandrew{
2322281494Sandrew	pv_entry_t pv;
2323281494Sandrew
2324281494Sandrew	pv = pmap_pvh_remove(pvh, pmap, va);
2325281494Sandrew	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
2326281494Sandrew	free_pv_entry(pmap, pv);
2327281494Sandrew}
2328281494Sandrew
2329281494Sandrew/*
2330281494Sandrew * Conditionally create the PV entry for a 4KB page mapping if the required
2331281494Sandrew * memory can be allocated without resorting to reclamation.
2332281494Sandrew */
2333281494Sandrewstatic boolean_t
2334281494Sandrewpmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
2335281494Sandrew    struct rwlock **lockp)
2336281494Sandrew{
2337281494Sandrew	pv_entry_t pv;
2338281494Sandrew
2339281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2340281494Sandrew	/* Pass NULL instead of the lock pointer to disable reclamation. */
2341281494Sandrew	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
2342281494Sandrew		pv->pv_va = va;
2343281494Sandrew		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
2344281494Sandrew		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2345281494Sandrew		m->md.pv_gen++;
2346281494Sandrew		return (TRUE);
2347281494Sandrew	} else
2348281494Sandrew		return (FALSE);
2349281494Sandrew}
2350281494Sandrew
2351281494Sandrew/*
2352281494Sandrew * pmap_remove_l3: unmap a single 4KB page in a process
2353281494Sandrew */
2354281494Sandrewstatic int
2355305531Sandrewpmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
2356281494Sandrew    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
2357281494Sandrew{
2358305882Sandrew	struct md_page *pvh;
2359281494Sandrew	pt_entry_t old_l3;
2360281494Sandrew	vm_page_t m;
2361281494Sandrew
2362281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2363281494Sandrew	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
2364281494Sandrew		cpu_dcache_wb_range(va, L3_SIZE);
2365281494Sandrew	old_l3 = pmap_load_clear(l3);
2366281494Sandrew	PTE_SYNC(l3);
2367285212Sandrew	pmap_invalidate_page(pmap, va);
2368281494Sandrew	if (old_l3 & ATTR_SW_WIRED)
2369281494Sandrew		pmap->pm_stats.wired_count -= 1;
2370281494Sandrew	pmap_resident_count_dec(pmap, 1);
2371281494Sandrew	if (old_l3 & ATTR_SW_MANAGED) {
2372281494Sandrew		m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
2373281494Sandrew		if (pmap_page_dirty(old_l3))
2374281494Sandrew			vm_page_dirty(m);
2375281494Sandrew		if (old_l3 & ATTR_AF)
2376281494Sandrew			vm_page_aflag_set(m, PGA_REFERENCED);
2377281494Sandrew		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
2378281494Sandrew		pmap_pvh_free(&m->md, pmap, va);
2379305882Sandrew		if (TAILQ_EMPTY(&m->md.pv_list) &&
2380305882Sandrew		    (m->flags & PG_FICTITIOUS) == 0) {
2381305882Sandrew			pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
2382305882Sandrew			if (TAILQ_EMPTY(&pvh->pv_list))
2383305882Sandrew				vm_page_aflag_clear(m, PGA_WRITEABLE);
2384305882Sandrew		}
2385281494Sandrew	}
2386281494Sandrew	return (pmap_unuse_l3(pmap, va, l2e, free));
2387281494Sandrew}
2388281494Sandrew
2389281494Sandrew/*
2390281494Sandrew *	Remove the given range of addresses from the specified map.
2391281494Sandrew *
2392281494Sandrew *	It is assumed that the start and end are properly
2393281494Sandrew *	rounded to the page size.
2394281494Sandrew */
2395281494Sandrewvoid
2396281494Sandrewpmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2397281494Sandrew{
2398281494Sandrew	struct rwlock *lock;
2399281494Sandrew	vm_offset_t va, va_next;
2400297446Sandrew	pd_entry_t *l0, *l1, *l2;
2401281494Sandrew	pt_entry_t l3_paddr, *l3;
2402281494Sandrew	struct spglist free;
2403281494Sandrew
2404281494Sandrew	/*
2405281494Sandrew	 * Perform an unsynchronized read.  This is, however, safe.
2406281494Sandrew	 */
2407281494Sandrew	if (pmap->pm_stats.resident_count == 0)
2408281494Sandrew		return;
2409281494Sandrew
2410281494Sandrew	SLIST_INIT(&free);
2411281494Sandrew
2412281494Sandrew	PMAP_LOCK(pmap);
2413281494Sandrew
2414281494Sandrew	lock = NULL;
2415281494Sandrew	for (; sva < eva; sva = va_next) {
2416281494Sandrew
2417281494Sandrew		if (pmap->pm_stats.resident_count == 0)
2418281494Sandrew			break;
2419281494Sandrew
2420297446Sandrew		l0 = pmap_l0(pmap, sva);
2421297446Sandrew		if (pmap_load(l0) == 0) {
2422297446Sandrew			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
2423297446Sandrew			if (va_next < sva)
2424297446Sandrew				va_next = eva;
2425297446Sandrew			continue;
2426297446Sandrew		}
2427297446Sandrew
2428297446Sandrew		l1 = pmap_l0_to_l1(l0, sva);
2429285045Sandrew		if (pmap_load(l1) == 0) {
2430281494Sandrew			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2431281494Sandrew			if (va_next < sva)
2432281494Sandrew				va_next = eva;
2433281494Sandrew			continue;
2434281494Sandrew		}
2435281494Sandrew
2436281494Sandrew		/*
2437281494Sandrew		 * Calculate index for next page table.
2438281494Sandrew		 */
2439281494Sandrew		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2440281494Sandrew		if (va_next < sva)
2441281494Sandrew			va_next = eva;
2442281494Sandrew
2443281494Sandrew		l2 = pmap_l1_to_l2(l1, sva);
2444281494Sandrew		if (l2 == NULL)
2445281494Sandrew			continue;
2446281494Sandrew
2447288445Sandrew		l3_paddr = pmap_load(l2);
2448281494Sandrew
2449305882Sandrew		if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) {
2450305882Sandrew			/* TODO: Add pmap_remove_l2 */
2451305882Sandrew			if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET,
2452305882Sandrew			    &lock) == NULL)
2453305882Sandrew				continue;
2454305882Sandrew			l3_paddr = pmap_load(l2);
2455305882Sandrew		}
2456305882Sandrew
2457281494Sandrew		/*
2458281494Sandrew		 * Weed out invalid mappings.
2459281494Sandrew		 */
2460281494Sandrew		if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
2461281494Sandrew			continue;
2462281494Sandrew
2463281494Sandrew		/*
2464281494Sandrew		 * Limit our scan to either the end of the va represented
2465281494Sandrew		 * by the current page table page, or to the end of the
2466281494Sandrew		 * range being removed.
2467281494Sandrew		 */
2468281494Sandrew		if (va_next > eva)
2469281494Sandrew			va_next = eva;
2470281494Sandrew
2471281494Sandrew		va = va_next;
2472281494Sandrew		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2473281494Sandrew		    sva += L3_SIZE) {
2474281494Sandrew			if (l3 == NULL)
2475281494Sandrew				panic("l3 == NULL");
2476285045Sandrew			if (pmap_load(l3) == 0) {
2477281494Sandrew				if (va != va_next) {
2478281494Sandrew					pmap_invalidate_range(pmap, va, sva);
2479281494Sandrew					va = va_next;
2480281494Sandrew				}
2481281494Sandrew				continue;
2482281494Sandrew			}
2483281494Sandrew			if (va == va_next)
2484281494Sandrew				va = sva;
2485281494Sandrew			if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
2486281494Sandrew			    &lock)) {
2487281494Sandrew				sva += L3_SIZE;
2488281494Sandrew				break;
2489281494Sandrew			}
2490281494Sandrew		}
2491281494Sandrew		if (va != va_next)
2492281494Sandrew			pmap_invalidate_range(pmap, va, sva);
2493281494Sandrew	}
2494281494Sandrew	if (lock != NULL)
2495281494Sandrew		rw_wunlock(lock);
2496281494Sandrew	PMAP_UNLOCK(pmap);
2497281494Sandrew	pmap_free_zero_pages(&free);
2498281494Sandrew}
2499281494Sandrew
2500281494Sandrew/*
2501281494Sandrew *	Routine:	pmap_remove_all
2502281494Sandrew *	Function:
2503281494Sandrew *		Removes this physical page from
2504281494Sandrew *		all physical maps in which it resides.
2505281494Sandrew *		Reflects back modify bits to the pager.
2506281494Sandrew *
2507281494Sandrew *	Notes:
2508281494Sandrew *		Original versions of this routine were very
2509281494Sandrew *		inefficient because they iteratively called
2510281494Sandrew *		pmap_remove (slow...)
2511281494Sandrew */
2512281494Sandrew
2513281494Sandrewvoid
2514281494Sandrewpmap_remove_all(vm_page_t m)
2515281494Sandrew{
2516305882Sandrew	struct md_page *pvh;
2517281494Sandrew	pv_entry_t pv;
2518281494Sandrew	pmap_t pmap;
2519305879Sandrew	struct rwlock *lock;
2520297446Sandrew	pd_entry_t *pde, tpde;
2521297446Sandrew	pt_entry_t *pte, tpte;
2522305882Sandrew	vm_offset_t va;
2523281494Sandrew	struct spglist free;
2524305882Sandrew	int lvl, pvh_gen, md_gen;
2525281494Sandrew
2526281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2527281494Sandrew	    ("pmap_remove_all: page %p is not managed", m));
2528281494Sandrew	SLIST_INIT(&free);
2529305879Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2530305882Sandrew	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
2531305882Sandrew	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
2532305879Sandrewretry:
2533305879Sandrew	rw_wlock(lock);
2534305882Sandrew	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
2535305882Sandrew		pmap = PV_PMAP(pv);
2536305882Sandrew		if (!PMAP_TRYLOCK(pmap)) {
2537305882Sandrew			pvh_gen = pvh->pv_gen;
2538305882Sandrew			rw_wunlock(lock);
2539305882Sandrew			PMAP_LOCK(pmap);
2540305882Sandrew			rw_wlock(lock);
2541305882Sandrew			if (pvh_gen != pvh->pv_gen) {
2542305882Sandrew				rw_wunlock(lock);
2543305882Sandrew				PMAP_UNLOCK(pmap);
2544305882Sandrew				goto retry;
2545305882Sandrew			}
2546305882Sandrew		}
2547305882Sandrew		va = pv->pv_va;
2548305882Sandrew		pte = pmap_pte(pmap, va, &lvl);
2549305882Sandrew		KASSERT(pte != NULL,
2550305882Sandrew		    ("pmap_remove_all: no page table entry found"));
2551305882Sandrew		KASSERT(lvl == 2,
2552305882Sandrew		    ("pmap_remove_all: invalid pte level %d", lvl));
2553305882Sandrew
2554305882Sandrew		pmap_demote_l2_locked(pmap, pte, va, &lock);
2555305882Sandrew		PMAP_UNLOCK(pmap);
2556305882Sandrew	}
2557281494Sandrew	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
2558281494Sandrew		pmap = PV_PMAP(pv);
2559305879Sandrew		if (!PMAP_TRYLOCK(pmap)) {
2560305882Sandrew			pvh_gen = pvh->pv_gen;
2561305879Sandrew			md_gen = m->md.pv_gen;
2562305879Sandrew			rw_wunlock(lock);
2563305879Sandrew			PMAP_LOCK(pmap);
2564305879Sandrew			rw_wlock(lock);
2565305882Sandrew			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
2566305879Sandrew				rw_wunlock(lock);
2567305879Sandrew				PMAP_UNLOCK(pmap);
2568305879Sandrew				goto retry;
2569305879Sandrew			}
2570305879Sandrew		}
2571281494Sandrew		pmap_resident_count_dec(pmap, 1);
2572297446Sandrew
2573297446Sandrew		pde = pmap_pde(pmap, pv->pv_va, &lvl);
2574297446Sandrew		KASSERT(pde != NULL,
2575297446Sandrew		    ("pmap_remove_all: no page directory entry found"));
2576297446Sandrew		KASSERT(lvl == 2,
2577297446Sandrew		    ("pmap_remove_all: invalid pde level %d", lvl));
2578297446Sandrew		tpde = pmap_load(pde);
2579297446Sandrew
2580297446Sandrew		pte = pmap_l2_to_l3(pde, pv->pv_va);
2581297446Sandrew		tpte = pmap_load(pte);
2582281494Sandrew		if (pmap_is_current(pmap) &&
2583297446Sandrew		    pmap_l3_valid_cacheable(tpte))
2584281494Sandrew			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
2585297446Sandrew		pmap_load_clear(pte);
2586297446Sandrew		PTE_SYNC(pte);
2587285212Sandrew		pmap_invalidate_page(pmap, pv->pv_va);
2588297446Sandrew		if (tpte & ATTR_SW_WIRED)
2589281494Sandrew			pmap->pm_stats.wired_count--;
2590297446Sandrew		if ((tpte & ATTR_AF) != 0)
2591281494Sandrew			vm_page_aflag_set(m, PGA_REFERENCED);
2592281494Sandrew
2593281494Sandrew		/*
2594281494Sandrew		 * Update the vm_page_t clean and reference bits.
2595281494Sandrew		 */
2596297446Sandrew		if (pmap_page_dirty(tpte))
2597281494Sandrew			vm_page_dirty(m);
2598297446Sandrew		pmap_unuse_l3(pmap, pv->pv_va, tpde, &free);
2599281494Sandrew		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2600281494Sandrew		m->md.pv_gen++;
2601281494Sandrew		free_pv_entry(pmap, pv);
2602281494Sandrew		PMAP_UNLOCK(pmap);
2603281494Sandrew	}
2604281494Sandrew	vm_page_aflag_clear(m, PGA_WRITEABLE);
2605305879Sandrew	rw_wunlock(lock);
2606281494Sandrew	pmap_free_zero_pages(&free);
2607281494Sandrew}
2608281494Sandrew
2609281494Sandrew/*
2610281494Sandrew *	Set the physical protection on the
2611281494Sandrew *	specified range of this map as requested.
2612281494Sandrew */
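/*
 * Mapping from protection bits to descriptor attributes used below:
 * clearing VM_PROT_WRITE sets ATTR_AP(ATTR_AP_RO), clearing
 * VM_PROT_EXECUTE sets ATTR_XN, and VM_PROT_NONE is handled by removing
 * the range entirely.
 */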
2613281494Sandrewvoid
2614281494Sandrewpmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
2615281494Sandrew{
2616281494Sandrew	vm_offset_t va, va_next;
2617297446Sandrew	pd_entry_t *l0, *l1, *l2;
2618319203Sandrew	pt_entry_t *l3p, l3, nbits;
2619281494Sandrew
2620319203Sandrew	KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
2621319203Sandrew	if (prot == VM_PROT_NONE) {
2622281494Sandrew		pmap_remove(pmap, sva, eva);
2623281494Sandrew		return;
2624281494Sandrew	}
2625281494Sandrew
2626319203Sandrew	if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
2627319203Sandrew	    (VM_PROT_WRITE | VM_PROT_EXECUTE))
2628281494Sandrew		return;
2629281494Sandrew
2630281494Sandrew	PMAP_LOCK(pmap);
2631281494Sandrew	for (; sva < eva; sva = va_next) {
2632281494Sandrew
2633297446Sandrew		l0 = pmap_l0(pmap, sva);
2634297446Sandrew		if (pmap_load(l0) == 0) {
2635297446Sandrew			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
2636297446Sandrew			if (va_next < sva)
2637297446Sandrew				va_next = eva;
2638297446Sandrew			continue;
2639297446Sandrew		}
2640297446Sandrew
2641297446Sandrew		l1 = pmap_l0_to_l1(l0, sva);
2642285045Sandrew		if (pmap_load(l1) == 0) {
2643281494Sandrew			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2644281494Sandrew			if (va_next < sva)
2645281494Sandrew				va_next = eva;
2646281494Sandrew			continue;
2647281494Sandrew		}
2648281494Sandrew
2649281494Sandrew		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2650281494Sandrew		if (va_next < sva)
2651281494Sandrew			va_next = eva;
2652281494Sandrew
2653281494Sandrew		l2 = pmap_l1_to_l2(l1, sva);
2654305882Sandrew		if (pmap_load(l2) == 0)
2655281494Sandrew			continue;
2656281494Sandrew
2657305882Sandrew		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
2658305882Sandrew			l3p = pmap_demote_l2(pmap, l2, sva);
2659305882Sandrew			if (l3p == NULL)
2660305882Sandrew				continue;
2661305882Sandrew		}
2662305882Sandrew		KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
2663305882Sandrew		    ("pmap_protect: Invalid L2 entry after demotion"));
2664305882Sandrew
2665281494Sandrew		if (va_next > eva)
2666281494Sandrew			va_next = eva;
2667281494Sandrew
2668281494Sandrew		va = va_next;
2669281494Sandrew		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
2670281494Sandrew		    sva += L3_SIZE) {
2671281494Sandrew			l3 = pmap_load(l3p);
2672319203Sandrew			if (!pmap_l3_valid(l3))
2673319203Sandrew				continue;
2674319203Sandrew
2675319203Sandrew			nbits = 0;
2676319203Sandrew			if ((prot & VM_PROT_WRITE) == 0) {
2677317354Skib				if ((l3 & ATTR_SW_MANAGED) &&
2678317354Skib				    pmap_page_dirty(l3)) {
2679317354Skib					vm_page_dirty(PHYS_TO_VM_PAGE(l3 &
2680317354Skib					    ~ATTR_MASK));
2681317354Skib				}
2682319203Sandrew				nbits |= ATTR_AP(ATTR_AP_RO);
2683281494Sandrew			}
2684319203Sandrew			if ((prot & VM_PROT_EXECUTE) == 0)
2685319203Sandrew				nbits |= ATTR_XN;
2686319203Sandrew
2687319203Sandrew			pmap_set(l3p, nbits);
2688319203Sandrew			PTE_SYNC(l3p);
2689319203Sandrew			/* XXX: Use pmap_invalidate_range */
2690323845Sandrew			pmap_invalidate_page(pmap, sva);
2691281494Sandrew		}
2692281494Sandrew	}
2693281494Sandrew	PMAP_UNLOCK(pmap);
2694281494Sandrew}
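
/*
 * Illustrative sketch, not part of the original source: a caller that wants
 * to revoke write and execute permission from a range simply passes the
 * reduced protection; pmap_protect() above folds that into ATTR_AP(ATTR_AP_RO)
 * and ATTR_XN on each valid L3 entry.  The wrapper name and range parameters
 * below are hypothetical.
 */
#if 0
static void
example_make_readonly(pmap_t pmap, vm_offset_t start, vm_offset_t end)
{

	/* VM_PROT_READ alone clears both write and execute permission. */
	pmap_protect(pmap, start, end, VM_PROT_READ);
}
#endif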
2695281494Sandrew
2696281494Sandrew/*
2697305882Sandrew * Inserts the specified page table page into the specified pmap's collection
2698305882Sandrew * of idle page table pages.  Each of a pmap's page table pages is responsible
2699305882Sandrew * for mapping a distinct range of virtual addresses.  The pmap's collection is
2700305882Sandrew * ordered by this virtual address range.
2701305882Sandrew */
2702305882Sandrewstatic __inline int
2703305882Sandrewpmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
2704305882Sandrew{
2705305882Sandrew
2706305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2707305882Sandrew	return (vm_radix_insert(&pmap->pm_root, mpte));
2708305882Sandrew}
2709305882Sandrew
2710305882Sandrew/*
2711318716Smarkj * Removes the page table page mapping the specified virtual address from the
2712318716Smarkj * specified pmap's collection of idle page table pages, and returns it.
2713318716Smarkj * Otherwise, returns NULL if there is no page table page corresponding to the
2714318716Smarkj * specified virtual address.
2715305882Sandrew */
2716305882Sandrewstatic __inline vm_page_t
2717318716Smarkjpmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
2718305882Sandrew{
2719305882Sandrew
2720305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2721318716Smarkj	return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
2722305882Sandrew}
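
/*
 * Illustrative sketch, not part of the original source: pmap_promote_l2()
 * below stashes the now-idle L3 page table page with pmap_insert_pt_page(),
 * and a later demotion or removal recovers it with pmap_remove_pt_page().
 * The helper name is hypothetical and assumes mpte->pindex equals
 * pmap_l2_pindex(va).
 */
#if 0
static void
example_pt_page_roundtrip(pmap_t pmap, vm_page_t mpte, vm_offset_t va)
{
	vm_page_t ml3;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/* vm_radix_insert() returns 0 on success, ENOMEM on failure. */
	if (pmap_insert_pt_page(pmap, mpte) == 0) {
		/* ... the 2MB block mapping is in use here ... */
		ml3 = pmap_remove_pt_page(pmap, va);
		KASSERT(ml3 == mpte, ("example: wrong page table page"));
	}
}
#endif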
2723305882Sandrew
2724305882Sandrew/*
2725305882Sandrew * Performs a break-before-make update of a pmap entry. This is needed when
2726305882Sandrew * either promoting or demoting pages to ensure the TLB doesn't get into an
2727305882Sandrew * inconsistent state.
2728305882Sandrew */
2729305882Sandrewstatic void
2730305882Sandrewpmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
2731305882Sandrew    vm_offset_t va, vm_size_t size)
2732305882Sandrew{
2733305882Sandrew	register_t intr;
2734305882Sandrew
2735305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2736305882Sandrew
2737305882Sandrew	/*
2738305882Sandrew	 * Ensure we don't get switched out with the page table in an
2739305882Sandrew	 * inconsistent state. We also need to ensure no interrupts fire
2740305882Sandrew	 * as they may make use of an address we are about to invalidate.
2741305882Sandrew	 */
2742305882Sandrew	intr = intr_disable();
2743305882Sandrew	critical_enter();
2744305882Sandrew
2745305882Sandrew	/* Clear the old mapping */
2746305882Sandrew	pmap_load_clear(pte);
2747305882Sandrew	PTE_SYNC(pte);
2748305882Sandrew	pmap_invalidate_range(pmap, va, va + size);
2749305882Sandrew
2750305882Sandrew	/* Create the new mapping */
2751305882Sandrew	pmap_load_store(pte, newpte);
2752305882Sandrew	PTE_SYNC(pte);
2753305882Sandrew
2754305882Sandrew	critical_exit();
2755305882Sandrew	intr_restore(intr);
2756305882Sandrew}
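
/*
 * Illustrative sketch, not part of the original source: promotion and
 * demotion route their entry rewrites through pmap_update_entry() so the
 * stale translation is invalidated before the replacement becomes visible
 * (the ARMv8 break-before-make requirement).  The helper name below is
 * hypothetical; pmap_promote_l2() performs the equivalent call for real.
 */
#if 0
static void
example_replace_l2(pmap_t pmap, pd_entry_t *l2, pd_entry_t newl2,
    vm_offset_t sva)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/* Clears and flushes the old entry before storing newl2. */
	pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE);
}
#endif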
2757305882Sandrew
2758325238Smarkj#if VM_NRESERVLEVEL > 0
2759305882Sandrew/*
2760305882Sandrew * After promotion from 512 4KB page mappings to a single 2MB page mapping,
2761305882Sandrew * replace the many pv entries for the 4KB page mappings by a single pv entry
2762305882Sandrew * for the 2MB page mapping.
2763305882Sandrew */
2764305882Sandrewstatic void
2765305882Sandrewpmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
2766305882Sandrew    struct rwlock **lockp)
2767305882Sandrew{
2768305882Sandrew	struct md_page *pvh;
2769305882Sandrew	pv_entry_t pv;
2770305882Sandrew	vm_offset_t va_last;
2771305882Sandrew	vm_page_t m;
2772305882Sandrew
2773305882Sandrew	KASSERT((pa & L2_OFFSET) == 0,
2774305882Sandrew	    ("pmap_pv_promote_l2: pa is not 2mpage aligned"));
2775305882Sandrew	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
2776305882Sandrew
2777305882Sandrew	/*
2778305882Sandrew	 * Transfer the first page's pv entry for this mapping to the 2mpage's
2779305882Sandrew	 * pv list.  Aside from avoiding the cost of a call to get_pv_entry(),
2780305882Sandrew	 * a transfer avoids the possibility that get_pv_entry() calls
2781305882Sandrew	 * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the
2782305882Sandrew	 * mappings that is being promoted.
2783305882Sandrew	 */
2784305882Sandrew	m = PHYS_TO_VM_PAGE(pa);
2785305882Sandrew	va = va & ~L2_OFFSET;
2786305882Sandrew	pv = pmap_pvh_remove(&m->md, pmap, va);
2787305882Sandrew	KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found"));
2788305882Sandrew	pvh = pa_to_pvh(pa);
2789305882Sandrew	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
2790305882Sandrew	pvh->pv_gen++;
2791305882Sandrew	/* Free the remaining NPTEPG - 1 pv entries. */
2792305882Sandrew	va_last = va + L2_SIZE - PAGE_SIZE;
2793305882Sandrew	do {
2794305882Sandrew		m++;
2795305882Sandrew		va += PAGE_SIZE;
2796305882Sandrew		pmap_pvh_free(&m->md, pmap, va);
2797305882Sandrew	} while (va < va_last);
2798305882Sandrew}
2799305882Sandrew
2800305882Sandrew/*
2801305882Sandrew * Tries to promote the 512, contiguous 4KB page mappings that are within a
2802305882Sandrew * single level 2 table entry to a single 2MB page mapping.  For promotion
2803305882Sandrew * to occur, two conditions must be met: (1) the 4KB page mappings must map
2804305882Sandrew * aligned, contiguous physical memory and (2) the 4KB page mappings must have
2805305882Sandrew * identical characteristics.
2806305882Sandrew */
2807305882Sandrewstatic void
2808305882Sandrewpmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
2809305882Sandrew    struct rwlock **lockp)
2810305882Sandrew{
2811305882Sandrew	pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
2812305882Sandrew	vm_page_t mpte;
2813305882Sandrew	vm_offset_t sva;
2814305882Sandrew
2815305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2816305882Sandrew
2817305882Sandrew	sva = va & ~L2_OFFSET;
2818305882Sandrew	firstl3 = pmap_l2_to_l3(l2, sva);
2819305882Sandrew	newl2 = pmap_load(firstl3);
2820305882Sandrew
2821305882Sandrew	/* Check that the alignment is valid */
2822305882Sandrew	if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) {
2823305882Sandrew		atomic_add_long(&pmap_l2_p_failures, 1);
2824305882Sandrew		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
2825305882Sandrew		    " in pmap %p", va, pmap);
2826305882Sandrew		return;
2827305882Sandrew	}
2828305882Sandrew
2829305882Sandrew	pa = newl2 + L2_SIZE - PAGE_SIZE;
2830305882Sandrew	for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
2831305882Sandrew		oldl3 = pmap_load(l3);
2832305882Sandrew		if (oldl3 != pa) {
2833305882Sandrew			atomic_add_long(&pmap_l2_p_failures, 1);
2834305882Sandrew			CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
2835305882Sandrew			    " in pmap %p", va, pmap);
2836305882Sandrew			return;
2837305882Sandrew		}
2838305882Sandrew		pa -= PAGE_SIZE;
2839305882Sandrew	}
2840305882Sandrew
2841305882Sandrew	/*
2842305882Sandrew	 * Save the page table page in its current state until the L2
2843305882Sandrew	 * mapping the superpage is demoted by pmap_demote_l2() or
2844305882Sandrew	 * destroyed by pmap_remove_l3().
2845305882Sandrew	 */
2846305882Sandrew	mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
2847305882Sandrew	KASSERT(mpte >= vm_page_array &&
2848305882Sandrew	    mpte < &vm_page_array[vm_page_array_size],
2849305882Sandrew	    ("pmap_promote_l2: page table page is out of range"));
2850305882Sandrew	KASSERT(mpte->pindex == pmap_l2_pindex(va),
2851305882Sandrew	    ("pmap_promote_l2: page table page's pindex is wrong"));
2852305882Sandrew	if (pmap_insert_pt_page(pmap, mpte)) {
2853305882Sandrew		atomic_add_long(&pmap_l2_p_failures, 1);
2854305882Sandrew		CTR2(KTR_PMAP,
2855305882Sandrew		    "pmap_promote_l2: failure for va %#lx in pmap %p", va,
2856305882Sandrew		    pmap);
2857305882Sandrew		return;
2858305882Sandrew	}
2859305882Sandrew
2860305882Sandrew	if ((newl2 & ATTR_SW_MANAGED) != 0)
2861305882Sandrew		pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp);
2862305882Sandrew
2863305882Sandrew	newl2 &= ~ATTR_DESCR_MASK;
2864305882Sandrew	newl2 |= L2_BLOCK;
2865305882Sandrew
2866305882Sandrew	pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE);
2867305882Sandrew
2868305882Sandrew	atomic_add_long(&pmap_l2_promotions, 1);
2869305882Sandrew	CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
2870305882Sandrew	    pmap);
2871305882Sandrew}
2872325238Smarkj#endif /* VM_NRESERVLEVEL > 0 */
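
/*
 * Illustrative sketch, not part of the original source: the two promotion
 * preconditions tested by pmap_promote_l2() above, expressed as a standalone
 * predicate.  Like the real loop, attribute equality falls out of comparing
 * the whole 64-bit entries.  The function name is hypothetical.
 */
#if 0
static boolean_t
example_l3_run_promotable(pt_entry_t *firstl3)
{
	pt_entry_t expect;
	int i;

	/* (1) The first 4KB mapping must start on a 2MB physical boundary. */
	if (((pmap_load(firstl3) & ~ATTR_MASK) & L2_OFFSET) != 0)
		return (FALSE);
	/*
	 * (2) The remaining 511 mappings must be physically contiguous
	 * and carry identical attributes.
	 */
	expect = pmap_load(firstl3);
	for (i = 1; i < NL3PG; i++) {
		expect += PAGE_SIZE;
		if (pmap_load(firstl3 + i) != expect)
			return (FALSE);
	}
	return (TRUE);
}
#endif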
2873305882Sandrew
2874305882Sandrew/*
2875281494Sandrew *	Insert the given physical page (p) at
2876281494Sandrew *	the specified virtual address (v) in the
2877281494Sandrew *	target physical map with the protection requested.
2878281494Sandrew *
2879281494Sandrew *	If specified, the page will be wired down, meaning
2880281494Sandrew *	that the related pte can not be reclaimed.
2881281494Sandrew *
2882281494Sandrew *	NB:  This is the only routine which MAY NOT lazy-evaluate
2883281494Sandrew *	or lose information.  That is, this routine must actually
2884281494Sandrew *	insert this page into the given map NOW.
2885281494Sandrew */
2886281494Sandrewint
2887281494Sandrewpmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
2888281494Sandrew    u_int flags, int8_t psind __unused)
2889281494Sandrew{
2890281494Sandrew	struct rwlock *lock;
2891297446Sandrew	pd_entry_t *pde;
2892281494Sandrew	pt_entry_t new_l3, orig_l3;
2893305882Sandrew	pt_entry_t *l2, *l3;
2894281494Sandrew	pv_entry_t pv;
2895297446Sandrew	vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa;
2896297446Sandrew	vm_page_t mpte, om, l1_m, l2_m, l3_m;
2897281494Sandrew	boolean_t nosleep;
2898297446Sandrew	int lvl;
2899281494Sandrew
2900281494Sandrew	va = trunc_page(va);
2901281494Sandrew	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
2902281494Sandrew		VM_OBJECT_ASSERT_LOCKED(m->object);
2903281494Sandrew	pa = VM_PAGE_TO_PHYS(m);
2904285537Sandrew	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
2905285537Sandrew	    L3_PAGE);
2906281494Sandrew	if ((prot & VM_PROT_WRITE) == 0)
2907281494Sandrew		new_l3 |= ATTR_AP(ATTR_AP_RO);
2908319203Sandrew	if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
2909319203Sandrew		new_l3 |= ATTR_XN;
2910281494Sandrew	if ((flags & PMAP_ENTER_WIRED) != 0)
2911281494Sandrew		new_l3 |= ATTR_SW_WIRED;
2912281494Sandrew	if ((va >> 63) == 0)
2913319203Sandrew		new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
2914281494Sandrew
2915285212Sandrew	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
2916285212Sandrew
2917281494Sandrew	mpte = NULL;
2918281494Sandrew
2919281494Sandrew	lock = NULL;
2920281494Sandrew	PMAP_LOCK(pmap);
2921281494Sandrew
2922305882Sandrew	pde = pmap_pde(pmap, va, &lvl);
2923305882Sandrew	if (pde != NULL && lvl == 1) {
2924305882Sandrew		l2 = pmap_l1_to_l2(pde, va);
2925305882Sandrew		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
2926305882Sandrew		    (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET,
2927305882Sandrew		    &lock)) != NULL) {
2928305882Sandrew			l3 = &l3[pmap_l3_index(va)];
2929305882Sandrew			if (va < VM_MAXUSER_ADDRESS) {
2930305882Sandrew				mpte = PHYS_TO_VM_PAGE(
2931305882Sandrew				    pmap_load(l2) & ~ATTR_MASK);
2932305882Sandrew				mpte->wire_count++;
2933305882Sandrew			}
2934305882Sandrew			goto havel3;
2935305882Sandrew		}
2936305882Sandrew	}
2937305882Sandrew
2938281494Sandrew	if (va < VM_MAXUSER_ADDRESS) {
2939281494Sandrew		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
2940281494Sandrew		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
2941281494Sandrew		if (mpte == NULL && nosleep) {
2942285212Sandrew			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
2943281494Sandrew			if (lock != NULL)
2944281494Sandrew				rw_wunlock(lock);
2945281494Sandrew			PMAP_UNLOCK(pmap);
2946281494Sandrew			return (KERN_RESOURCE_SHORTAGE);
2947281494Sandrew		}
2948297446Sandrew		pde = pmap_pde(pmap, va, &lvl);
2949297446Sandrew		KASSERT(pde != NULL,
2950297446Sandrew		    ("pmap_enter: Invalid page entry, va: 0x%lx", va));
2951297446Sandrew		KASSERT(lvl == 2,
2952297446Sandrew		    ("pmap_enter: Invalid level %d", lvl));
2953297446Sandrew
2954297446Sandrew		l3 = pmap_l2_to_l3(pde, va);
2955281494Sandrew	} else {
2956297446Sandrew		/*
2957297446Sandrew		 * If we get a level 2 pde it must point to a level 3 entry;
2958297446Sandrew		 * otherwise we will need to create the intermediate tables.
2959297446Sandrew		 */
2960297446Sandrew		if (lvl < 2) {
2961297446Sandrew			switch(lvl) {
2962297446Sandrew			default:
2963297446Sandrew			case -1:
2964297446Sandrew				/* Get the l0 pde to update */
2965297446Sandrew				pde = pmap_l0(pmap, va);
2966297446Sandrew				KASSERT(pde != NULL, ("..."));
2967281494Sandrew
2968297446Sandrew				l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2969297446Sandrew				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2970297446Sandrew				    VM_ALLOC_ZERO);
2971297446Sandrew				if (l1_m == NULL)
2972297446Sandrew					panic("pmap_enter: l1 pte_m == NULL");
2973297446Sandrew				if ((l1_m->flags & PG_ZERO) == 0)
2974297446Sandrew					pmap_zero_page(l1_m);
2975297446Sandrew
2976297446Sandrew				l1_pa = VM_PAGE_TO_PHYS(l1_m);
2977297446Sandrew				pmap_load_store(pde, l1_pa | L0_TABLE);
2978297446Sandrew				PTE_SYNC(pde);
2979297446Sandrew				/* FALLTHROUGH */
2980297446Sandrew			case 0:
2981297446Sandrew				/* Get the l1 pde to update */
2982297446Sandrew				pde = pmap_l1_to_l2(pde, va);
2983297446Sandrew				KASSERT(pde != NULL, ("..."));
2984297446Sandrew
2985281494Sandrew				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2986281494Sandrew				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2987281494Sandrew				    VM_ALLOC_ZERO);
2988281494Sandrew				if (l2_m == NULL)
2989281494Sandrew					panic("pmap_enter: l2 pte_m == NULL");
2990281494Sandrew				if ((l2_m->flags & PG_ZERO) == 0)
2991281494Sandrew					pmap_zero_page(l2_m);
2992281494Sandrew
2993281494Sandrew				l2_pa = VM_PAGE_TO_PHYS(l2_m);
2994297446Sandrew				pmap_load_store(pde, l2_pa | L1_TABLE);
2995297446Sandrew				PTE_SYNC(pde);
2996297446Sandrew				/* FALLTHROUGH */
2997297446Sandrew			case 1:
2998297446Sandrew				/* Get the l2 pde to update */
2999297446Sandrew				pde = pmap_l1_to_l2(pde, va);
3000281494Sandrew
3001297446Sandrew				l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
3002297446Sandrew				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
3003297446Sandrew				    VM_ALLOC_ZERO);
3004297446Sandrew				if (l3_m == NULL)
3005297446Sandrew					panic("pmap_enter: l3 pte_m == NULL");
3006297446Sandrew				if ((l3_m->flags & PG_ZERO) == 0)
3007297446Sandrew					pmap_zero_page(l3_m);
3008281494Sandrew
3009297446Sandrew				l3_pa = VM_PAGE_TO_PHYS(l3_m);
3010297446Sandrew				pmap_load_store(pde, l3_pa | L2_TABLE);
3011297446Sandrew				PTE_SYNC(pde);
3012297446Sandrew				break;
3013297446Sandrew			}
3014281494Sandrew		}
3015297446Sandrew		l3 = pmap_l2_to_l3(pde, va);
3016285212Sandrew		pmap_invalidate_page(pmap, va);
3017281494Sandrew	}
3018305882Sandrewhavel3:
3019281494Sandrew
3020281494Sandrew	om = NULL;
3021281494Sandrew	orig_l3 = pmap_load(l3);
3022281494Sandrew	opa = orig_l3 & ~ATTR_MASK;
3023281494Sandrew
3024281494Sandrew	/*
3025281494Sandrew	 * Is the specified virtual address already mapped?
3026281494Sandrew	 */
3027281494Sandrew	if (pmap_l3_valid(orig_l3)) {
3028281494Sandrew		/*
3029281494Sandrew		 * Wiring change, just update stats. We don't worry about
3030281494Sandrew		 * wiring PT pages as they remain resident as long as there
3031281494Sandrew		 * are valid mappings in them. Hence, if a user page is wired,
3032281494Sandrew		 * the PT page will be also.
3033281494Sandrew		 */
3034281494Sandrew		if ((flags & PMAP_ENTER_WIRED) != 0 &&
3035281494Sandrew		    (orig_l3 & ATTR_SW_WIRED) == 0)
3036281494Sandrew			pmap->pm_stats.wired_count++;
3037281494Sandrew		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
3038281494Sandrew		    (orig_l3 & ATTR_SW_WIRED) != 0)
3039281494Sandrew			pmap->pm_stats.wired_count--;
3040281494Sandrew
3041281494Sandrew		/*
3042281494Sandrew		 * Remove the extra PT page reference.
3043281494Sandrew		 */
3044281494Sandrew		if (mpte != NULL) {
3045281494Sandrew			mpte->wire_count--;
3046281494Sandrew			KASSERT(mpte->wire_count > 0,
3047281494Sandrew			    ("pmap_enter: missing reference to page table page,"
3048281494Sandrew			     " va: 0x%lx", va));
3049281494Sandrew		}
3050281494Sandrew
3051281494Sandrew		/*
3052281494Sandrew		 * Has the physical page changed?
3053281494Sandrew		 */
3054281494Sandrew		if (opa == pa) {
3055281494Sandrew			/*
3056281494Sandrew			 * No, might be a protection or wiring change.
3057281494Sandrew			 */
3058281494Sandrew			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
3059281494Sandrew				new_l3 |= ATTR_SW_MANAGED;
3060281494Sandrew				if ((new_l3 & ATTR_AP(ATTR_AP_RW)) ==
3061281494Sandrew				    ATTR_AP(ATTR_AP_RW)) {
3062281494Sandrew					vm_page_aflag_set(m, PGA_WRITEABLE);
3063281494Sandrew				}
3064281494Sandrew			}
3065281494Sandrew			goto validate;
3066281494Sandrew		}
3067281494Sandrew
3068281494Sandrew		/* Flush the cache, there might be uncommitted data in it */
3069281494Sandrew		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
3070281494Sandrew			cpu_dcache_wb_range(va, L3_SIZE);
3071281494Sandrew	} else {
3072281494Sandrew		/*
3073281494Sandrew		 * Increment the counters.
3074281494Sandrew		 */
3075281494Sandrew		if ((new_l3 & ATTR_SW_WIRED) != 0)
3076281494Sandrew			pmap->pm_stats.wired_count++;
3077281494Sandrew		pmap_resident_count_inc(pmap, 1);
3078281494Sandrew	}
3079281494Sandrew	/*
3080281494Sandrew	 * Enter on the PV list if part of our managed memory.
3081281494Sandrew	 */
3082281494Sandrew	if ((m->oflags & VPO_UNMANAGED) == 0) {
3083281494Sandrew		new_l3 |= ATTR_SW_MANAGED;
3084281494Sandrew		pv = get_pv_entry(pmap, &lock);
3085281494Sandrew		pv->pv_va = va;
3086281494Sandrew		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
3087281494Sandrew		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
3088281494Sandrew		m->md.pv_gen++;
3089281494Sandrew		if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
3090281494Sandrew			vm_page_aflag_set(m, PGA_WRITEABLE);
3091281494Sandrew	}
3092281494Sandrew
3093281494Sandrew	/*
3094281494Sandrew	 * Update the L3 entry.
3095281494Sandrew	 */
3096281494Sandrew	if (orig_l3 != 0) {
3097281494Sandrewvalidate:
3098305882Sandrew		orig_l3 = pmap_load(l3);
3099281494Sandrew		opa = orig_l3 & ~ATTR_MASK;
3100281494Sandrew
3101281494Sandrew		if (opa != pa) {
3102305882Sandrew			pmap_update_entry(pmap, l3, new_l3, va, PAGE_SIZE);
3103281494Sandrew			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
3104281494Sandrew				om = PHYS_TO_VM_PAGE(opa);
3105281494Sandrew				if (pmap_page_dirty(orig_l3))
3106281494Sandrew					vm_page_dirty(om);
3107281494Sandrew				if ((orig_l3 & ATTR_AF) != 0)
3108281494Sandrew					vm_page_aflag_set(om, PGA_REFERENCED);
3109281494Sandrew				CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
3110281494Sandrew				pmap_pvh_free(&om->md, pmap, va);
3111305882Sandrew				if ((om->aflags & PGA_WRITEABLE) != 0 &&
3112305882Sandrew				    TAILQ_EMPTY(&om->md.pv_list) &&
3113305882Sandrew				    ((om->flags & PG_FICTITIOUS) != 0 ||
3114305882Sandrew				    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
3115305882Sandrew					vm_page_aflag_clear(om, PGA_WRITEABLE);
3116281494Sandrew			}
3117305882Sandrew		} else {
3118305882Sandrew			pmap_load_store(l3, new_l3);
3119305882Sandrew			PTE_SYNC(l3);
3120305882Sandrew			pmap_invalidate_page(pmap, va);
3121305882Sandrew			if (pmap_page_dirty(orig_l3) &&
3122305882Sandrew			    (orig_l3 & ATTR_SW_MANAGED) != 0)
3123281494Sandrew				vm_page_dirty(m);
3124281494Sandrew		}
3125281494Sandrew	} else {
3126281494Sandrew		pmap_load_store(l3, new_l3);
3127281494Sandrew	}
3128305882Sandrew
3129305882Sandrew	PTE_SYNC(l3);
3130285212Sandrew	pmap_invalidate_page(pmap, va);
3131281494Sandrew
3132305882Sandrew	if (pmap != pmap_kernel()) {
3133305883Sandrew		if (pmap == &curproc->p_vmspace->vm_pmap &&
3134305883Sandrew		    (prot & VM_PROT_EXECUTE) != 0)
3135305883Sandrew			cpu_icache_sync_range(va, PAGE_SIZE);
3136305882Sandrew
3137325238Smarkj#if VM_NRESERVLEVEL > 0
3138305882Sandrew		if ((mpte == NULL || mpte->wire_count == NL3PG) &&
3139305882Sandrew		    pmap_superpages_enabled() &&
3140305882Sandrew		    (m->flags & PG_FICTITIOUS) == 0 &&
3141305882Sandrew		    vm_reserv_level_iffullpop(m) == 0) {
3142305882Sandrew			pmap_promote_l2(pmap, pde, va, &lock);
3143305882Sandrew		}
3144325238Smarkj#endif
3145305882Sandrew	}
3146305882Sandrew
3147281494Sandrew	if (lock != NULL)
3148281494Sandrew		rw_wunlock(lock);
3149281494Sandrew	PMAP_UNLOCK(pmap);
3150281494Sandrew	return (KERN_SUCCESS);
3151281494Sandrew}
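
/*
 * Illustrative sketch, not part of the original source: a caller that cannot
 * sleep passes PMAP_ENTER_NOSLEEP and must handle KERN_RESOURCE_SHORTAGE,
 * which pmap_enter() above returns when pmap_alloc_l3() cannot allocate a
 * page table page.  The wrapper name and retry policy are hypothetical.
 */
#if 0
static int
example_enter_nosleep(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	int rv;

	rv = pmap_enter(pmap, va, m, prot, PMAP_ENTER_NOSLEEP, 0);
	if (rv == KERN_RESOURCE_SHORTAGE) {
		/* Back off and retry later, or retry without NOSLEEP. */
	}
	return (rv);
}
#endif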
3152281494Sandrew
3153281494Sandrew/*
3154281494Sandrew * Maps a sequence of resident pages belonging to the same object.
3155281494Sandrew * The sequence begins with the given page m_start.  This page is
3156281494Sandrew * mapped at the given virtual address start.  Each subsequent page is
3157281494Sandrew * mapped at a virtual address that is offset from start by the same
3158281494Sandrew * amount as the page is offset from m_start within the object.  The
3159281494Sandrew * last page in the sequence is the page with the largest offset from
3160281494Sandrew * m_start that can be mapped at a virtual address less than the given
3161281494Sandrew * virtual address end.  Not every virtual page between start and end
3162281494Sandrew * is mapped; only those for which a resident page exists with the
3163281494Sandrew * corresponding offset from m_start are mapped.
3164281494Sandrew */
3165281494Sandrewvoid
3166281494Sandrewpmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
3167281494Sandrew    vm_page_t m_start, vm_prot_t prot)
3168281494Sandrew{
3169281494Sandrew	struct rwlock *lock;
3170281494Sandrew	vm_offset_t va;
3171281494Sandrew	vm_page_t m, mpte;
3172281494Sandrew	vm_pindex_t diff, psize;
3173281494Sandrew
3174281494Sandrew	VM_OBJECT_ASSERT_LOCKED(m_start->object);
3175281494Sandrew
3176281494Sandrew	psize = atop(end - start);
3177281494Sandrew	mpte = NULL;
3178281494Sandrew	m = m_start;
3179281494Sandrew	lock = NULL;
3180281494Sandrew	PMAP_LOCK(pmap);
3181281494Sandrew	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
3182281494Sandrew		va = start + ptoa(diff);
3183281494Sandrew		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
3184281494Sandrew		m = TAILQ_NEXT(m, listq);
3185281494Sandrew	}
3186281494Sandrew	if (lock != NULL)
3187281494Sandrew		rw_wunlock(lock);
3188281494Sandrew	PMAP_UNLOCK(pmap);
3189281494Sandrew}
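
/*
 * Illustrative sketch, not part of the original source: the virtual address
 * chosen for each page in the loop above preserves the page's byte offset
 * within the object relative to m_start.  The helper name is hypothetical.
 */
#if 0
static vm_offset_t
example_object_va(vm_offset_t start, vm_page_t m_start, vm_page_t m)
{

	return (start + ptoa(m->pindex - m_start->pindex));
}
#endif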
3190281494Sandrew
3191281494Sandrew/*
3192281494Sandrew * this code makes some *MAJOR* assumptions:
3193281494Sandrew * 1. Current pmap & pmap exist.
3194281494Sandrew * 2. Not wired.
3195281494Sandrew * 3. Read access.
3196281494Sandrew * 4. No page table pages.
3197281494Sandrew * but is *MUCH* faster than pmap_enter...
3198281494Sandrew */
3199281494Sandrew
3200281494Sandrewvoid
3201281494Sandrewpmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
3202281494Sandrew{
3203281494Sandrew	struct rwlock *lock;
3204281494Sandrew
3205281494Sandrew	lock = NULL;
3206281494Sandrew	PMAP_LOCK(pmap);
3207281494Sandrew	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
3208281494Sandrew	if (lock != NULL)
3209281494Sandrew		rw_wunlock(lock);
3210281494Sandrew	PMAP_UNLOCK(pmap);
3211281494Sandrew}
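
/*
 * Illustrative sketch, not part of the original source: pmap_enter_quick()
 * suits speculative, read-only prefaulting because, as the comment above
 * notes, it never wires the mapping and validates it with RO protection;
 * a failure (e.g. no page table page) is simply skipped.  The helper name
 * and parameters below are hypothetical.
 */
#if 0
static void
example_prefault_readonly(pmap_t pmap, vm_offset_t va, vm_page_t *ma,
    int npages)
{
	int i;

	for (i = 0; i < npages; i++)
		pmap_enter_quick(pmap, va + ptoa(i), ma[i], VM_PROT_READ);
}
#endif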
3212281494Sandrew
3213281494Sandrewstatic vm_page_t
3214281494Sandrewpmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
3215281494Sandrew    vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
3216281494Sandrew{
3217281494Sandrew	struct spglist free;
3218297446Sandrew	pd_entry_t *pde;
3219305882Sandrew	pt_entry_t *l2, *l3;
3220281494Sandrew	vm_paddr_t pa;
3221297446Sandrew	int lvl;
3222281494Sandrew
3223281494Sandrew	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
3224281494Sandrew	    (m->oflags & VPO_UNMANAGED) != 0,
3225281494Sandrew	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
3226281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
3227281494Sandrew
3228285212Sandrew	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
3229281494Sandrew	/*
3230281494Sandrew	 * In the case that a page table page is not
3231281494Sandrew	 * resident, we are creating it here.
3232281494Sandrew	 */
3233281494Sandrew	if (va < VM_MAXUSER_ADDRESS) {
3234281494Sandrew		vm_pindex_t l2pindex;
3235281494Sandrew
3236281494Sandrew		/*
3237281494Sandrew		 * Calculate pagetable page index
3238281494Sandrew		 */
3239281494Sandrew		l2pindex = pmap_l2_pindex(va);
3240281494Sandrew		if (mpte && (mpte->pindex == l2pindex)) {
3241281494Sandrew			mpte->wire_count++;
3242281494Sandrew		} else {
3243281494Sandrew			/*
3244281494Sandrew			 * Get the l2 entry
3245281494Sandrew			 */
3246297446Sandrew			pde = pmap_pde(pmap, va, &lvl);
3247281494Sandrew
3248281494Sandrew			/*
3249281494Sandrew			 * If the page table page is mapped, we just increment
3250281494Sandrew			 * the hold count, and activate it.  Otherwise, we
3251281494Sandrew			 * attempt to allocate a page table page.  If this
3252281494Sandrew			 * attempt fails, we don't retry.  Instead, we give up.
3253281494Sandrew			 */
3254305882Sandrew			if (lvl == 1) {
3255305882Sandrew				l2 = pmap_l1_to_l2(pde, va);
3256305882Sandrew				if ((pmap_load(l2) & ATTR_DESCR_MASK) ==
3257305882Sandrew				    L2_BLOCK)
3258305882Sandrew					return (NULL);
3259305882Sandrew			}
3260297446Sandrew			if (lvl == 2 && pmap_load(pde) != 0) {
3261285045Sandrew				mpte =
3262297446Sandrew				    PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
3263281494Sandrew				mpte->wire_count++;
3264281494Sandrew			} else {
3265281494Sandrew				/*
3266281494Sandrew				 * Pass NULL instead of the PV list lock
3267281494Sandrew				 * pointer, because we don't intend to sleep.
3268281494Sandrew				 */
3269281494Sandrew				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
3270281494Sandrew				if (mpte == NULL)
3271281494Sandrew					return (mpte);
3272281494Sandrew			}
3273281494Sandrew		}
3274281494Sandrew		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
3275281494Sandrew		l3 = &l3[pmap_l3_index(va)];
3276281494Sandrew	} else {
3277281494Sandrew		mpte = NULL;
3278297446Sandrew		pde = pmap_pde(kernel_pmap, va, &lvl);
3279297446Sandrew		KASSERT(pde != NULL,
3280297446Sandrew		    ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
3281297446Sandrew		     va));
3282297446Sandrew		KASSERT(lvl == 2,
3283297446Sandrew		    ("pmap_enter_quick_locked: Invalid level %d", lvl));
3284297446Sandrew		l3 = pmap_l2_to_l3(pde, va);
3285281494Sandrew	}
3286297446Sandrew
3287285212Sandrew	if (pmap_load(l3) != 0) {
3288281494Sandrew		if (mpte != NULL) {
3289281494Sandrew			mpte->wire_count--;
3290281494Sandrew			mpte = NULL;
3291281494Sandrew		}
3292281494Sandrew		return (mpte);
3293281494Sandrew	}
3294281494Sandrew
3295281494Sandrew	/*
3296281494Sandrew	 * Enter on the PV list if part of our managed memory.
3297281494Sandrew	 */
3298281494Sandrew	if ((m->oflags & VPO_UNMANAGED) == 0 &&
3299281494Sandrew	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
3300281494Sandrew		if (mpte != NULL) {
3301281494Sandrew			SLIST_INIT(&free);
3302281494Sandrew			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
3303281494Sandrew				pmap_invalidate_page(pmap, va);
3304281494Sandrew				pmap_free_zero_pages(&free);
3305281494Sandrew			}
3306281494Sandrew			mpte = NULL;
3307281494Sandrew		}
3308281494Sandrew		return (mpte);
3309281494Sandrew	}
3310281494Sandrew
3311281494Sandrew	/*
3312281494Sandrew	 * Increment counters
3313281494Sandrew	 */
3314281494Sandrew	pmap_resident_count_inc(pmap, 1);
3315281494Sandrew
3316285537Sandrew	pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
3317305882Sandrew	    ATTR_AP(ATTR_AP_RO) | L3_PAGE;
3318319203Sandrew	if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
3319319203Sandrew		pa |= ATTR_XN;
3320319203Sandrew	else if (va < VM_MAXUSER_ADDRESS)
3321319203Sandrew		pa |= ATTR_PXN;
3322281494Sandrew
3323281494Sandrew	/*
3324281494Sandrew	 * Now validate mapping with RO protection
3325281494Sandrew	 */
3326281494Sandrew	if ((m->oflags & VPO_UNMANAGED) == 0)
3327281494Sandrew		pa |= ATTR_SW_MANAGED;
3328281494Sandrew	pmap_load_store(l3, pa);
3329281494Sandrew	PTE_SYNC(l3);
3330281494Sandrew	pmap_invalidate_page(pmap, va);
3331281494Sandrew	return (mpte);
3332281494Sandrew}
3333281494Sandrew
3334281494Sandrew/*
3335281494Sandrew * This code maps large physical mmap regions into the
3336281494Sandrew * processor address space.  Note that some shortcuts
3337281494Sandrew * are taken, but the code works.
3338281494Sandrew */
3339281494Sandrewvoid
3340281494Sandrewpmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
3341281494Sandrew    vm_pindex_t pindex, vm_size_t size)
3342281494Sandrew{
3343281494Sandrew
3344281846Sandrew	VM_OBJECT_ASSERT_WLOCKED(object);
3345281846Sandrew	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
3346281846Sandrew	    ("pmap_object_init_pt: non-device object"));
3347281494Sandrew}
3348281494Sandrew
3349281494Sandrew/*
3350281494Sandrew *	Clear the wired attribute from the mappings for the specified range of
3351281494Sandrew *	addresses in the given pmap.  Every valid mapping within that range
3352281494Sandrew *	must have the wired attribute set.  In contrast, invalid mappings
3353281494Sandrew *	cannot have the wired attribute set, so they are ignored.
3354281494Sandrew *
3355281494Sandrew *	The wired attribute of the page table entry is not a hardware feature,
3356281494Sandrew *	so there is no need to invalidate any TLB entries.
3357281494Sandrew */
3358281494Sandrewvoid
3359281494Sandrewpmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
3360281494Sandrew{
3361281494Sandrew	vm_offset_t va_next;
3362297446Sandrew	pd_entry_t *l0, *l1, *l2;
3363281494Sandrew	pt_entry_t *l3;
3364281494Sandrew
3365281494Sandrew	PMAP_LOCK(pmap);
3366281494Sandrew	for (; sva < eva; sva = va_next) {
3367297446Sandrew		l0 = pmap_l0(pmap, sva);
3368297446Sandrew		if (pmap_load(l0) == 0) {
3369297446Sandrew			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
3370297446Sandrew			if (va_next < sva)
3371297446Sandrew				va_next = eva;
3372297446Sandrew			continue;
3373297446Sandrew		}
3374297446Sandrew
3375297446Sandrew		l1 = pmap_l0_to_l1(l0, sva);
3376285045Sandrew		if (pmap_load(l1) == 0) {
3377281494Sandrew			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
3378281494Sandrew			if (va_next < sva)
3379281494Sandrew				va_next = eva;
3380281494Sandrew			continue;
3381281494Sandrew		}
3382281494Sandrew
3383281494Sandrew		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
3384281494Sandrew		if (va_next < sva)
3385281494Sandrew			va_next = eva;
3386281494Sandrew
3387281494Sandrew		l2 = pmap_l1_to_l2(l1, sva);
3388285045Sandrew		if (pmap_load(l2) == 0)
3389281494Sandrew			continue;
3390281494Sandrew
3391305882Sandrew		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
3392305882Sandrew			l3 = pmap_demote_l2(pmap, l2, sva);
3393305882Sandrew			if (l3 == NULL)
3394305882Sandrew				continue;
3395305882Sandrew		}
3396305882Sandrew		KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
3397305882Sandrew		    ("pmap_unwire: Invalid l2 entry after demotion"));
3398305882Sandrew
3399281494Sandrew		if (va_next > eva)
3400281494Sandrew			va_next = eva;
3401281494Sandrew		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
3402281494Sandrew		    sva += L3_SIZE) {
3403285045Sandrew			if (pmap_load(l3) == 0)
3404281494Sandrew				continue;
3405285045Sandrew			if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
3406281494Sandrew				panic("pmap_unwire: l3 %#jx is missing "
3407288445Sandrew				    "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));
3408281494Sandrew
3409281494Sandrew			/*
3410281494Sandrew			 * ATTR_SW_WIRED must be cleared atomically.  Although the
3411281494Sandrew			 * pmap lock synchronizes access to it, another processor
3412281494Sandrew			 * could be updating the access and dirty bits concurrently.
3413281494Sandrew			 */
3414281494Sandrew			atomic_clear_long(l3, ATTR_SW_WIRED);
3415281494Sandrew			pmap->pm_stats.wired_count--;
3416281494Sandrew		}
3417281494Sandrew	}
3418281494Sandrew	PMAP_UNLOCK(pmap);
3419281494Sandrew}
3420281494Sandrew
3421281494Sandrew/*
3422281494Sandrew *	Copy the range specified by src_addr/len
3423281494Sandrew *	from the source map to the range dst_addr/len
3424281494Sandrew *	in the destination map.
3425281494Sandrew *
3426281494Sandrew *	This routine is only advisory and need not do anything.
3427281494Sandrew */
3428281494Sandrew
3429281494Sandrewvoid
3430281494Sandrewpmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
3431281494Sandrew    vm_offset_t src_addr)
3432281494Sandrew{
3433281494Sandrew}
3434281494Sandrew
3435281494Sandrew/*
3436281494Sandrew *	pmap_zero_page zeros the specified hardware page by mapping
3437281494Sandrew *	the page into KVM and using bzero to clear its contents.
3438281494Sandrew */
3439281494Sandrewvoid
3440281494Sandrewpmap_zero_page(vm_page_t m)
3441281494Sandrew{
3442281494Sandrew	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
3443281494Sandrew
3444281494Sandrew	pagezero((void *)va);
3445281494Sandrew}
3446281494Sandrew
3447281494Sandrew/*
3448305531Sandrew *	pmap_zero_page_area zeros the specified area of a hardware page by
3449281494Sandrew *	mapping the page into KVM and using bzero to clear its contents.
3450281494Sandrew *
3451281494Sandrew *	off and size may not cover an area beyond a single hardware page.
3452281494Sandrew */
3453281494Sandrewvoid
3454281494Sandrewpmap_zero_page_area(vm_page_t m, int off, int size)
3455281494Sandrew{
3456281494Sandrew	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
3457281494Sandrew
3458281494Sandrew	if (off == 0 && size == PAGE_SIZE)
3459281494Sandrew		pagezero((void *)va);
3460281494Sandrew	else
3461281494Sandrew		bzero((char *)va + off, size);
3462281494Sandrew}
3463281494Sandrew
3464281494Sandrew/*
3465305531Sandrew *	pmap_zero_page_idle zeros the specified hardware page by mapping
3466281494Sandrew *	the page into KVM and using bzero to clear its contents.  This
3467281494Sandrew *	is intended to be called from the vm_pagezero process only and
3468281494Sandrew *	outside of Giant.
3469281494Sandrew */
3470281494Sandrewvoid
3471281494Sandrewpmap_zero_page_idle(vm_page_t m)
3472281494Sandrew{
3473281494Sandrew	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
3474281494Sandrew
3475281494Sandrew	pagezero((void *)va);
3476281494Sandrew}
3477281494Sandrew
3478281494Sandrew/*
3479281494Sandrew *	pmap_copy_page copies the specified (machine independent)
3480281494Sandrew *	page by mapping the page into virtual memory and using
3481281494Sandrew *	bcopy to copy the page, one machine dependent page at a
3482281494Sandrew *	time.
3483281494Sandrew */
3484281494Sandrewvoid
3485281494Sandrewpmap_copy_page(vm_page_t msrc, vm_page_t mdst)
3486281494Sandrew{
3487281494Sandrew	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
3488281494Sandrew	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
3489281494Sandrew
3490281494Sandrew	pagecopy((void *)src, (void *)dst);
3491281494Sandrew}
3492281494Sandrew
3493281494Sandrewint unmapped_buf_allowed = 1;
3494281494Sandrew
3495281494Sandrewvoid
3496281494Sandrewpmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
3497281494Sandrew    vm_offset_t b_offset, int xfersize)
3498281494Sandrew{
3499281494Sandrew	void *a_cp, *b_cp;
3500281494Sandrew	vm_page_t m_a, m_b;
3501281494Sandrew	vm_paddr_t p_a, p_b;
3502281494Sandrew	vm_offset_t a_pg_offset, b_pg_offset;
3503281494Sandrew	int cnt;
3504281494Sandrew
3505281494Sandrew	while (xfersize > 0) {
3506281494Sandrew		a_pg_offset = a_offset & PAGE_MASK;
3507281494Sandrew		m_a = ma[a_offset >> PAGE_SHIFT];
3508281494Sandrew		p_a = m_a->phys_addr;
3509281494Sandrew		b_pg_offset = b_offset & PAGE_MASK;
3510281494Sandrew		m_b = mb[b_offset >> PAGE_SHIFT];
3511281494Sandrew		p_b = m_b->phys_addr;
3512281494Sandrew		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
3513281494Sandrew		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
3514281494Sandrew		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
3515281494Sandrew			panic("!DMAP a %lx", p_a);
3516281494Sandrew		} else {
3517281494Sandrew			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
3518281494Sandrew		}
3519281494Sandrew		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
3520281494Sandrew			panic("!DMAP b %lx", p_b);
3521281494Sandrew		} else {
3522281494Sandrew			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
3523281494Sandrew		}
3524281494Sandrew		bcopy(a_cp, b_cp, cnt);
3525281494Sandrew		a_offset += cnt;
3526281494Sandrew		b_offset += cnt;
3527281494Sandrew		xfersize -= cnt;
3528281494Sandrew	}
3529281494Sandrew}
3530281494Sandrew
3531286296Sjahvm_offset_t
3532286296Sjahpmap_quick_enter_page(vm_page_t m)
3533286296Sjah{
3534286296Sjah
3535286296Sjah	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
3536286296Sjah}
3537286296Sjah
3538286296Sjahvoid
3539286296Sjahpmap_quick_remove_page(vm_offset_t addr)
3540286296Sjah{
3541286296Sjah}
3542286296Sjah
3543281494Sandrew/*
3544281494Sandrew * Returns true if the pmap's pv is one of the first
3545281494Sandrew * 16 pvs linked to from this page.  This count may
3546281494Sandrew * be changed upwards or downwards in the future; it
3547281494Sandrew * is only necessary that true be returned for a small
3548281494Sandrew * subset of pmaps for proper page aging.
3549281494Sandrew */
3550281494Sandrewboolean_t
3551281494Sandrewpmap_page_exists_quick(pmap_t pmap, vm_page_t m)
3552281494Sandrew{
3553305882Sandrew	struct md_page *pvh;
3554281494Sandrew	struct rwlock *lock;
3555281494Sandrew	pv_entry_t pv;
3556281494Sandrew	int loops = 0;
3557281494Sandrew	boolean_t rv;
3558281494Sandrew
3559281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3560281494Sandrew	    ("pmap_page_exists_quick: page %p is not managed", m));
3561281494Sandrew	rv = FALSE;
3562281494Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
3563281494Sandrew	rw_rlock(lock);
3564281494Sandrew	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
3565281494Sandrew		if (PV_PMAP(pv) == pmap) {
3566281494Sandrew			rv = TRUE;
3567281494Sandrew			break;
3568281494Sandrew		}
3569281494Sandrew		loops++;
3570281494Sandrew		if (loops >= 16)
3571281494Sandrew			break;
3572281494Sandrew	}
3573305882Sandrew	if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
3574305882Sandrew		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
3575305882Sandrew		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
3576305882Sandrew			if (PV_PMAP(pv) == pmap) {
3577305882Sandrew				rv = TRUE;
3578305882Sandrew				break;
3579305882Sandrew			}
3580305882Sandrew			loops++;
3581305882Sandrew			if (loops >= 16)
3582305882Sandrew				break;
3583305882Sandrew		}
3584305882Sandrew	}
3585281494Sandrew	rw_runlock(lock);
3586281494Sandrew	return (rv);
3587281494Sandrew}
3588281494Sandrew
3589281494Sandrew/*
3590281494Sandrew *	pmap_page_wired_mappings:
3591281494Sandrew *
3592281494Sandrew *	Return the number of managed mappings to the given physical page
3593281494Sandrew *	that are wired.
3594281494Sandrew */
3595281494Sandrewint
3596281494Sandrewpmap_page_wired_mappings(vm_page_t m)
3597281494Sandrew{
3598281494Sandrew	struct rwlock *lock;
3599305882Sandrew	struct md_page *pvh;
3600281494Sandrew	pmap_t pmap;
3601297446Sandrew	pt_entry_t *pte;
3602281494Sandrew	pv_entry_t pv;
3603305882Sandrew	int count, lvl, md_gen, pvh_gen;
3604281494Sandrew
3605281494Sandrew	if ((m->oflags & VPO_UNMANAGED) != 0)
3606281494Sandrew		return (0);
3607281494Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
3608281494Sandrew	rw_rlock(lock);
3609281494Sandrewrestart:
3610281494Sandrew	count = 0;
3611281494Sandrew	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
3612281494Sandrew		pmap = PV_PMAP(pv);
3613281494Sandrew		if (!PMAP_TRYLOCK(pmap)) {
3614281494Sandrew			md_gen = m->md.pv_gen;
3615281494Sandrew			rw_runlock(lock);
3616281494Sandrew			PMAP_LOCK(pmap);
3617281494Sandrew			rw_rlock(lock);
3618281494Sandrew			if (md_gen != m->md.pv_gen) {
3619281494Sandrew				PMAP_UNLOCK(pmap);
3620281494Sandrew				goto restart;
3621281494Sandrew			}
3622281494Sandrew		}
3623297446Sandrew		pte = pmap_pte(pmap, pv->pv_va, &lvl);
3624297446Sandrew		if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
3625281494Sandrew			count++;
3626281494Sandrew		PMAP_UNLOCK(pmap);
3627281494Sandrew	}
3628305882Sandrew	if ((m->flags & PG_FICTITIOUS) == 0) {
3629305882Sandrew		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
3630305882Sandrew		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
3631305882Sandrew			pmap = PV_PMAP(pv);
3632305882Sandrew			if (!PMAP_TRYLOCK(pmap)) {
3633305882Sandrew				md_gen = m->md.pv_gen;
3634305882Sandrew				pvh_gen = pvh->pv_gen;
3635305882Sandrew				rw_runlock(lock);
3636305882Sandrew				PMAP_LOCK(pmap);
3637305882Sandrew				rw_rlock(lock);
3638305882Sandrew				if (md_gen != m->md.pv_gen ||
3639305882Sandrew				    pvh_gen != pvh->pv_gen) {
3640305882Sandrew					PMAP_UNLOCK(pmap);
3641305882Sandrew					goto restart;
3642305882Sandrew				}
3643305882Sandrew			}
3644305882Sandrew			pte = pmap_pte(pmap, pv->pv_va, &lvl);
3645305882Sandrew			if (pte != NULL &&
3646305882Sandrew			    (pmap_load(pte) & ATTR_SW_WIRED) != 0)
3647305882Sandrew				count++;
3648305882Sandrew			PMAP_UNLOCK(pmap);
3649305882Sandrew		}
3650305882Sandrew	}
3651281494Sandrew	rw_runlock(lock);
3652281494Sandrew	return (count);
3653281494Sandrew}
3654281494Sandrew
3655281494Sandrew/*
3656281494Sandrew * Destroy all managed, non-wired mappings in the given user-space
3657281494Sandrew * pmap.  This pmap cannot be active on any processor besides the
3658281494Sandrew * caller.
3659305531Sandrew *
3660281494Sandrew * This function cannot be applied to the kernel pmap.  Moreover, it
3661281494Sandrew * is not intended for general use.  It is only to be used during
3662281494Sandrew * process termination.  Consequently, it can be implemented in ways
3663281494Sandrew * that make it faster than pmap_remove().  First, it can more quickly
3664281494Sandrew * destroy mappings by iterating over the pmap's collection of PV
3665281494Sandrew * entries, rather than searching the page table.  Second, it doesn't
3666281494Sandrew * have to test and clear the page table entries atomically, because
3667281494Sandrew * no processor is currently accessing the user address space.  In
3668281494Sandrew * particular, a page table entry's dirty bit won't change state once
3669281494Sandrew * this function starts.
3670281494Sandrew */
3671281494Sandrewvoid
3672281494Sandrewpmap_remove_pages(pmap_t pmap)
3673281494Sandrew{
3674297446Sandrew	pd_entry_t *pde;
3675297446Sandrew	pt_entry_t *pte, tpte;
3676281494Sandrew	struct spglist free;
3677305882Sandrew	vm_page_t m, ml3, mt;
3678281494Sandrew	pv_entry_t pv;
3679305882Sandrew	struct md_page *pvh;
3680281494Sandrew	struct pv_chunk *pc, *npc;
3681281494Sandrew	struct rwlock *lock;
3682281494Sandrew	int64_t bit;
3683281494Sandrew	uint64_t inuse, bitmask;
3684297446Sandrew	int allfree, field, freed, idx, lvl;
3685281494Sandrew	vm_paddr_t pa;
3686281494Sandrew
3687281494Sandrew	lock = NULL;
3688281494Sandrew
3689281494Sandrew	SLIST_INIT(&free);
3690281494Sandrew	PMAP_LOCK(pmap);
3691281494Sandrew	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
3692281494Sandrew		allfree = 1;
3693281494Sandrew		freed = 0;
3694281494Sandrew		for (field = 0; field < _NPCM; field++) {
3695281494Sandrew			inuse = ~pc->pc_map[field] & pc_freemask[field];
3696281494Sandrew			while (inuse != 0) {
3697281494Sandrew				bit = ffsl(inuse) - 1;
3698281494Sandrew				bitmask = 1UL << bit;
3699281494Sandrew				idx = field * 64 + bit;
3700281494Sandrew				pv = &pc->pc_pventry[idx];
3701281494Sandrew				inuse &= ~bitmask;
3702281494Sandrew
3703297446Sandrew				pde = pmap_pde(pmap, pv->pv_va, &lvl);
3704297446Sandrew				KASSERT(pde != NULL,
3705297446Sandrew				    ("Attempting to remove an unmapped page"));
3706281494Sandrew
3707305882Sandrew				switch(lvl) {
3708305882Sandrew				case 1:
3709305882Sandrew					pte = pmap_l1_to_l2(pde, pv->pv_va);
3710305882Sandrew					tpte = pmap_load(pte);
3711305882Sandrew					KASSERT((tpte & ATTR_DESCR_MASK) ==
3712305882Sandrew					    L2_BLOCK,
3713305882Sandrew					    ("Attempting to remove an invalid "
3714305882Sandrew					    "block: %lx", tpte));
3716305882Sandrew					break;
3717305882Sandrew				case 2:
3718305882Sandrew					pte = pmap_l2_to_l3(pde, pv->pv_va);
3719305882Sandrew					tpte = pmap_load(pte);
3720305882Sandrew					KASSERT((tpte & ATTR_DESCR_MASK) ==
3721305882Sandrew					    L3_PAGE,
3722305882Sandrew					    ("Attempting to remove an invalid "
3723305882Sandrew					     "page: %lx", tpte));
3724305882Sandrew					break;
3725305882Sandrew				default:
3726305882Sandrew					panic(
3727305882Sandrew					    "Invalid page directory level: %d",
3728305882Sandrew					    lvl);
3729305882Sandrew				}
3730297446Sandrew
3731281494Sandrew/*
3732281494Sandrew * We cannot remove wired pages from a process' mapping at this time
3733281494Sandrew */
3734297446Sandrew				if (tpte & ATTR_SW_WIRED) {
3735281494Sandrew					allfree = 0;
3736281494Sandrew					continue;
3737281494Sandrew				}
3738281494Sandrew
3739297446Sandrew				pa = tpte & ~ATTR_MASK;
3740281494Sandrew
3741281494Sandrew				m = PHYS_TO_VM_PAGE(pa);
3742281494Sandrew				KASSERT(m->phys_addr == pa,
3743281494Sandrew				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
3744281494Sandrew				    m, (uintmax_t)m->phys_addr,
3745297446Sandrew				    (uintmax_t)tpte));
3746281494Sandrew
3747281494Sandrew				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
3748281494Sandrew				    m < &vm_page_array[vm_page_array_size],
3749297446Sandrew				    ("pmap_remove_pages: bad pte %#jx",
3750297446Sandrew				    (uintmax_t)tpte));
3751281494Sandrew
3752305882Sandrew				if (pmap_is_current(pmap)) {
3753305882Sandrew					if (lvl == 2 &&
3754305882Sandrew					    pmap_l3_valid_cacheable(tpte)) {
3755305882Sandrew						cpu_dcache_wb_range(pv->pv_va,
3756305882Sandrew						    L3_SIZE);
3757305882Sandrew					} else if (lvl == 1 &&
3758305882Sandrew					    pmap_pte_valid_cacheable(tpte)) {
3759305882Sandrew						cpu_dcache_wb_range(pv->pv_va,
3760305882Sandrew						    L2_SIZE);
3761305882Sandrew					}
3762305882Sandrew				}
3763297446Sandrew				pmap_load_clear(pte);
3764297446Sandrew				PTE_SYNC(pte);
3765285212Sandrew				pmap_invalidate_page(pmap, pv->pv_va);
3766281494Sandrew
3767281494Sandrew				/*
3768281494Sandrew				 * Update the vm_page_t clean/reference bits.
3769281494Sandrew				 */
3770305882Sandrew				if ((tpte & ATTR_AP_RW_BIT) ==
3771305882Sandrew				    ATTR_AP(ATTR_AP_RW)) {
3772305882Sandrew					switch (lvl) {
3773305882Sandrew					case 1:
3774305882Sandrew						for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
3775305882Sandrew							vm_page_dirty(mt);
3776305882Sandrew						break;
3777305882Sandrew					case 2:
3778305882Sandrew						vm_page_dirty(m);
3779305882Sandrew						break;
3780305882Sandrew					}
3781305882Sandrew				}
3782281494Sandrew
3783281494Sandrew				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
3784281494Sandrew
3785281494Sandrew				/* Mark free */
3786281494Sandrew				pc->pc_map[field] |= bitmask;
3787305882Sandrew				switch (lvl) {
3788305882Sandrew				case 1:
3789305882Sandrew					pmap_resident_count_dec(pmap,
3790305882Sandrew					    L2_SIZE / PAGE_SIZE);
3791305882Sandrew					pvh = pa_to_pvh(tpte & ~ATTR_MASK);
3792305882Sandrew					TAILQ_REMOVE(&pvh->pv_list, pv,pv_next);
3793305882Sandrew					pvh->pv_gen++;
3794305882Sandrew					if (TAILQ_EMPTY(&pvh->pv_list)) {
3795305882Sandrew						for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
3796305882Sandrew							if ((mt->aflags & PGA_WRITEABLE) != 0 &&
3797305882Sandrew							    TAILQ_EMPTY(&mt->md.pv_list))
3798305882Sandrew								vm_page_aflag_clear(mt, PGA_WRITEABLE);
3799305882Sandrew					}
3800318716Smarkj					ml3 = pmap_remove_pt_page(pmap,
3801305882Sandrew					    pv->pv_va);
3802305882Sandrew					if (ml3 != NULL) {
3803305882Sandrew						pmap_resident_count_dec(pmap,1);
3804305882Sandrew						KASSERT(ml3->wire_count == NL3PG,
3805305882Sandrew						    ("pmap_remove_pages: l3 page wire count error"));
3806305882Sandrew						ml3->wire_count = 0;
3807305882Sandrew						pmap_add_delayed_free_list(ml3,
3808305882Sandrew						    &free, FALSE);
3809305882Sandrew						atomic_subtract_int(
3810305882Sandrew						    &vm_cnt.v_wire_count, 1);
3811305882Sandrew					}
3812305882Sandrew					break;
3813305882Sandrew				case 2:
3814305882Sandrew					pmap_resident_count_dec(pmap, 1);
3815305882Sandrew					TAILQ_REMOVE(&m->md.pv_list, pv,
3816305882Sandrew					    pv_next);
3817305882Sandrew					m->md.pv_gen++;
3818305882Sandrew					if ((m->aflags & PGA_WRITEABLE) != 0 &&
3819305882Sandrew					    TAILQ_EMPTY(&m->md.pv_list) &&
3820305882Sandrew					    (m->flags & PG_FICTITIOUS) == 0) {
3821305882Sandrew						pvh = pa_to_pvh(
3822305882Sandrew						    VM_PAGE_TO_PHYS(m));
3823305882Sandrew						if (TAILQ_EMPTY(&pvh->pv_list))
3824305882Sandrew							vm_page_aflag_clear(m,
3825305882Sandrew							    PGA_WRITEABLE);
3826305882Sandrew					}
3827305882Sandrew					break;
3828305882Sandrew				}
3829297446Sandrew				pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde),
3830297446Sandrew				    &free);
3831281494Sandrew				freed++;
3832281494Sandrew			}
3833281494Sandrew		}
3834281494Sandrew		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
3835281494Sandrew		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
3836281494Sandrew		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
3837281494Sandrew		if (allfree) {
3838281494Sandrew			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
3839281494Sandrew			free_pv_chunk(pc);
3840281494Sandrew		}
3841281494Sandrew	}
3842281494Sandrew	pmap_invalidate_all(pmap);
3843281494Sandrew	if (lock != NULL)
3844281494Sandrew		rw_wunlock(lock);
3845281494Sandrew	PMAP_UNLOCK(pmap);
3846281494Sandrew	pmap_free_zero_pages(&free);
3847281494Sandrew}
3848281494Sandrew
3849281494Sandrew/*
3850281494Sandrew * This is used to check if a page has been accessed or modified.  Since
3851281494Sandrew * there is no hardware bit that records modification, we have to assume a
3852281494Sandrew * page has been modified if it is mapped read/write.
3853281494Sandrew */
3854281494Sandrewstatic boolean_t
3855281494Sandrewpmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
3856281494Sandrew{
3857281494Sandrew	struct rwlock *lock;
3858281494Sandrew	pv_entry_t pv;
3859305882Sandrew	struct md_page *pvh;
3860297446Sandrew	pt_entry_t *pte, mask, value;
3861281494Sandrew	pmap_t pmap;
3862305882Sandrew	int lvl, md_gen, pvh_gen;
3863281494Sandrew	boolean_t rv;
3864281494Sandrew
3865281494Sandrew	rv = FALSE;
3866281494Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
3867281494Sandrew	rw_rlock(lock);
3868281494Sandrewrestart:
3869281494Sandrew	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
3870281494Sandrew		pmap = PV_PMAP(pv);
3871281494Sandrew		if (!PMAP_TRYLOCK(pmap)) {
3872281494Sandrew			md_gen = m->md.pv_gen;
3873281494Sandrew			rw_runlock(lock);
3874281494Sandrew			PMAP_LOCK(pmap);
3875281494Sandrew			rw_rlock(lock);
3876281494Sandrew			if (md_gen != m->md.pv_gen) {
3877281494Sandrew				PMAP_UNLOCK(pmap);
3878281494Sandrew				goto restart;
3879281494Sandrew			}
3880281494Sandrew		}
3881297446Sandrew		pte = pmap_pte(pmap, pv->pv_va, &lvl);
3882297446Sandrew		KASSERT(lvl == 3,
3883297446Sandrew		    ("pmap_page_test_mappings: Invalid level %d", lvl));
3884281494Sandrew		mask = 0;
3885281494Sandrew		value = 0;
3886281494Sandrew		if (modified) {
3887281494Sandrew			mask |= ATTR_AP_RW_BIT;
3888281494Sandrew			value |= ATTR_AP(ATTR_AP_RW);
3889281494Sandrew		}
3890281494Sandrew		if (accessed) {
3891281494Sandrew			mask |= ATTR_AF | ATTR_DESCR_MASK;
3892281494Sandrew			value |= ATTR_AF | L3_PAGE;
3893281494Sandrew		}
3894297446Sandrew		rv = (pmap_load(pte) & mask) == value;
3895281494Sandrew		PMAP_UNLOCK(pmap);
3896281494Sandrew		if (rv)
3897281494Sandrew			goto out;
3898281494Sandrew	}
3899305882Sandrew	if ((m->flags & PG_FICTITIOUS) == 0) {
3900305882Sandrew		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
3901305882Sandrew		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
3902305882Sandrew			pmap = PV_PMAP(pv);
3903305882Sandrew			if (!PMAP_TRYLOCK(pmap)) {
3904305882Sandrew				md_gen = m->md.pv_gen;
3905305882Sandrew				pvh_gen = pvh->pv_gen;
3906305882Sandrew				rw_runlock(lock);
3907305882Sandrew				PMAP_LOCK(pmap);
3908305882Sandrew				rw_rlock(lock);
3909305882Sandrew				if (md_gen != m->md.pv_gen ||
3910305882Sandrew				    pvh_gen != pvh->pv_gen) {
3911305882Sandrew					PMAP_UNLOCK(pmap);
3912305882Sandrew					goto restart;
3913305882Sandrew				}
3914305882Sandrew			}
3915305882Sandrew			pte = pmap_pte(pmap, pv->pv_va, &lvl);
3916305882Sandrew			KASSERT(lvl == 2,
3917305882Sandrew			    ("pmap_page_test_mappings: Invalid level %d", lvl));
3918305882Sandrew			mask = 0;
3919305882Sandrew			value = 0;
3920305882Sandrew			if (modified) {
3921305882Sandrew				mask |= ATTR_AP_RW_BIT;
3922305882Sandrew				value |= ATTR_AP(ATTR_AP_RW);
3923305882Sandrew			}
3924305882Sandrew			if (accessed) {
3925305882Sandrew				mask |= ATTR_AF | ATTR_DESCR_MASK;
3926305882Sandrew				value |= ATTR_AF | L2_BLOCK;
3927305882Sandrew			}
3928305882Sandrew			rv = (pmap_load(pte) & mask) == value;
3929305882Sandrew			PMAP_UNLOCK(pmap);
3930305882Sandrew			if (rv)
3931305882Sandrew				goto out;
3932305882Sandrew		}
3933305882Sandrew	}
3934281494Sandrewout:
3935281494Sandrew	rw_runlock(lock);
3936281494Sandrew	return (rv);
3937281494Sandrew}
3938281494Sandrew
3939281494Sandrew/*
3940281494Sandrew *	pmap_is_modified:
3941281494Sandrew *
3942281494Sandrew *	Return whether or not the specified physical page was modified
3943281494Sandrew *	in any physical maps.
3944281494Sandrew */
3945281494Sandrewboolean_t
3946281494Sandrewpmap_is_modified(vm_page_t m)
3947281494Sandrew{
3948281494Sandrew
3949281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3950281494Sandrew	    ("pmap_is_modified: page %p is not managed", m));
3951281494Sandrew
3952281494Sandrew	/*
3953281494Sandrew	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
3954281494Sandrew	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
3955281494Sandrew	 * is clear, no PTEs can be dirty.
3956281494Sandrew	 */
3957281494Sandrew	VM_OBJECT_ASSERT_WLOCKED(m->object);
3958281494Sandrew	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
3959281494Sandrew		return (FALSE);
3960281494Sandrew	return (pmap_page_test_mappings(m, FALSE, TRUE));
3961281494Sandrew}
3962281494Sandrew
3963281494Sandrew/*
3964281494Sandrew *	pmap_is_prefaultable:
3965281494Sandrew *
3966281494Sandrew *	Return whether or not the specified virtual address is eligible
3967281494Sandrew *	for prefault.
3968281494Sandrew */
3969281494Sandrewboolean_t
3970281494Sandrewpmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
3971281494Sandrew{
	pd_entry_t *pde;
	pt_entry_t *pte;
	boolean_t rv;
	int lvl;
3975281494Sandrew
3976281494Sandrew	rv = FALSE;
3977281494Sandrew	PMAP_LOCK(pmap);
	/*
	 * The address is prefaultable only when its L3 entry exists but is
	 * not yet valid, i.e. the page table page is present and the
	 * mapping itself is still empty.
	 */
	pde = pmap_pde(pmap, addr, &lvl);
	if (pde != NULL && lvl == 2) {
		pte = pmap_l2_to_l3(pde, addr);
		rv = (pmap_load(pte) == 0);
	}
3982281494Sandrew	PMAP_UNLOCK(pmap);
3983281494Sandrew	return (rv);
3984281494Sandrew}
3985281494Sandrew
3986281494Sandrew/*
3987281494Sandrew *	pmap_is_referenced:
3988281494Sandrew *
3989281494Sandrew *	Return whether or not the specified physical page was referenced
3990281494Sandrew *	in any physical maps.
3991281494Sandrew */
3992281494Sandrewboolean_t
3993281494Sandrewpmap_is_referenced(vm_page_t m)
3994281494Sandrew{
3995281494Sandrew
3996281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3997281494Sandrew	    ("pmap_is_referenced: page %p is not managed", m));
3998281494Sandrew	return (pmap_page_test_mappings(m, TRUE, FALSE));
3999281494Sandrew}
4000281494Sandrew
4001281494Sandrew/*
4002281494Sandrew * Clear the write and modified bits in each of the given page's mappings.
4003281494Sandrew */
4004281494Sandrewvoid
4005281494Sandrewpmap_remove_write(vm_page_t m)
4006281494Sandrew{
4007305882Sandrew	struct md_page *pvh;
4008281494Sandrew	pmap_t pmap;
4009281494Sandrew	struct rwlock *lock;
4010305882Sandrew	pv_entry_t next_pv, pv;
4011297446Sandrew	pt_entry_t oldpte, *pte;
4012305882Sandrew	vm_offset_t va;
4013305882Sandrew	int lvl, md_gen, pvh_gen;
4014281494Sandrew
4015281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4016281494Sandrew	    ("pmap_remove_write: page %p is not managed", m));
4017281494Sandrew
4018281494Sandrew	/*
4019281494Sandrew	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
4020281494Sandrew	 * set by another thread while the object is locked.  Thus,
4021281494Sandrew	 * if PGA_WRITEABLE is clear, no page table entries need updating.
4022281494Sandrew	 */
4023281494Sandrew	VM_OBJECT_ASSERT_WLOCKED(m->object);
4024281494Sandrew	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
4025281494Sandrew		return;
4026281494Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
4027305882Sandrew	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
4028305882Sandrew	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
4029281494Sandrewretry_pv_loop:
4030281494Sandrew	rw_wlock(lock);
4031305882Sandrew	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
4032305882Sandrew		pmap = PV_PMAP(pv);
4033305882Sandrew		if (!PMAP_TRYLOCK(pmap)) {
4034305882Sandrew			pvh_gen = pvh->pv_gen;
4035305882Sandrew			rw_wunlock(lock);
4036305882Sandrew			PMAP_LOCK(pmap);
4037305882Sandrew			rw_wlock(lock);
4038305882Sandrew			if (pvh_gen != pvh->pv_gen) {
4039305882Sandrew				PMAP_UNLOCK(pmap);
4040305882Sandrew				rw_wunlock(lock);
4041305882Sandrew				goto retry_pv_loop;
4042305882Sandrew			}
4043305882Sandrew		}
4044305882Sandrew		va = pv->pv_va;
4045305882Sandrew		pte = pmap_pte(pmap, pv->pv_va, &lvl);
4046305882Sandrew		if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
4047305882Sandrew			pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET,
4048305882Sandrew			    &lock);
4049305882Sandrew		KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
4050305882Sandrew		    ("inconsistent pv lock %p %p for page %p",
4051305882Sandrew		    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
4052305882Sandrew		PMAP_UNLOCK(pmap);
4053305882Sandrew	}
4054281494Sandrew	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
4055281494Sandrew		pmap = PV_PMAP(pv);
4056281494Sandrew		if (!PMAP_TRYLOCK(pmap)) {
4057305882Sandrew			pvh_gen = pvh->pv_gen;
4058281494Sandrew			md_gen = m->md.pv_gen;
4059281494Sandrew			rw_wunlock(lock);
4060281494Sandrew			PMAP_LOCK(pmap);
4061281494Sandrew			rw_wlock(lock);
4062305882Sandrew			if (pvh_gen != pvh->pv_gen ||
4063305882Sandrew			    md_gen != m->md.pv_gen) {
4064281494Sandrew				PMAP_UNLOCK(pmap);
4065281494Sandrew				rw_wunlock(lock);
4066281494Sandrew				goto retry_pv_loop;
4067281494Sandrew			}
4068281494Sandrew		}
4069297446Sandrew		pte = pmap_pte(pmap, pv->pv_va, &lvl);
4070281494Sandrewretry:
4071297446Sandrew		oldpte = pmap_load(pte);
4072297446Sandrew		if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
4073297446Sandrew			if (!atomic_cmpset_long(pte, oldpte,
4074297446Sandrew			    oldpte | ATTR_AP(ATTR_AP_RO)))
4075281494Sandrew				goto retry;
4076297446Sandrew			if ((oldpte & ATTR_AF) != 0)
4077281494Sandrew				vm_page_dirty(m);
4078281494Sandrew			pmap_invalidate_page(pmap, pv->pv_va);
4079281494Sandrew		}
4080281494Sandrew		PMAP_UNLOCK(pmap);
4081281494Sandrew	}
4082281494Sandrew	rw_wunlock(lock);
4083281494Sandrew	vm_page_aflag_clear(m, PGA_WRITEABLE);
4084281494Sandrew}
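/*
 * Illustrative sketch (hypothetical helper, not part of this file): the
 * heart of pmap_remove_write() is a compare-and-set loop that downgrades
 * a writable PTE to read-only, retrying if the entry changed under us,
 * and transferring the dirty state to the vm_page before the mapping
 * loses its write permission.
 */
#if 0
static void
example_write_protect_l3(pmap_t pmap, pt_entry_t *pte, vm_offset_t va,
    vm_page_t m)
{
	pt_entry_t oldpte;

	do {
		oldpte = pmap_load(pte);
		if ((oldpte & ATTR_AP_RW_BIT) != ATTR_AP(ATTR_AP_RW))
			return;		/* Already read-only. */
	} while (!atomic_cmpset_long(pte, oldpte,
	    oldpte | ATTR_AP(ATTR_AP_RO)));
	if ((oldpte & ATTR_AF) != 0)
		vm_page_dirty(m);
	pmap_invalidate_page(pmap, va);
}
#endif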
4085281494Sandrew
static __inline boolean_t
safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
{

	/*
	 * ARM64TODO: The access flag is not yet managed by the exception
	 * handler, so it is never safe to clear it here; callers fall back
	 * to removing the mapping instead.
	 */
	return (FALSE);
}
4092281494Sandrew
4093281494Sandrew/*
4094281494Sandrew *	pmap_ts_referenced:
4095281494Sandrew *
4096281494Sandrew *	Return a count of reference bits for a page, clearing those bits.
4097281494Sandrew *	It is not necessary for every reference bit to be cleared, but it
4098281494Sandrew *	is necessary that 0 only be returned when there are truly no
4099281494Sandrew *	reference bits set.
4100281494Sandrew *
4101324400Salc *	As an optimization, update the page's dirty field if a modified bit is
4102324400Salc *	found while counting reference bits.  This opportunistic update can be
4103324400Salc *	performed at low cost and can eliminate the need for some future calls
4104324400Salc *	to pmap_is_modified().  However, since this function stops after
4105324400Salc *	finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
4106324400Salc *	dirty pages.  Those dirty pages will only be detected by a future call
4107324400Salc *	to pmap_is_modified().
4108281494Sandrew */
4109281494Sandrewint
4110281494Sandrewpmap_ts_referenced(vm_page_t m)
4111281494Sandrew{
4112305882Sandrew	struct md_page *pvh;
4113281494Sandrew	pv_entry_t pv, pvf;
4114281494Sandrew	pmap_t pmap;
4115281494Sandrew	struct rwlock *lock;
4116297446Sandrew	pd_entry_t *pde, tpde;
4117297446Sandrew	pt_entry_t *pte, tpte;
4118305882Sandrew	pt_entry_t *l3;
4119305882Sandrew	vm_offset_t va;
4120281494Sandrew	vm_paddr_t pa;
4121305882Sandrew	int cleared, md_gen, not_cleared, lvl, pvh_gen;
4122281494Sandrew	struct spglist free;
4123305882Sandrew	bool demoted;
4124281494Sandrew
4125281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4126281494Sandrew	    ("pmap_ts_referenced: page %p is not managed", m));
4127281494Sandrew	SLIST_INIT(&free);
4128281494Sandrew	cleared = 0;
4129281494Sandrew	pa = VM_PAGE_TO_PHYS(m);
4130281494Sandrew	lock = PHYS_TO_PV_LIST_LOCK(pa);
4131305882Sandrew	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
4132281494Sandrew	rw_wlock(lock);
4133281494Sandrewretry:
4134281494Sandrew	not_cleared = 0;
4135305882Sandrew	if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
4136305882Sandrew		goto small_mappings;
4137305882Sandrew	pv = pvf;
4138305882Sandrew	do {
4139305882Sandrew		if (pvf == NULL)
4140305882Sandrew			pvf = pv;
4141305882Sandrew		pmap = PV_PMAP(pv);
4142305882Sandrew		if (!PMAP_TRYLOCK(pmap)) {
4143305882Sandrew			pvh_gen = pvh->pv_gen;
4144305882Sandrew			rw_wunlock(lock);
4145305882Sandrew			PMAP_LOCK(pmap);
4146305882Sandrew			rw_wlock(lock);
4147305882Sandrew			if (pvh_gen != pvh->pv_gen) {
4148305882Sandrew				PMAP_UNLOCK(pmap);
4149305882Sandrew				goto retry;
4150305882Sandrew			}
4151305882Sandrew		}
4152305882Sandrew		va = pv->pv_va;
4153305882Sandrew		pde = pmap_pde(pmap, pv->pv_va, &lvl);
4154305882Sandrew		KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found"));
4155305882Sandrew		KASSERT(lvl == 1,
4156305882Sandrew		    ("pmap_ts_referenced: invalid pde level %d", lvl));
4157305882Sandrew		tpde = pmap_load(pde);
4158305882Sandrew		KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE,
4159305882Sandrew		    ("pmap_ts_referenced: found an invalid l1 table"));
4160305882Sandrew		pte = pmap_l1_to_l2(pde, pv->pv_va);
4161305882Sandrew		tpte = pmap_load(pte);
4162324400Salc		if (pmap_page_dirty(tpte)) {
4163324400Salc			/*
4164324400Salc			 * Although "tpte" is mapping a 2MB page, because
4165324400Salc			 * this function is called at a 4KB page granularity,
4166324400Salc			 * we only update the 4KB page under test.
4167324400Salc			 */
4168324400Salc			vm_page_dirty(m);
4169324400Salc		}
4170305882Sandrew		if ((tpte & ATTR_AF) != 0) {
4171305882Sandrew			/*
4172305882Sandrew			 * Since this reference bit is shared by 512 4KB
4173305882Sandrew			 * pages, it should not be cleared every time it is
4174305882Sandrew			 * tested.  Apply a simple "hash" function on the
4175305882Sandrew			 * physical page number, the virtual superpage number,
4176305882Sandrew			 * and the pmap address to select one 4KB page out of
4177305882Sandrew			 * the 512 on which testing the reference bit will
4178305882Sandrew			 * result in clearing that reference bit.  This
4179305882Sandrew			 * function is designed to avoid the selection of the
4180305882Sandrew			 * same 4KB page for every 2MB page mapping.
4181305882Sandrew			 *
4182305882Sandrew			 * On demotion, a mapping that hasn't been referenced
4183305882Sandrew			 * is simply destroyed.  To avoid the possibility of a
4184305882Sandrew			 * subsequent page fault on a demoted wired mapping,
4185305882Sandrew			 * always leave its reference bit set.  Moreover,
4186305882Sandrew			 * since the superpage is wired, the current state of
4187305882Sandrew			 * its reference bit won't affect page replacement.
4188305882Sandrew			 */
4189305882Sandrew			if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^
4190305882Sandrew			    (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
4191305882Sandrew			    (tpte & ATTR_SW_WIRED) == 0) {
4192305882Sandrew				if (safe_to_clear_referenced(pmap, tpte)) {
4193305882Sandrew					/*
4194305882Sandrew					 * TODO: We don't handle the access
4195305882Sandrew					 * flag at all. We need to be able
					 * to set it in the exception handler.
4197305882Sandrew					 */
4198305882Sandrew					panic("ARM64TODO: "
4199305882Sandrew					    "safe_to_clear_referenced\n");
4200305882Sandrew				} else if (pmap_demote_l2_locked(pmap, pte,
4201305882Sandrew				    pv->pv_va, &lock) != NULL) {
4202305882Sandrew					demoted = true;
4203305882Sandrew					va += VM_PAGE_TO_PHYS(m) -
4204305882Sandrew					    (tpte & ~ATTR_MASK);
4205305882Sandrew					l3 = pmap_l2_to_l3(pte, va);
4206305882Sandrew					pmap_remove_l3(pmap, l3, va,
4207305882Sandrew					    pmap_load(pte), NULL, &lock);
4208305882Sandrew				} else
4209305882Sandrew					demoted = true;
4210305882Sandrew
4211305882Sandrew				if (demoted) {
4212305882Sandrew					/*
4213305882Sandrew					 * The superpage mapping was removed
4214305882Sandrew					 * entirely and therefore 'pv' is no
4215305882Sandrew					 * longer valid.
4216305882Sandrew					 */
4217305882Sandrew					if (pvf == pv)
4218305882Sandrew						pvf = NULL;
4219305882Sandrew					pv = NULL;
4220305882Sandrew				}
4221305882Sandrew				cleared++;
4222305882Sandrew				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
4223305882Sandrew				    ("inconsistent pv lock %p %p for page %p",
4224305882Sandrew				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
4225305882Sandrew			} else
4226305882Sandrew				not_cleared++;
4227305882Sandrew		}
4228305882Sandrew		PMAP_UNLOCK(pmap);
4229305882Sandrew		/* Rotate the PV list if it has more than one entry. */
4230305882Sandrew		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
4231305882Sandrew			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
4232305882Sandrew			TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
4233305882Sandrew			pvh->pv_gen++;
4234305882Sandrew		}
4235305882Sandrew		if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX)
4236305882Sandrew			goto out;
4237305882Sandrew	} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
4238305882Sandrewsmall_mappings:
4239281494Sandrew	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
4240281494Sandrew		goto out;
4241281494Sandrew	pv = pvf;
4242281494Sandrew	do {
4243281494Sandrew		if (pvf == NULL)
4244281494Sandrew			pvf = pv;
4245281494Sandrew		pmap = PV_PMAP(pv);
4246281494Sandrew		if (!PMAP_TRYLOCK(pmap)) {
4247305882Sandrew			pvh_gen = pvh->pv_gen;
4248281494Sandrew			md_gen = m->md.pv_gen;
4249281494Sandrew			rw_wunlock(lock);
4250281494Sandrew			PMAP_LOCK(pmap);
4251281494Sandrew			rw_wlock(lock);
4252305882Sandrew			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
4253281494Sandrew				PMAP_UNLOCK(pmap);
4254281494Sandrew				goto retry;
4255281494Sandrew			}
4256281494Sandrew		}
4257297446Sandrew		pde = pmap_pde(pmap, pv->pv_va, &lvl);
4258297446Sandrew		KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
4259297446Sandrew		KASSERT(lvl == 2,
4260297446Sandrew		    ("pmap_ts_referenced: invalid pde level %d", lvl));
4261297446Sandrew		tpde = pmap_load(pde);
4262297446Sandrew		KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
4263281494Sandrew		    ("pmap_ts_referenced: found an invalid l2 table"));
4264297446Sandrew		pte = pmap_l2_to_l3(pde, pv->pv_va);
4265297446Sandrew		tpte = pmap_load(pte);
4266324400Salc		if (pmap_page_dirty(tpte))
4267324400Salc			vm_page_dirty(m);
4268297446Sandrew		if ((tpte & ATTR_AF) != 0) {
4269297446Sandrew			if (safe_to_clear_referenced(pmap, tpte)) {
4270281494Sandrew				/*
4271281494Sandrew				 * TODO: We don't handle the access flag
4272281494Sandrew				 * at all. We need to be able to set it in
4273281494Sandrew				 * the exception handler.
4274281494Sandrew				 */
4275286073Semaste				panic("ARM64TODO: safe_to_clear_referenced\n");
4276297446Sandrew			} else if ((tpte & ATTR_SW_WIRED) == 0) {
4277281494Sandrew				/*
4278281494Sandrew				 * Wired pages cannot be paged out so
4279281494Sandrew				 * doing accessed bit emulation for
4280281494Sandrew				 * them is wasted effort. We do the
4281281494Sandrew				 * hard work for unwired pages only.
4282281494Sandrew				 */
4283297446Sandrew				pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
4284288445Sandrew				    &free, &lock);
4285281494Sandrew				pmap_invalidate_page(pmap, pv->pv_va);
4286281494Sandrew				cleared++;
4287281494Sandrew				if (pvf == pv)
4288281494Sandrew					pvf = NULL;
4289281494Sandrew				pv = NULL;
4290281494Sandrew				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
4291281494Sandrew				    ("inconsistent pv lock %p %p for page %p",
4292281494Sandrew				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
4293281494Sandrew			} else
4294281494Sandrew				not_cleared++;
4295281494Sandrew		}
4296281494Sandrew		PMAP_UNLOCK(pmap);
4297281494Sandrew		/* Rotate the PV list if it has more than one entry. */
4298281494Sandrew		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
4299281494Sandrew			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
4300281494Sandrew			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
4301281494Sandrew			m->md.pv_gen++;
4302281494Sandrew		}
4303281494Sandrew	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
4304281494Sandrew	    not_cleared < PMAP_TS_REFERENCED_MAX);
4305281494Sandrewout:
4306281494Sandrew	rw_wunlock(lock);
4307281494Sandrew	pmap_free_zero_pages(&free);
4308281494Sandrew	return (cleared + not_cleared);
4309281494Sandrew}
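/*
 * Illustrative sketch (hypothetical helper): for a 2MB mapping the
 * reference bit is shared by 512 4KB pages, so pmap_ts_referenced()
 * clears it for only one of them, selected by hashing the physical page
 * number, the virtual superpage number and the pmap pointer so that
 * different superpages tend to sacrifice different 4KB pages.
 */
#if 0
static bool
example_clear_superpage_ref(vm_paddr_t pa, vm_offset_t va, pmap_t pmap)
{

	/* True for exactly one 4KB page out of every Ln_ENTRIES (512). */
	return ((((pa >> PAGE_SHIFT) ^ (va >> L2_SHIFT) ^
	    (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0);
}
#endif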
4310281494Sandrew
4311281494Sandrew/*
4312281494Sandrew *	Apply the given advice to the specified range of addresses within the
4313281494Sandrew *	given pmap.  Depending on the advice, clear the referenced and/or
4314281494Sandrew *	modified flags in each mapping and set the mapped page's dirty field.
4315281494Sandrew */
4316281494Sandrewvoid
4317281494Sandrewpmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{

	/* ARM64TODO: pmap_advise() is not yet implemented. */
}
4320281494Sandrew
4321281494Sandrew/*
4322281494Sandrew *	Clear the modify bits on the specified physical page.
4323281494Sandrew */
4324281494Sandrewvoid
4325281494Sandrewpmap_clear_modify(vm_page_t m)
4326281494Sandrew{
4327281494Sandrew
4328281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4329281494Sandrew	    ("pmap_clear_modify: page %p is not managed", m));
4330281494Sandrew	VM_OBJECT_ASSERT_WLOCKED(m->object);
4331281494Sandrew	KASSERT(!vm_page_xbusied(m),
4332281494Sandrew	    ("pmap_clear_modify: page %p is exclusive busied", m));
4333281494Sandrew
4334281494Sandrew	/*
	 * If the page is not PGA_WRITEABLE, then no PTEs can be dirty.
4336281494Sandrew	 * If the object containing the page is locked and the page is not
4337281494Sandrew	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
4338281494Sandrew	 */
4339281494Sandrew	if ((m->aflags & PGA_WRITEABLE) == 0)
4340281494Sandrew		return;
4341281846Sandrew
4342286073Semaste	/* ARM64TODO: We lack support for tracking if a page is modified */
4343281494Sandrew}
4344281494Sandrew
4345282221Sandrewvoid *
4346282221Sandrewpmap_mapbios(vm_paddr_t pa, vm_size_t size)
4347282221Sandrew{
4348282221Sandrew
	return ((void *)PHYS_TO_DMAP(pa));
4350282221Sandrew}
4351282221Sandrew
4352282221Sandrewvoid
4353282221Sandrewpmap_unmapbios(vm_paddr_t pa, vm_size_t size)
{

	/* Nothing to do: pmap_mapbios() maps through the direct map. */
}
4356282221Sandrew
4357281494Sandrew/*
4358281494Sandrew * Sets the memory attribute for the specified page.
4359281494Sandrew */
4360281494Sandrewvoid
4361281494Sandrewpmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
4362281494Sandrew{
4363281494Sandrew
4364286080Sandrew	m->md.pv_memattr = ma;
4365286080Sandrew
4366286080Sandrew	/*
4367286080Sandrew	 * If "m" is a normal page, update its direct mapping.  This update
4368286080Sandrew	 * can be relied upon to perform any cache operations that are
4369286080Sandrew	 * required for data coherence.
4370286080Sandrew	 */
4371286080Sandrew	if ((m->flags & PG_FICTITIOUS) == 0 &&
4372305882Sandrew	    pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
4373305882Sandrew	    m->md.pv_memattr) != 0)
4374305882Sandrew		panic("memory attribute change on the direct map failed");
4375281494Sandrew}
4376281494Sandrew
4377281494Sandrew/*
 * Changes the specified virtual address range's memory type to that given by
 * the parameter "mode".  On arm64 this implementation requires the range to
 * be completely contained within the direct map; EINVAL is returned for any
 * other range.  Keeping all mappings of a physical page at the same memory
 * type matters because the architecture does not guarantee coherent behavior
 * when two or more mappings of the same physical page use different memory
 * attributes.
 *
 * Returns zero if the change completed successfully, and EINVAL if it failed.
 * Specifically, EINVAL is returned if some part of the virtual address range
 * was not mapped or could not be demoted to 4KB page mappings.  In the latter
 * case, the memory type may already have been changed on part of the range.
4395305882Sandrew */
4396305882Sandrewstatic int
4397305882Sandrewpmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
4398305882Sandrew{
4399305882Sandrew	int error;
4400305882Sandrew
4401305882Sandrew	PMAP_LOCK(kernel_pmap);
4402305882Sandrew	error = pmap_change_attr_locked(va, size, mode);
4403305882Sandrew	PMAP_UNLOCK(kernel_pmap);
4404305882Sandrew	return (error);
4405305882Sandrew}
4406305882Sandrew
4407305882Sandrewstatic int
4408305882Sandrewpmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
4409305882Sandrew{
4410305882Sandrew	vm_offset_t base, offset, tmpva;
4411305882Sandrew	pt_entry_t l3, *pte, *newpte;
4412305882Sandrew	int lvl;
4413305882Sandrew
4414305882Sandrew	PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
4415305882Sandrew	base = trunc_page(va);
4416305882Sandrew	offset = va & PAGE_MASK;
4417305882Sandrew	size = round_page(offset + size);
4418305882Sandrew
4419305882Sandrew	if (!VIRT_IN_DMAP(base))
4420305882Sandrew		return (EINVAL);
4421305882Sandrew
4422305882Sandrew	for (tmpva = base; tmpva < base + size; ) {
		pte = pmap_pte(kernel_pmap, tmpva, &lvl);
4424305882Sandrew		if (pte == NULL)
4425305882Sandrew			return (EINVAL);
4426305882Sandrew
4427305882Sandrew		if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) {
4428305882Sandrew			/*
4429305882Sandrew			 * We already have the correct attribute,
4430305882Sandrew			 * ignore this entry.
4431305882Sandrew			 */
4432305882Sandrew			switch (lvl) {
4433305882Sandrew			default:
4434305882Sandrew				panic("Invalid DMAP table level: %d\n", lvl);
4435305882Sandrew			case 1:
4436305882Sandrew				tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
4437305882Sandrew				break;
4438305882Sandrew			case 2:
4439305882Sandrew				tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
4440305882Sandrew				break;
4441305882Sandrew			case 3:
4442305882Sandrew				tmpva += PAGE_SIZE;
4443305882Sandrew				break;
4444305882Sandrew			}
4445305882Sandrew		} else {
4446305882Sandrew			/*
			 * Split the entry into level 3 mappings, then
			 * set the new attribute.
4449305882Sandrew			 */
4450305882Sandrew			switch (lvl) {
4451305882Sandrew			default:
4452305882Sandrew				panic("Invalid DMAP table level: %d\n", lvl);
4453305882Sandrew			case 1:
4454305882Sandrew				newpte = pmap_demote_l1(kernel_pmap, pte,
4455305882Sandrew				    tmpva & ~L1_OFFSET);
4456305882Sandrew				if (newpte == NULL)
4457305882Sandrew					return (EINVAL);
				pte = pmap_l1_to_l2(pte, tmpva);
				/* FALLTHROUGH */
4459305882Sandrew			case 2:
4460305882Sandrew				newpte = pmap_demote_l2(kernel_pmap, pte,
4461305882Sandrew				    tmpva & ~L2_OFFSET);
4462305882Sandrew				if (newpte == NULL)
4463305882Sandrew					return (EINVAL);
				pte = pmap_l2_to_l3(pte, tmpva);
				/* FALLTHROUGH */
4465305882Sandrew			case 3:
4466305882Sandrew				/* Update the entry */
4467305882Sandrew				l3 = pmap_load(pte);
4468305882Sandrew				l3 &= ~ATTR_IDX_MASK;
4469305882Sandrew				l3 |= ATTR_IDX(mode);
4470319203Sandrew				if (mode == DEVICE_MEMORY)
4471319203Sandrew					l3 |= ATTR_XN;
4472305882Sandrew
4473305882Sandrew				pmap_update_entry(kernel_pmap, pte, l3, tmpva,
4474305882Sandrew				    PAGE_SIZE);
4475305882Sandrew
4476305882Sandrew				/*
4477305882Sandrew				 * If moving to a non-cacheable entry flush
4478305882Sandrew				 * the cache.
4479305882Sandrew				 */
4480305882Sandrew				if (mode == VM_MEMATTR_UNCACHEABLE)
4481305882Sandrew					cpu_dcache_wbinv_range(tmpva, L3_SIZE);
4482305882Sandrew
4483305882Sandrew				break;
4484305882Sandrew			}
4485305882Sandrew			tmpva += PAGE_SIZE;
4486305882Sandrew		}
4487305882Sandrew	}
4488305882Sandrew
4489305882Sandrew	return (0);
4490305882Sandrew}
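/*
 * Illustrative usage sketch (hypothetical caller): change the memory
 * attributes of a single physical page through its direct map alias.
 * The range must lie within the DMAP, and a non-zero return means part
 * of the range could not be remapped.
 */
#if 0
static int
example_make_page_uncacheable(vm_paddr_t pa)
{

	return (pmap_change_attr(PHYS_TO_DMAP(pa), PAGE_SIZE,
	    VM_MEMATTR_UNCACHEABLE));
}
#endif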
4491305882Sandrew
4492305882Sandrew/*
4493305882Sandrew * Create an L2 table to map all addresses within an L1 mapping.
4494305882Sandrew */
4495305882Sandrewstatic pt_entry_t *
4496305882Sandrewpmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
4497305882Sandrew{
4498305882Sandrew	pt_entry_t *l2, newl2, oldl1;
4499305882Sandrew	vm_offset_t tmpl1;
4500305882Sandrew	vm_paddr_t l2phys, phys;
4501305882Sandrew	vm_page_t ml2;
4502305882Sandrew	int i;
4503305882Sandrew
4504305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4505305882Sandrew	oldl1 = pmap_load(l1);
4506305882Sandrew	KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
4507305882Sandrew	    ("pmap_demote_l1: Demoting a non-block entry"));
4508305882Sandrew	KASSERT((va & L1_OFFSET) == 0,
4509305882Sandrew	    ("pmap_demote_l1: Invalid virtual address %#lx", va));
4510305882Sandrew	KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
4511305882Sandrew	    ("pmap_demote_l1: Level 1 table shouldn't be managed"));
4512305882Sandrew
4513305882Sandrew	tmpl1 = 0;
4514305882Sandrew	if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
4515305882Sandrew		tmpl1 = kva_alloc(PAGE_SIZE);
4516305882Sandrew		if (tmpl1 == 0)
4517305882Sandrew			return (NULL);
4518305882Sandrew	}
4519305882Sandrew
	if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
		CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
		    " in pmap %p", va, pmap);
		/* Don't leak the temporary KVA allocated above. */
		if (tmpl1 != 0)
			kva_free(tmpl1, PAGE_SIZE);
		return (NULL);
	}
4526305882Sandrew
4527305882Sandrew	l2phys = VM_PAGE_TO_PHYS(ml2);
4528305882Sandrew	l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
4529305882Sandrew
	/* The physical address the old L1 block mapping points at */
	phys = oldl1 & ~ATTR_MASK;
	/* The attributes from the old L1 entry to be copied */
	newl2 = oldl1 & ATTR_MASK;
4534305882Sandrew
4535305882Sandrew	/* Create the new entries */
4536305882Sandrew	for (i = 0; i < Ln_ENTRIES; i++) {
4537305882Sandrew		l2[i] = newl2 | phys;
4538305882Sandrew		phys += L2_SIZE;
4539305882Sandrew	}
4540305882Sandrew	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
4541305882Sandrew	KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK),
4542305882Sandrew	    ("Invalid l2 page (%lx != %lx)", l2[0],
4543305882Sandrew	    (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
4544305882Sandrew
4545305882Sandrew	if (tmpl1 != 0) {
4546305882Sandrew		pmap_kenter(tmpl1, PAGE_SIZE,
4547305882Sandrew		    DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY);
4548305882Sandrew		l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
4549305882Sandrew	}
4550305882Sandrew
4551305882Sandrew	pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
4552305882Sandrew
4553305882Sandrew	if (tmpl1 != 0) {
4554305882Sandrew		pmap_kremove(tmpl1);
4555305882Sandrew		kva_free(tmpl1, PAGE_SIZE);
4556305882Sandrew	}
4557305882Sandrew
4558305882Sandrew	return (l2);
4559305882Sandrew}
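/*
 * Illustrative sketch (hypothetical helper): demotion replaces one block
 * entry with a full table whose i-th entry keeps the original block's
 * attributes and maps the i-th L2_SIZE chunk of the original physical
 * range.  The table is fully built and written back to memory before
 * pmap_update_entry() installs it, preserving break-before-make.
 */
#if 0
static pt_entry_t
example_demoted_l2_entry(pt_entry_t oldl1, u_int i)
{

	return ((oldl1 & ATTR_MASK) |
	    ((oldl1 & ~ATTR_MASK) + (uint64_t)i * L2_SIZE));
}
#endif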
4560305882Sandrew
4561305882Sandrew/*
4562305882Sandrew * Create an L3 table to map all addresses within an L2 mapping.
4563305882Sandrew */
4564305882Sandrewstatic pt_entry_t *
4565305882Sandrewpmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
4566305882Sandrew    struct rwlock **lockp)
4567305882Sandrew{
4568305882Sandrew	pt_entry_t *l3, newl3, oldl2;
4569305882Sandrew	vm_offset_t tmpl2;
4570305882Sandrew	vm_paddr_t l3phys, phys;
4571305882Sandrew	vm_page_t ml3;
4572305882Sandrew	int i;
4573305882Sandrew
4574305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4575305882Sandrew	l3 = NULL;
4576305882Sandrew	oldl2 = pmap_load(l2);
4577305882Sandrew	KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
4578305882Sandrew	    ("pmap_demote_l2: Demoting a non-block entry"));
4579305882Sandrew	KASSERT((va & L2_OFFSET) == 0,
4580305882Sandrew	    ("pmap_demote_l2: Invalid virtual address %#lx", va));
4581305882Sandrew
4582305882Sandrew	tmpl2 = 0;
4583305882Sandrew	if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
4584305882Sandrew		tmpl2 = kva_alloc(PAGE_SIZE);
4585305882Sandrew		if (tmpl2 == 0)
4586305882Sandrew			return (NULL);
4587305882Sandrew	}
4588305882Sandrew
4589318716Smarkj	if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
4590305882Sandrew		ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
4591305882Sandrew		    (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
4592305882Sandrew		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
4593305882Sandrew		if (ml3 == NULL) {
4594305882Sandrew			CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
4595305882Sandrew			    " in pmap %p", va, pmap);
4596305882Sandrew			goto fail;
4597305882Sandrew		}
4598305882Sandrew		if (va < VM_MAXUSER_ADDRESS)
4599305882Sandrew			pmap_resident_count_inc(pmap, 1);
4600305882Sandrew	}
4601305882Sandrew
4602305882Sandrew	l3phys = VM_PAGE_TO_PHYS(ml3);
4603305882Sandrew	l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);
4604305882Sandrew
	/* The physical address the old L2 block mapping points at */
	phys = oldl2 & ~ATTR_MASK;
	/* The attributes from the old L2 entry to be copied */
	newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE;
4609305882Sandrew
4610305882Sandrew	/*
4611305882Sandrew	 * If the page table page is new, initialize it.
4612305882Sandrew	 */
4613305882Sandrew	if (ml3->wire_count == 1) {
4614336765Smarkj		ml3->wire_count = NL3PG;
4615305882Sandrew		for (i = 0; i < Ln_ENTRIES; i++) {
4616305882Sandrew			l3[i] = newl3 | phys;
4617305882Sandrew			phys += L3_SIZE;
4618305882Sandrew		}
4619305882Sandrew		cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE);
4620305882Sandrew	}
4621305882Sandrew	KASSERT(l3[0] == ((oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE),
4622305882Sandrew	    ("Invalid l3 page (%lx != %lx)", l3[0],
4623305882Sandrew	    (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE));
4624305882Sandrew
4625305882Sandrew	/*
4626305882Sandrew	 * Map the temporary page so we don't lose access to the l2 table.
4627305882Sandrew	 */
4628305882Sandrew	if (tmpl2 != 0) {
4629305882Sandrew		pmap_kenter(tmpl2, PAGE_SIZE,
4630305882Sandrew		    DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY);
4631305882Sandrew		l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK));
4632305882Sandrew	}
4633305882Sandrew
4634305882Sandrew	/*
4635305882Sandrew	 * The spare PV entries must be reserved prior to demoting the
4636305882Sandrew	 * mapping, that is, prior to changing the PDE.  Otherwise, the state
4637305882Sandrew	 * of the L2 and the PV lists will be inconsistent, which can result
4638305882Sandrew	 * in reclaim_pv_chunk() attempting to remove a PV entry from the
4639305882Sandrew	 * wrong PV list and pmap_pv_demote_l2() failing to find the expected
4640305882Sandrew	 * PV entry for the 2MB page mapping that is being demoted.
4641305882Sandrew	 */
4642305882Sandrew	if ((oldl2 & ATTR_SW_MANAGED) != 0)
4643305882Sandrew		reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
4644305882Sandrew
4645305882Sandrew	pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE);
4646305882Sandrew
4647305882Sandrew	/*
4648305882Sandrew	 * Demote the PV entry.
4649305882Sandrew	 */
4650305882Sandrew	if ((oldl2 & ATTR_SW_MANAGED) != 0)
4651305882Sandrew		pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp);
4652305882Sandrew
4653305882Sandrew	atomic_add_long(&pmap_l2_demotions, 1);
4654305882Sandrew	CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx"
4655305882Sandrew	    " in pmap %p %lx", va, pmap, l3[0]);
4656305882Sandrew
4657305882Sandrewfail:
4658305882Sandrew	if (tmpl2 != 0) {
4659305882Sandrew		pmap_kremove(tmpl2);
4660305882Sandrew		kva_free(tmpl2, PAGE_SIZE);
4661305882Sandrew	}
4662305882Sandrew
	return (l3);
}
4666305882Sandrew
4667305882Sandrewstatic pt_entry_t *
4668305882Sandrewpmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
4669305882Sandrew{
4670305882Sandrew	struct rwlock *lock;
4671305882Sandrew	pt_entry_t *l3;
4672305882Sandrew
4673305882Sandrew	lock = NULL;
4674305882Sandrew	l3 = pmap_demote_l2_locked(pmap, l2, va, &lock);
4675305882Sandrew	if (lock != NULL)
4676305882Sandrew		rw_wunlock(lock);
4677305882Sandrew	return (l3);
4678305882Sandrew}
4679305882Sandrew
4680305882Sandrew/*
4681281494Sandrew * perform the pmap work for mincore
4682281494Sandrew */
4683281494Sandrewint
4684281494Sandrewpmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
4685281494Sandrew{
4686337466Smarkj	pt_entry_t *pte, tpte;
4687337466Smarkj	vm_paddr_t mask, pa;
4688337466Smarkj	int lvl, val;
4689287570Sandrew	bool managed;
4690281494Sandrew
4691287570Sandrew	PMAP_LOCK(pmap);
4692287570Sandrewretry:
4693287570Sandrew	val = 0;
4694337466Smarkj	pte = pmap_pte(pmap, addr, &lvl);
4695337466Smarkj	if (pte != NULL) {
4696337466Smarkj		tpte = pmap_load(pte);
4697287570Sandrew
4698337466Smarkj		switch (lvl) {
4699337466Smarkj		case 3:
4700337466Smarkj			mask = L3_OFFSET;
4701337466Smarkj			break;
4702337466Smarkj		case 2:
4703337466Smarkj			mask = L2_OFFSET;
4704337466Smarkj			break;
4705337466Smarkj		case 1:
4706337466Smarkj			mask = L1_OFFSET;
4707337466Smarkj			break;
4708337466Smarkj		default:
4709337466Smarkj			panic("pmap_mincore: invalid level %d", lvl);
4710337466Smarkj		}
4711295425Swma
4712337466Smarkj		val = MINCORE_INCORE;
4713337466Smarkj		if (lvl != 3)
4714337466Smarkj			val |= MINCORE_SUPER;
4715337466Smarkj		if (pmap_page_dirty(tpte))
4716287570Sandrew			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
4717337466Smarkj		if ((tpte & ATTR_AF) == ATTR_AF)
4718287570Sandrew			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
4719287570Sandrew
4720337466Smarkj		managed = (tpte & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
4721337466Smarkj		pa = (tpte & ~ATTR_MASK) | (addr & mask);
4722337466Smarkj	} else
4723337466Smarkj		managed = false;
4724295425Swma
4725287570Sandrew	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
4726287570Sandrew	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
4727287570Sandrew		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
4728287570Sandrew		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
4729287570Sandrew			goto retry;
4730287570Sandrew	} else
4731287570Sandrew		PA_UNLOCK_COND(*locked_pa);
4732287570Sandrew	PMAP_UNLOCK(pmap);
4733287570Sandrew
4734287570Sandrew	return (val);
4735281494Sandrew}
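/*
 * Illustrative sketch (hypothetical helper): the value built above can
 * be tested by callers with the standard MINCORE_* flags, e.g. to check
 * whether an address is resident via a superpage mapping.
 */
#if 0
static bool
example_resident_superpage(int val)
{

	return ((val & (MINCORE_INCORE | MINCORE_SUPER)) ==
	    (MINCORE_INCORE | MINCORE_SUPER));
}
#endif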
4736281494Sandrew
4737281494Sandrewvoid
4738281494Sandrewpmap_activate(struct thread *td)
4739281494Sandrew{
4740281494Sandrew	pmap_t	pmap;
4741281494Sandrew
4742281494Sandrew	critical_enter();
4743281494Sandrew	pmap = vmspace_pmap(td->td_proc->p_vmspace);
4744297446Sandrew	td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0);
4745297446Sandrew	__asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr));
4746285212Sandrew	pmap_invalidate_all(pmap);
4747281494Sandrew	critical_exit();
4748281494Sandrew}
4749281494Sandrew
4750281494Sandrewvoid
4751287105Sandrewpmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
4752281494Sandrew{
4753281494Sandrew
4754287105Sandrew	if (va >= VM_MIN_KERNEL_ADDRESS) {
4755287105Sandrew		cpu_icache_sync_range(va, sz);
4756287105Sandrew	} else {
4757287105Sandrew		u_int len, offset;
4758287105Sandrew		vm_paddr_t pa;
4759287105Sandrew
4760287105Sandrew		/* Find the length of data in this page to flush */
4761287105Sandrew		offset = va & PAGE_MASK;
4762287105Sandrew		len = imin(PAGE_SIZE - offset, sz);
4763287105Sandrew
4764287105Sandrew		while (sz != 0) {
4765287105Sandrew			/* Extract the physical address & find it in the DMAP */
4766287105Sandrew			pa = pmap_extract(pmap, va);
4767287105Sandrew			if (pa != 0)
4768287105Sandrew				cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
4769287105Sandrew
4770287105Sandrew			/* Move to the next page */
4771287105Sandrew			sz -= len;
4772287105Sandrew			va += len;
4773287105Sandrew			/* Set the length for the next iteration */
4774287105Sandrew			len = imin(PAGE_SIZE, sz);
4775287105Sandrew		}
4776287105Sandrew	}
4777281494Sandrew}
4778281494Sandrew
4779305882Sandrewint
4780305882Sandrewpmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
4781305882Sandrew{
4782305882Sandrew#ifdef SMP
4783305882Sandrew	uint64_t par;
4784305882Sandrew#endif
4785305882Sandrew
4786305882Sandrew	switch (ESR_ELx_EXCEPTION(esr)) {
4787305882Sandrew	case EXCP_DATA_ABORT_L:
4788305882Sandrew	case EXCP_DATA_ABORT:
4789305882Sandrew		break;
4790305882Sandrew	default:
4791305882Sandrew		return (KERN_FAILURE);
4792305882Sandrew	}
4793305882Sandrew
4794305882Sandrew#ifdef SMP
4795305882Sandrew	PMAP_LOCK(pmap);
4796305882Sandrew	switch (esr & ISS_DATA_DFSC_MASK) {
4797305882Sandrew	case ISS_DATA_DFSC_TF_L0:
4798305882Sandrew	case ISS_DATA_DFSC_TF_L1:
4799305882Sandrew	case ISS_DATA_DFSC_TF_L2:
4800305882Sandrew	case ISS_DATA_DFSC_TF_L3:
4801305882Sandrew		/* Ask the MMU to check the address */
4802305882Sandrew		if (pmap == kernel_pmap)
4803305882Sandrew			par = arm64_address_translate_s1e1r(far);
4804305882Sandrew		else
4805305882Sandrew			par = arm64_address_translate_s1e0r(far);
4806305882Sandrew
4807305882Sandrew		/*
4808305882Sandrew		 * If the translation was successful the address was invalid
4809305882Sandrew		 * due to a break-before-make sequence. We can unlock and
4810305882Sandrew		 * return success to the trap handler.
4811305882Sandrew		 */
4812305882Sandrew		if (PAR_SUCCESS(par)) {
4813305882Sandrew			PMAP_UNLOCK(pmap);
4814305882Sandrew			return (KERN_SUCCESS);
4815305882Sandrew		}
4816305882Sandrew		break;
4817305882Sandrew	default:
4818305882Sandrew		break;
4819305882Sandrew	}
4820305882Sandrew	PMAP_UNLOCK(pmap);
4821305882Sandrew#endif
4822305882Sandrew
4823305882Sandrew	return (KERN_FAILURE);
4824305882Sandrew}
4825305882Sandrew
4826281494Sandrew/*
4827281494Sandrew *	Increase the starting virtual address of the given mapping if a
4828281494Sandrew *	different alignment might result in more superpage mappings.
4829281494Sandrew */
4830281494Sandrewvoid
4831281494Sandrewpmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
4832281494Sandrew    vm_offset_t *addr, vm_size_t size)
4833281494Sandrew{
4834305880Sandrew	vm_offset_t superpage_offset;
4835305880Sandrew
4836305880Sandrew	if (size < L2_SIZE)
4837305880Sandrew		return;
4838305880Sandrew	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
4839305880Sandrew		offset += ptoa(object->pg_color);
4840305880Sandrew	superpage_offset = offset & L2_OFFSET;
4841305880Sandrew	if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
4842305880Sandrew	    (*addr & L2_OFFSET) == superpage_offset)
4843305880Sandrew		return;
4844305880Sandrew	if ((*addr & L2_OFFSET) < superpage_offset)
4845305880Sandrew		*addr = (*addr & ~L2_OFFSET) + superpage_offset;
4846305880Sandrew	else
4847305880Sandrew		*addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
4848281494Sandrew}
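/*
 * Worked example (hypothetical numbers): with L2_SIZE == 2MB, suppose
 * offset == 0x403000, so superpage_offset == 0x3000, size is 8MB, and
 * *addr comes in as 0x7ffff0000000 (L2 offset 0).  Since 0 < 0x3000,
 * *addr is adjusted to 0x7ffff0003000, making (*addr & L2_OFFSET) equal
 * superpage_offset so the interior of the mapping can use 2MB pages.
 */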
4849281494Sandrew
4850281494Sandrew/**
4851281494Sandrew * Get the kernel virtual address of a set of physical pages. If there are
4852281494Sandrew * physical addresses not covered by the DMAP perform a transient mapping
4853281494Sandrew * that will be removed when calling pmap_unmap_io_transient.
4854281494Sandrew *
4855281494Sandrew * \param page        The pages the caller wishes to obtain the virtual
4856281494Sandrew *                    address on the kernel memory map.
4857281494Sandrew * \param vaddr       On return contains the kernel virtual memory address
4858281494Sandrew *                    of the pages passed in the page parameter.
4859281494Sandrew * \param count       Number of pages passed in.
4860281494Sandrew * \param can_fault   TRUE if the thread using the mapped pages can take
4861281494Sandrew *                    page faults, FALSE otherwise.
4862281494Sandrew *
4863281494Sandrew * \returns TRUE if the caller must call pmap_unmap_io_transient when
4864281494Sandrew *          finished or FALSE otherwise.
4865281494Sandrew *
4866281494Sandrew */
4867281494Sandrewboolean_t
4868281494Sandrewpmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
4869281494Sandrew    boolean_t can_fault)
4870281494Sandrew{
4871281494Sandrew	vm_paddr_t paddr;
4872281494Sandrew	boolean_t needs_mapping;
4873281494Sandrew	int error, i;
4874281494Sandrew
4875281494Sandrew	/*
4876281494Sandrew	 * Allocate any KVA space that we need, this is done in a separate
4877281494Sandrew	 * loop to prevent calling vmem_alloc while pinned.
4878281494Sandrew	 */
4879281494Sandrew	needs_mapping = FALSE;
4880281494Sandrew	for (i = 0; i < count; i++) {
4881281494Sandrew		paddr = VM_PAGE_TO_PHYS(page[i]);
4882297617Sandrew		if (__predict_false(!PHYS_IN_DMAP(paddr))) {
4883281494Sandrew			error = vmem_alloc(kernel_arena, PAGE_SIZE,
4884281494Sandrew			    M_BESTFIT | M_WAITOK, &vaddr[i]);
4885281494Sandrew			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
4886281494Sandrew			needs_mapping = TRUE;
4887281494Sandrew		} else {
4888281494Sandrew			vaddr[i] = PHYS_TO_DMAP(paddr);
4889281494Sandrew		}
4890281494Sandrew	}
4891281494Sandrew
4892281494Sandrew	/* Exit early if everything is covered by the DMAP */
4893281494Sandrew	if (!needs_mapping)
4894281494Sandrew		return (FALSE);
4895281494Sandrew
4896281494Sandrew	if (!can_fault)
4897281494Sandrew		sched_pin();
4898281494Sandrew	for (i = 0; i < count; i++) {
4899281494Sandrew		paddr = VM_PAGE_TO_PHYS(page[i]);
4900297617Sandrew		if (!PHYS_IN_DMAP(paddr)) {
4901281494Sandrew			panic(
4902281494Sandrew			   "pmap_map_io_transient: TODO: Map out of DMAP data");
4903281494Sandrew		}
4904281494Sandrew	}
4905281494Sandrew
4906281494Sandrew	return (needs_mapping);
4907281494Sandrew}
4908281494Sandrew
4909281494Sandrewvoid
4910281494Sandrewpmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
4911281494Sandrew    boolean_t can_fault)
4912281494Sandrew{
4913281494Sandrew	vm_paddr_t paddr;
4914281494Sandrew	int i;
4915281494Sandrew
4916281494Sandrew	if (!can_fault)
4917281494Sandrew		sched_unpin();
4918281494Sandrew	for (i = 0; i < count; i++) {
4919281494Sandrew		paddr = VM_PAGE_TO_PHYS(page[i]);
4920297617Sandrew		if (!PHYS_IN_DMAP(paddr)) {
4921286073Semaste			panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
4922281494Sandrew		}
4923281494Sandrew	}
4924281494Sandrew}
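/*
 * Illustrative usage sketch (hypothetical caller): the two functions
 * above are intended to be used as a pair around code that needs a
 * kernel virtual address for an arbitrary set of pages.
 */
#if 0
static void
example_touch_page(vm_page_t m)
{
	vm_offset_t va;
	boolean_t mapped;

	mapped = pmap_map_io_transient(&m, &va, 1, TRUE);
	/* ... access the page through 'va' ... */
	if (mapped)
		pmap_unmap_io_transient(&m, &va, 1, TRUE);
}
#endif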
4925