/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 * Copyright (c) 2014-2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * This software was developed by Andrew Turner under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/arm64/arm64/pmap.c 305879 2016-09-16 12:17:01Z andrew $");

/*
 *	Manages physical address maps.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduce-protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and as to when physical maps must be made correct.
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/_unrhdr.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

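/*
 * Number of page-table entries that fit in one page at each translation
 * level, and the total number of L1/L2 entries reachable from the level
 * above.
 */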
#define	NL0PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL1PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL2PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL3PG		(PAGE_SIZE/(sizeof (pt_entry_t)))

#define	NUL0E		L0_ENTRIES
#define	NUL1E		(NUL0E * NL1PG)
#define	NUL2E		(NUL1E * NL2PG)

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

/*
 * Memory attribute indices.  These are configured by the mair_el1 register,
 * which is set up in locore.S.
 */
#define	DEVICE_MEMORY	0
#define	UNCACHED_MEMORY	1
#define	CACHED_MEMORY	2


#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)

#define	NPV_LIST_LOCKS	MAXCPU

#define	PHYS_TO_PV_LIST_LOCK(pa)	\
			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])

#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
	struct rwlock **_lockp = (lockp);		\
	struct rwlock *_new_lock;			\
							\
	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
	if (_new_lock != *_lockp) {			\
		if (*_lockp != NULL)			\
			rw_wunlock(*_lockp);		\
		*_lockp = _new_lock;			\
		rw_wlock(*_lockp);			\
	}						\
} while (0)

#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))

#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
	struct rwlock **_lockp = (lockp);		\
							\
	if (*_lockp != NULL) {				\
		rw_wunlock(*_lockp);			\
		*_lockp = NULL;				\
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))

struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

struct msgbuf *msgbufp = NULL;

vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
vm_paddr_t dmap_phys_max;	/* The limit of the dmap region */
vm_offset_t dmap_max_addr;	/* The virtual address limit of the dmap */

/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS  & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS  & ~L0_OFFSET) == DMAP_MAX_ADDRESS);

#define	DMAP_TABLES	((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
extern pt_entry_t pagetable_dmap[];

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m, struct rwlock **lockp);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
		struct rwlock **lockp);

static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define	pmap_set(table, mask) atomic_set_64(table, mask)
#define	pmap_load_clear(table) atomic_swap_64(table, 0)
#define	pmap_load(table) (*table)

/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

	memcpy(d, s, PAGE_SIZE);
}

#define	pmap_l0_index(va)	(((va) >> L0_SHIFT) & L0_ADDR_MASK)
#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

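/*
 * Page-table walkers.  The pmap_lN() functions start from the pmap's root
 * table and return a pointer to the level-N entry for 'va', or NULL if an
 * intermediate table is missing.  The pmap_lN_to_lM() variants take the
 * parent entry and descend one level through the direct map.
 */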
static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l0[pmap_l0_index(va)]);
}

static __inline pd_entry_t *
pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
	return (&l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l0;

	l0 = pmap_l0(pmap, va);
	if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
		return (NULL);

	return (pmap_l0_to_l1(l0, va));
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
	return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
	return (&l3[pmap_l3_index(va)]);
}

/*
 * Returns the lowest valid pde for a given virtual address.
 * The next level may or may not point to a valid page or block.
 */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l0, *l1, *l2, desc;

	l0 = pmap_l0(pmap, va);
	desc = pmap_load(l0) & ATTR_DESCR_MASK;
	if (desc != L0_TABLE) {
		*level = -1;
		return (NULL);
	}

	l1 = pmap_l0_to_l1(l0, va);
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc != L1_TABLE) {
		*level = 0;
		return (l0);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc != L2_TABLE) {
		*level = 1;
		return (l1);
	}

	*level = 2;
	return (l2);
}

/*
 * Returns the lowest valid pte block or table entry for a given virtual
 * address. If there are no valid entries return NULL and set the level to
 * the first invalid level.
 */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l1, *l2, desc;
	pt_entry_t *l3;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL) {
		*level = 0;
		return (NULL);
	}
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc == L1_BLOCK) {
		*level = 1;
		return (l1);
	}

	if (desc != L1_TABLE) {
		*level = 1;
		return (NULL);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc == L2_BLOCK) {
		*level = 2;
		return (l2);
	}

	if (desc != L2_TABLE) {
		*level = 2;
		return (NULL);
	}

	*level = 3;
	l3 = pmap_l2_to_l3(l2, va);
	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
		return (NULL);

	return (l3);
}

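/*
 * Walk the page tables for 'va' and hand back pointers to the entries at
 * each level.  Levels below a block mapping are returned as NULL.  Returns
 * false if the root table is missing or 'va' is not covered by a valid
 * L0/L1 table entry.
 */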
bool
pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
    pd_entry_t **l2, pt_entry_t **l3)
{
	pd_entry_t *l0p, *l1p, *l2p;

	if (pmap->pm_l0 == NULL)
		return (false);

	l0p = pmap_l0(pmap, va);
	*l0 = l0p;

	if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
		return (false);

	l1p = pmap_l0_to_l1(l0p, va);
	*l1 = l1p;

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
		*l2 = NULL;
		*l3 = NULL;
		return (true);
	}

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
		return (false);

	l2p = pmap_l1_to_l2(l1p, va);
	*l2 = l2p;

	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
		*l3 = NULL;
		return (true);
	}

	*l3 = pmap_l2_to_l3(l2p, va);

	return (true);
}

static __inline int
pmap_is_current(pmap_t pmap)
{

	return ((pmap == pmap_kernel()) ||
	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

	return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
	    ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

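/* Write a page table entry back from the data cache to memory. */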
#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))

/*
 * Checks if the page is dirty.  We currently lack proper tracking of this on
 * arm64, so for now assume that a page mapped read/write that has been
 * accessed is dirty.
 */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
}

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

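/*
 * Bootstrap helpers that walk the page tables built by locore before the
 * direct map exists: find the L2 table covering 'va', and translate an
 * early kernel virtual address to its physical address.
 */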
static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
	pt_entry_t *l2;
	pd_entry_t *l1;

	l1 = (pd_entry_t *)l1pt;
	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

	/* Check locore has used a table L1 map */
	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
	   ("Invalid bootstrap L1 table"));
	/* Find the address of the L2 table */
	l2 = (pt_entry_t *)init_pt_va;
	*l2_slot = pmap_l2_index(va);

	return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
	u_int l1_slot, l2_slot;
	pt_entry_t *l2;

	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
}

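/*
 * Build the direct map (DMAP) using L1 block entries covering the physical
 * range from min_pa to max_pa, and record the resulting limits in
 * dmap_phys_max and dmap_max_addr.
 */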
static void
pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
{
	vm_offset_t va;
	vm_paddr_t pa;
	u_int l1_slot;

	pa = dmap_phys_base = min_pa & ~L1_OFFSET;
	va = DMAP_MIN_ADDRESS;
	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
		l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);

		pmap_load_store(&pagetable_dmap[l1_slot],
		    (pa & ~L1_OFFSET) | ATTR_DEFAULT |
		    ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
	}

	/* Set the upper limit of the DMAP region */
	dmap_phys_max = pa;
	dmap_max_addr = va;

	cpu_dcache_wb_range((vm_offset_t)pagetable_dmap,
	    PAGE_SIZE * DMAP_TABLES);
	cpu_tlb_flushID();
}

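/*
 * Allocate L2 tables from 'l2_start' and link them into the bootstrap L1
 * table so that the kernel VA range up to VM_MAX_KERNEL_ADDRESS is covered.
 * Returns the first free address after the new tables.
 */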
static vm_offset_t
pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
{
	vm_offset_t l2pt;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;

	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));

	l1 = (pd_entry_t *)l1pt;
	l1_slot = pmap_l1_index(va);
	l2pt = l2_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		pa = pmap_early_vtophys(l1pt, l2pt);
		pmap_load_store(&l1[l1_slot],
		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
		l2pt += PAGE_SIZE;
	}

	/* Clean the L2 page table */
	memset((void *)l2_start, 0, l2pt - l2_start);
	cpu_dcache_wb_range(l2_start, l2pt - l2_start);

	/* Flush the l1 table to ram */
	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);

	return l2pt;
}

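/*
 * As above, but allocate L3 tables from 'l3_start' and link them into the
 * kernel L2 table; used to back the early devmap region.
 */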
static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
	vm_offset_t l2pt, l3pt;
	vm_paddr_t pa;
	pd_entry_t *l2;
	u_int l2_slot;

	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

	l2 = pmap_l2(kernel_pmap, va);
	l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
	l2pt = (vm_offset_t)l2;
	l2_slot = pmap_l2_index(va);
	l3pt = l3_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

		pa = pmap_early_vtophys(l1pt, l3pt);
		pmap_load_store(&l2[l2_slot],
		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
		l3pt += PAGE_SIZE;
	}

	/* Clean the L3 page table */
	memset((void *)l3_start, 0, l3pt - l3_start);
	cpu_dcache_wb_range(l3_start, l3pt - l3_start);

	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

	return l3pt;
}

/*
 *	Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
    vm_size_t kernlen)
{
	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
	uint64_t kern_delta;
	pt_entry_t *l2;
	vm_offset_t va, freemempos;
	vm_offset_t dpcpu, msgbufpv;
	vm_paddr_t pa, max_pa, min_pa;
	int i;

	kern_delta = KERNBASE - kernstart;
	physmem = 0;

	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
	printf("%lx\n", l1pt);
	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

	/* Set this early so we can use the pagetable walking functions */
	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
	PMAP_LOCK_INIT(kernel_pmap);

	/* Assume the address we were loaded to is a valid physical address */
	min_pa = max_pa = KERNBASE - kern_delta;

	/*
	 * Find the minimum physical address. physmap is sorted,
	 * but may contain empty ranges.
	 */
	for (i = 0; i < (physmap_idx * 2); i += 2) {
		if (physmap[i] == physmap[i + 1])
			continue;
		if (physmap[i] <= min_pa)
			min_pa = physmap[i];
		if (physmap[i + 1] > max_pa)
			max_pa = physmap[i + 1];
	}

	/* Create a direct map region early so we can use it for pa -> va */
	pmap_bootstrap_dmap(l1pt, min_pa, max_pa);

	va = KERNBASE;
	pa = KERNBASE - kern_delta;

	/*
	 * Start to initialise phys_avail by copying from physmap
	 * up to the physical address KERNBASE points at.
	 */
	map_slot = avail_slot = 0;
	for (; map_slot < (physmap_idx * 2) &&
	    avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		if (physmap[map_slot] <= pa &&
		    physmap[map_slot + 1] > pa)
			break;

		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}

	/* Add the memory before the kernel */
	if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) {
		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = pa;
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}
	used_map_slot = map_slot;

	/*
	 * Read the page table to find out what is already mapped.
	 * This assumes we have mapped a block of memory from KERNBASE
	 * using a single L1 entry.
	 */
	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);

	/* Sanity check the index, KERNBASE should be the first VA */
	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));

	/* Find how many pages we have mapped */
	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
			break;

		/* Check locore used L2 blocks */
		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
		    ("Invalid bootstrap L2 table"));
		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
		    ("Incorrect PA in L2 table"));

		va += L2_SIZE;
		pa += L2_SIZE;
	}

	va = roundup2(va, L1_SIZE);

	freemempos = KERNBASE + kernlen;
	freemempos = roundup2(freemempos, PAGE_SIZE);
	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
	/* And the l3 tables for the early devmap */
	freemempos = pmap_bootstrap_l3(l1pt,
	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);

	cpu_tlb_flushID();

#define alloc_pages(var, np)						\
	(var) = freemempos;						\
	freemempos += (np * PAGE_SIZE);					\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));

	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;

	virtual_avail = roundup2(freemempos, L1_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	/* Finish initialising physmap */
	map_slot = used_map_slot;
	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
	    map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		/* Have we used the current range? */
		if (physmap[map_slot + 1] <= pa)
			continue;

		/* Do we need to split the entry? */
		if (physmap[map_slot] < pa) {
			phys_avail[avail_slot] = pa;
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		} else {
			phys_avail[avail_slot] = physmap[map_slot];
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		}
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;

		avail_slot += 2;
	}
	phys_avail[avail_slot] = 0;
	phys_avail[avail_slot + 1] = 0;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = atop(phys_avail[avail_slot - 1]);

	cpu_tlb_flushID();
}

/*
 *	Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	int i;

	/*
	 * Initialize the pv chunk list mutex.
	 */
	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

	/*
	 * Initialize the pool of pv list locks.
	 */
	for (i = 0; i < NPV_LIST_LOCKS; i++)
		rw_init(&pv_list_locks[i], "pmap pv list");
}

/*
 * Invalidate a single TLB entry.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vaae1is, %0	\n"
	    "dsb  ish		\n"
	    "isb		\n"
	    : : "r"(va >> PAGE_SHIFT));
	sched_unpin();
}

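/*
 * Invalidate the TLB entries for a range of pages.
 */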
PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	sched_pin();
	dsb(ishst);
	for (addr = sva; addr < eva; addr += PAGE_SIZE) {
		__asm __volatile(
		    "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
	}
	__asm __volatile(
	    "dsb  ish	\n"
	    "isb	\n");
	sched_unpin();
}

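/*
 * Invalidate all TLB entries.
 */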
PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vmalle1is	\n"
	    "dsb  ish		\n"
	    "isb		\n");
	sched_unpin();
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	pa = 0;
	PMAP_LOCK(pmap);
	/*
	 * Find the block or page map for this virtual address. pmap_pte
	 * will return either a valid block/page entry, or NULL.
	 */
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);
		pa = tpte & ~ATTR_MASK;
		switch(lvl) {
		case 1:
			KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
			    ("pmap_extract: Invalid L1 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L1_OFFSET);
			break;
		case 2:
			KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
			    ("pmap_extract: Invalid L2 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L2_OFFSET);
			break;
		case 3:
			KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
			    ("pmap_extract: Invalid L3 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L3_OFFSET);
			break;
		}
	}
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	vm_page_t m;
	int lvl;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);

		KASSERT(lvl > 0 && lvl <= 3,
		    ("pmap_extract_and_hold: Invalid level %d", lvl));
		CTASSERT(L1_BLOCK == L2_BLOCK);
		KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
		    (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
		    ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
		     tpte & ATTR_DESCR_MASK));
		if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
		    ((prot & VM_PROT_WRITE) == 0)) {
			if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

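/*
 *	Routine:	pmap_kextract
 *	Function:
 *		Extract the physical address for a kernel virtual address,
 *		using the DMAP region directly when possible.
 */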
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		pa = 0;
		pte = pmap_pte(kernel_pmap, va, &lvl);
		if (pte != NULL) {
			tpte = pmap_load(pte);
			pa = tpte & ~ATTR_MASK;
			switch(lvl) {
			case 1:
				KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
				    ("pmap_kextract: Invalid L1 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L1_OFFSET);
				break;
			case 2:
				KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
				    ("pmap_kextract: Invalid L2 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L2_OFFSET);
				break;
			case 3:
				KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
				    ("pmap_kextract: Invalid L3 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L3_OFFSET);
				break;
			}
		}
	}
	return (pa);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

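/*
 * Map a physically contiguous, page-aligned range into the kernel at 'sva'
 * using L3 pages with the given memory attribute index.
 */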
static void
pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
{
	pd_entry_t *pde;
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT((pa & L3_OFFSET) == 0,
	   ("pmap_kenter: Invalid physical address"));
	KASSERT((sva & L3_OFFSET) == 0,
	   ("pmap_kenter: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kenter: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));

		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
		    ATTR_IDX(mode) | L3_PAGE);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{

	pmap_kenter(sva, size, pa, DEVICE_MEMORY);
}

/*
 * Remove a page from the kernel pagetables.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;
	int lvl;

	pte = pmap_pte(kernel_pmap, va, &lvl);
	KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
	KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));

	if (pmap_l3_valid_cacheable(pmap_load(pte)))
		cpu_dcache_wb_range(va, L3_SIZE);
	pmap_load_clear(pte);
	PTE_SYNC(pte);
	pmap_invalidate_page(kernel_pmap, va);
}

void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT((sva & L3_OFFSET) == 0,
	   ("pmap_kremove_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kremove_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		pmap_load_clear(pte);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	The value passed in '*virt' is a suggested virtual address for
 *	the mapping. Architectures which can support a direct-mapped
 *	physical to virtual region can return the appropriate address
 *	within that region, leaving '*virt' unchanged. Other
 *	architectures should map the pages starting at '*virt' and
 *	update '*virt' with the first usable address after the mapped
 *	region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	return PHYS_TO_DMAP(start);
}


/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pd_entry_t *pde;
	pt_entry_t *pte, pa;
	vm_offset_t va;
	vm_page_t m;
	int i, lvl;

	va = sva;
	for (i = 0; i < count; i++) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2,
		    ("pmap_qenter: Invalid level %d", lvl));

		m = ma[i];
		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
		    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, pa);
		PTE_SYNC(pte);

		va += L3_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));

	va = sva;
	while (count-- > 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		if (pte != NULL) {
			if (pmap_l3_valid_cacheable(pmap_load(pte)))
				cpu_dcache_wb_range(va, L3_SIZE);
			pmap_load_clear(pte);
			PTE_SYNC(pte);
		}

		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/
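/*
 * Release every page on the specified list back to the physical memory
 * allocator, preserving each page's PG_ZERO setting.
 */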
static __inline void
pmap_free_zero_pages(struct spglist *free)
{
	vm_page_t m;

	while ((m = SLIST_FIRST(free)) != NULL) {
		SLIST_REMOVE_HEAD(free, plinks.s.ss);
		/* Preserve the page's PG_ZERO setting. */
		vm_page_free_toq(m);
	}
}

/*
 * Schedule the specified unused page table page to be freed.  Specifically,
 * add the page to the specified list of pages that will be released to the
 * physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
    boolean_t set_PG_ZERO)
{

	if (set_PG_ZERO)
		m->flags |= PG_ZERO;
	else
		m->flags &= ~PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}

/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_l3(pmap, va, m, free);
		return (TRUE);
	} else
		return (FALSE);
}

static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= (NUL2E + NUL1E)) {
		/* l1 page */
		pd_entry_t *l0;

		l0 = pmap_l0(pmap, va);
		pmap_load_clear(l0);
		PTE_SYNC(l0);
	} else if (m->pindex >= NUL2E) {
		/* l2 page */
		pd_entry_t *l1;

		l1 = pmap_l1(pmap, va);
		pmap_load_clear(l1);
		PTE_SYNC(l1);
	} else {
		/* l3 page */
		pd_entry_t *l2;

		l2 = pmap_l2(pmap, va);
		pmap_load_clear(l2);
		PTE_SYNC(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUL2E) {
		/* We just released an l3, unhold the matching l2 */
		pd_entry_t *l1, tl1;
		vm_page_t l2pg;

		l1 = pmap_l1(pmap, va);
		tl1 = pmap_load(l1);
		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l2pg, free);
	} else if (m->pindex < (NUL2E + NUL1E)) {
		/* We just released an l2, unhold the matching l1 */
		pd_entry_t *l0, tl0;
		vm_page_t l1pg;

		l0 = pmap_l0(pmap, va);
		tl0 = pmap_load(l0);
		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l1pg, free);
	}
	pmap_invalidate_page(pmap, va);

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}

/*
 * After removing an l3 entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
    struct spglist *free)
{
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
	return (pmap_unwire_l3(pmap, va, mpte, free));
}

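/*
 * pmap_pinit0 initializes the bootstrap pmap, which simply shares the
 * kernel's L0 table; pmap_pinit gives a new pmap its own zero-filled L0
 * page.
 */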
1341281494Sandrewvoid
1342281494Sandrewpmap_pinit0(pmap_t pmap)
1343281494Sandrew{
1344281494Sandrew
1345281494Sandrew	PMAP_LOCK_INIT(pmap);
1346281494Sandrew	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1347297446Sandrew	pmap->pm_l0 = kernel_pmap->pm_l0;
1348281494Sandrew}
1349281494Sandrew
1350281494Sandrewint
1351281494Sandrewpmap_pinit(pmap_t pmap)
1352281494Sandrew{
1353297446Sandrew	vm_paddr_t l0phys;
1354297446Sandrew	vm_page_t l0pt;
1355281494Sandrew
1356281494Sandrew	/*
1357297446Sandrew	 * allocate the l0 page
1358281494Sandrew	 */
1359297446Sandrew	while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
1360281494Sandrew	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
1361281494Sandrew		VM_WAIT;
1362281494Sandrew
1363297446Sandrew	l0phys = VM_PAGE_TO_PHYS(l0pt);
1364297446Sandrew	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);
1365281494Sandrew
1366297446Sandrew	if ((l0pt->flags & PG_ZERO) == 0)
1367297446Sandrew		pagezero(pmap->pm_l0);
1368281494Sandrew
1369281494Sandrew	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1370281494Sandrew
1371281494Sandrew	return (1);
1372281494Sandrew}
1373281494Sandrew
1374281494Sandrew/*
1375281494Sandrew * This routine is called if the desired page table page does not exist.
1376281494Sandrew *
1377281494Sandrew * If page table page allocation fails, this routine may sleep before
1378281494Sandrew * returning NULL.  It sleeps only if a lock pointer was given.
1379281494Sandrew *
1380281494Sandrew * Note: If a page allocation fails at page table level two or three,
1381281494Sandrew * one or two pages may be held during the wait, only to be released
1382281494Sandrew * afterwards.  Holding them is a conservative approach that avoids
1383281494Sandrew * race conditions.
1384281494Sandrew */
1385281494Sandrewstatic vm_page_t
1386281494Sandrew_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
1387281494Sandrew{
1388297446Sandrew	vm_page_t m, l1pg, l2pg;
1389281494Sandrew
1390281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1391281494Sandrew
1392281494Sandrew	/*
1393281494Sandrew	 * Allocate a page table page.
1394281494Sandrew	 */
1395281494Sandrew	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
1396281494Sandrew	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
1397281494Sandrew		if (lockp != NULL) {
1398281494Sandrew			RELEASE_PV_LIST_LOCK(lockp);
1399281494Sandrew			PMAP_UNLOCK(pmap);
1400281494Sandrew			VM_WAIT;
1401281494Sandrew			PMAP_LOCK(pmap);
1402281494Sandrew		}
1403281494Sandrew
1404281494Sandrew		/*
1405281494Sandrew		 * Indicate the need to retry.  While waiting, the page table
1406281494Sandrew		 * page may have been allocated.
1407281494Sandrew		 */
1408281494Sandrew		return (NULL);
1409281494Sandrew	}
1410281494Sandrew	if ((m->flags & PG_ZERO) == 0)
1411281494Sandrew		pmap_zero_page(m);
1412281494Sandrew
1413281494Sandrew	/*
1414281494Sandrew	 * Map the pagetable page into the process address space, if
1415281494Sandrew	 * it isn't already there.
1416281494Sandrew	 */
1417281494Sandrew
1418297446Sandrew	if (ptepindex >= (NUL2E + NUL1E)) {
1419297446Sandrew		pd_entry_t *l0;
1420297446Sandrew		vm_pindex_t l0index;
1421281494Sandrew
1422297446Sandrew		l0index = ptepindex - (NUL2E + NUL1E);
1423297446Sandrew		l0 = &pmap->pm_l0[l0index];
1424297446Sandrew		pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
1425297446Sandrew		PTE_SYNC(l0);
1426297446Sandrew	} else if (ptepindex >= NUL2E) {
1427297446Sandrew		vm_pindex_t l0index, l1index;
1428297446Sandrew		pd_entry_t *l0, *l1;
1429297446Sandrew		pd_entry_t tl0;
1430297446Sandrew
1431297446Sandrew		l1index = ptepindex - NUL2E;
1432297446Sandrew		l0index = l1index >> L0_ENTRIES_SHIFT;
1433297446Sandrew
1434297446Sandrew		l0 = &pmap->pm_l0[l0index];
1435297446Sandrew		tl0 = pmap_load(l0);
1436297446Sandrew		if (tl0 == 0) {
1437297446Sandrew			/* recurse for allocating page dir */
1438297446Sandrew			if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
1439297446Sandrew			    lockp) == NULL) {
1440297446Sandrew				--m->wire_count;
1441297446Sandrew				/* XXX: release mem barrier? */
1442297446Sandrew				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1443297446Sandrew				vm_page_free_zero(m);
1444297446Sandrew				return (NULL);
1445297446Sandrew			}
1446297446Sandrew		} else {
1447297446Sandrew			l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
1448297446Sandrew			l1pg->wire_count++;
1449297446Sandrew		}
1450297446Sandrew
1451297446Sandrew		l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
1452297446Sandrew		l1 = &l1[ptepindex & Ln_ADDR_MASK];
1453281494Sandrew		pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
1454281494Sandrew		PTE_SYNC(l1);
1455281494Sandrew	} else {
1456297446Sandrew		vm_pindex_t l0index, l1index;
1457297446Sandrew		pd_entry_t *l0, *l1, *l2;
1458297446Sandrew		pd_entry_t tl0, tl1;
1459281494Sandrew
1460297446Sandrew		l1index = ptepindex >> Ln_ENTRIES_SHIFT;
1461297446Sandrew		l0index = l1index >> L0_ENTRIES_SHIFT;
1462297446Sandrew
1463297446Sandrew		l0 = &pmap->pm_l0[l0index];
1464297446Sandrew		tl0 = pmap_load(l0);
1465297446Sandrew		if (tl0 == 0) {
1466281494Sandrew			/* recurse for allocating page dir */
1467297446Sandrew			if (_pmap_alloc_l3(pmap, NUL2E + l1index,
1468281494Sandrew			    lockp) == NULL) {
1469281494Sandrew				--m->wire_count;
1470281494Sandrew				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1471281494Sandrew				vm_page_free_zero(m);
1472281494Sandrew				return (NULL);
1473281494Sandrew			}
1474297446Sandrew			tl0 = pmap_load(l0);
1475297446Sandrew			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
1476297446Sandrew			l1 = &l1[l1index & Ln_ADDR_MASK];
1477281494Sandrew		} else {
1478297446Sandrew			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
1479297446Sandrew			l1 = &l1[l1index & Ln_ADDR_MASK];
1480297446Sandrew			tl1 = pmap_load(l1);
1481297446Sandrew			if (tl1 == 0) {
1482297446Sandrew				/* recurse for allocating page dir */
1483297446Sandrew				if (_pmap_alloc_l3(pmap, NUL2E + l1index,
1484297446Sandrew				    lockp) == NULL) {
1485297446Sandrew					--m->wire_count;
1486297446Sandrew					/* XXX: release mem barrier? */
1487297446Sandrew					atomic_subtract_int(
1488297446Sandrew					    &vm_cnt.v_wire_count, 1);
1489297446Sandrew					vm_page_free_zero(m);
1490297446Sandrew					return (NULL);
1491297446Sandrew				}
1492297446Sandrew			} else {
1493297446Sandrew				l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
1494297446Sandrew				l2pg->wire_count++;
1495297446Sandrew			}
1496281494Sandrew		}
1497281494Sandrew
1498288445Sandrew		l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
1499281494Sandrew		l2 = &l2[ptepindex & Ln_ADDR_MASK];
1500285537Sandrew		pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
1501281494Sandrew		PTE_SYNC(l2);
1502281494Sandrew	}
1503281494Sandrew
1504281494Sandrew	pmap_resident_count_inc(pmap, 1);
1505281494Sandrew
1506281494Sandrew	return (m);
1507281494Sandrew}
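
/*
 * Informal recap of the ptepindex arithmetic used above (the code is the
 * authority):
 *
 *	l3 page:  l1index = ptepindex >> Ln_ENTRIES_SHIFT;
 *	          l0index = l1index >> L0_ENTRIES_SHIFT;
 *	l2 page:  l1index = ptepindex - NUL2E;
 *	          l0index = l1index >> L0_ENTRIES_SHIFT;
 *	l1 page:  l0index = ptepindex - (NUL2E + NUL1E);
 *
 * The bias added to ptepindex encodes the level of the page being
 * allocated, and each shift selects the slot one table level further up.
 */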
1508281494Sandrew
1509281494Sandrewstatic vm_page_t
1510281494Sandrewpmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
1511281494Sandrew{
1512281494Sandrew	vm_pindex_t ptepindex;
1513297446Sandrew	pd_entry_t *pde, tpde;
1514281494Sandrew	vm_page_t m;
1515297446Sandrew	int lvl;
1516281494Sandrew
1517281494Sandrew	/*
1518281494Sandrew	 * Calculate pagetable page index
1519281494Sandrew	 */
1520281494Sandrew	ptepindex = pmap_l2_pindex(va);
1521281494Sandrewretry:
1522281494Sandrew	/*
1523281494Sandrew	 * Get the page directory entry
1524281494Sandrew	 */
1525297446Sandrew	pde = pmap_pde(pmap, va, &lvl);
1526281494Sandrew
1527281494Sandrew	/*
1528297446Sandrew	 * If the page table page is mapped, we just increment the hold count,
1529297446Sandrew	 * and activate it. If we get a level 2 pde it will point to a level 3
1530297446Sandrew	 * table.
1531281494Sandrew	 */
1532297446Sandrew	if (lvl == 2) {
1533297446Sandrew		tpde = pmap_load(pde);
1534297446Sandrew		if (tpde != 0) {
1535297446Sandrew			m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
1536297446Sandrew			m->wire_count++;
1537297446Sandrew			return (m);
1538297446Sandrew		}
1539281494Sandrew	}
1540297446Sandrew
1541297446Sandrew	/*
1542297446Sandrew	 * We get here if the page table page isn't mapped or has been deallocated.
1543297446Sandrew	 */
1544297446Sandrew	m = _pmap_alloc_l3(pmap, ptepindex, lockp);
1545297446Sandrew	if (m == NULL && lockp != NULL)
1546297446Sandrew		goto retry;
1547297446Sandrew
1548281494Sandrew	return (m);
1549281494Sandrew}
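
/*
 * Illustrative caller pattern (a sketch only; pmap_enter() below is the
 * real example, including its PMAP_ENTER_NOSLEEP handling):
 *
 *	mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
 *	if (mpte == NULL && nosleep)
 *		return (KERN_RESOURCE_SHORTAGE);
 *
 * When a lock pointer is supplied the allocation may drop the pmap lock
 * and sleep in VM_WAIT, so the page tables can change before it returns.
 */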
1550281494Sandrew
1551281494Sandrew
1552281494Sandrew/***************************************************
1553281494Sandrew * Pmap allocation/deallocation routines.
1554281494Sandrew ***************************************************/
1555281494Sandrew
1556281494Sandrew/*
1557281494Sandrew * Release any resources held by the given physical map.
1558281494Sandrew * Called when a pmap initialized by pmap_pinit is being released.
1559281494Sandrew * Should only be called if the map contains no valid mappings.
1560281494Sandrew */
1561281494Sandrewvoid
1562281494Sandrewpmap_release(pmap_t pmap)
1563281494Sandrew{
1564281494Sandrew	vm_page_t m;
1565281494Sandrew
1566281494Sandrew	KASSERT(pmap->pm_stats.resident_count == 0,
1567281494Sandrew	    ("pmap_release: pmap resident count %ld != 0",
1568281494Sandrew	    pmap->pm_stats.resident_count));
1569281494Sandrew
1570297446Sandrew	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));
1571281494Sandrew
1572281494Sandrew	m->wire_count--;
1573281494Sandrew	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1574281494Sandrew	vm_page_free_zero(m);
1575281494Sandrew}
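
/*
 * Note (informal): only the single l0 page allocated in pmap_pinit() is
 * freed here.  Lower-level table pages must already have been released
 * through pmap_unwire_l3(), and because _pmap_alloc_l3() counts each
 * table page in resident_count, the assertion above catches any that
 * were leaked.
 */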
1576281494Sandrew
1577281494Sandrew#if 0
1578281494Sandrewstatic int
1579281494Sandrewkvm_size(SYSCTL_HANDLER_ARGS)
1580281494Sandrew{
1581281494Sandrew	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
1582281494Sandrew
1583281494Sandrew	return sysctl_handle_long(oidp, &ksize, 0, req);
1584281494Sandrew}
1585305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
1586281494Sandrew    0, 0, kvm_size, "LU", "Size of KVM");
1587281494Sandrew
1588281494Sandrewstatic int
1589281494Sandrewkvm_free(SYSCTL_HANDLER_ARGS)
1590281494Sandrew{
1591281494Sandrew	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
1592281494Sandrew
1593281494Sandrew	return sysctl_handle_long(oidp, &kfree, 0, req);
1594281494Sandrew}
1595305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
1596281494Sandrew    0, 0, kvm_free, "LU", "Amount of KVM free");
1597281494Sandrew#endif /* 0 */
1598281494Sandrew
1599281494Sandrew/*
1600281494Sandrew * grow the number of kernel page table entries, if needed
1601281494Sandrew */
1602281494Sandrewvoid
1603281494Sandrewpmap_growkernel(vm_offset_t addr)
1604281494Sandrew{
1605281494Sandrew	vm_paddr_t paddr;
1606281494Sandrew	vm_page_t nkpg;
1607297446Sandrew	pd_entry_t *l0, *l1, *l2;
1608281494Sandrew
1609281494Sandrew	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1610281494Sandrew
1611281494Sandrew	addr = roundup2(addr, L2_SIZE);
1612281494Sandrew	if (addr - 1 >= kernel_map->max_offset)
1613281494Sandrew		addr = kernel_map->max_offset;
1614281494Sandrew	while (kernel_vm_end < addr) {
1615297446Sandrew		l0 = pmap_l0(kernel_pmap, kernel_vm_end);
1616297446Sandrew		KASSERT(pmap_load(l0) != 0,
1617297446Sandrew		    ("pmap_growkernel: No level 0 kernel entry"));
1618297446Sandrew
1619297446Sandrew		l1 = pmap_l0_to_l1(l0, kernel_vm_end);
1620285045Sandrew		if (pmap_load(l1) == 0) {
1621281494Sandrew			/* We need a new L2 page table */
1622281494Sandrew			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
1623281494Sandrew			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
1624281494Sandrew			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1625281494Sandrew			if (nkpg == NULL)
1626281494Sandrew				panic("pmap_growkernel: no memory to grow kernel");
1627281494Sandrew			if ((nkpg->flags & PG_ZERO) == 0)
1628281494Sandrew				pmap_zero_page(nkpg);
1629281494Sandrew			paddr = VM_PAGE_TO_PHYS(nkpg);
1630281494Sandrew			pmap_load_store(l1, paddr | L1_TABLE);
1631281494Sandrew			PTE_SYNC(l1);
1632281494Sandrew			continue; /* try again */
1633281494Sandrew		}
1634281494Sandrew		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
1635285045Sandrew		if ((pmap_load(l2) & ATTR_AF) != 0) {
1636281494Sandrew			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1637281494Sandrew			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1638281494Sandrew				kernel_vm_end = kernel_map->max_offset;
1639305531Sandrew				break;
1640281494Sandrew			}
1641281494Sandrew			continue;
1642281494Sandrew		}
1643281494Sandrew
1644281494Sandrew		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
1645281494Sandrew		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
1646281494Sandrew		    VM_ALLOC_ZERO);
1647281494Sandrew		if (nkpg == NULL)
1648281494Sandrew			panic("pmap_growkernel: no memory to grow kernel");
1649281494Sandrew		if ((nkpg->flags & PG_ZERO) == 0)
1650281494Sandrew			pmap_zero_page(nkpg);
1651281494Sandrew		paddr = VM_PAGE_TO_PHYS(nkpg);
1652281494Sandrew		pmap_load_store(l2, paddr | L2_TABLE);
1653281494Sandrew		PTE_SYNC(l2);
1654285212Sandrew		pmap_invalidate_page(kernel_pmap, kernel_vm_end);
1655281494Sandrew
1656281494Sandrew		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1657281494Sandrew		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1658281494Sandrew			kernel_vm_end = kernel_map->max_offset;
1659305531Sandrew			break;
1660281494Sandrew		}
1661281494Sandrew	}
1662281494Sandrew}
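
/*
 * Informal note on the loop above: kernel_vm_end only advances in
 * L2_SIZE steps.  When the l1 entry is empty, a pass first installs a
 * new L2 table page and retries; the next pass then installs the L3
 * table page that actually backs the new range.
 */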
1663281494Sandrew
1664281494Sandrew
1665281494Sandrew/***************************************************
1666281494Sandrew * page management routines.
1667281494Sandrew ***************************************************/
1668281494Sandrew
1669281494SandrewCTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1670281494SandrewCTASSERT(_NPCM == 3);
1671281494SandrewCTASSERT(_NPCPV == 168);
1672281494Sandrew
1673281494Sandrewstatic __inline struct pv_chunk *
1674281494Sandrewpv_to_chunk(pv_entry_t pv)
1675281494Sandrew{
1676281494Sandrew
1677281494Sandrew	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1678281494Sandrew}
1679281494Sandrew
1680281494Sandrew#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1681281494Sandrew
1682281494Sandrew#define	PC_FREE0	0xfffffffffffffffful
1683281494Sandrew#define	PC_FREE1	0xfffffffffffffffful
1684281494Sandrew#define	PC_FREE2	0x000000fffffffffful
1685281494Sandrew
1686281494Sandrewstatic const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
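
/*
 * A worked check of the constants above (informal): _NPCPV is 168 and
 * 168 = 64 + 64 + 40, so the first two 64-bit maps of a fresh chunk are
 * fully set (PC_FREE0, PC_FREE1) while the third has only its low 40
 * bits set (PC_FREE2 == 0x000000ffffffffff).
 */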
1687281494Sandrew
1688281494Sandrew#if 0
1689281494Sandrew#ifdef PV_STATS
1690281494Sandrewstatic int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1691281494Sandrew
1692281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1693281494Sandrew	"Current number of pv entry chunks");
1694281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1695281494Sandrew	"Current number of pv entry chunks allocated");
1696281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1697281494Sandrew	"Current number of pv entry chunk frees");
1698281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1699281494Sandrew	"Number of times tried to get a chunk page but failed.");
1700281494Sandrew
1701281494Sandrewstatic long pv_entry_frees, pv_entry_allocs, pv_entry_count;
1702281494Sandrewstatic int pv_entry_spare;
1703281494Sandrew
1704281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1705281494Sandrew	"Current number of pv entry frees");
1706281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1707281494Sandrew	"Current number of pv entry allocs");
1708281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1709281494Sandrew	"Current number of pv entries");
1710281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1711281494Sandrew	"Current number of spare pv entries");
1712281494Sandrew#endif
1713281494Sandrew#endif /* 0 */
1714281494Sandrew
1715281494Sandrew/*
1716281494Sandrew * We are in a serious low memory condition.  Resort to
1717281494Sandrew * drastic measures to free some pages so we can allocate
1718281494Sandrew * another pv entry chunk.
1719281494Sandrew *
1720281494Sandrew * Returns NULL if PV entries were reclaimed from the specified pmap.
1721281494Sandrew *
1722281494Sandrew * We do not, however, unmap 2mpages because subsequent accesses will
1723281494Sandrew * allocate per-page pv entries until repromotion occurs, thereby
1724281494Sandrew * exacerbating the shortage of free pv entries.
1725281494Sandrew */
1726281494Sandrewstatic vm_page_t
1727281494Sandrewreclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
1728281494Sandrew{
1729281494Sandrew
1730286073Semaste	panic("ARM64TODO: reclaim_pv_chunk");
1731281494Sandrew}
1732281494Sandrew
1733281494Sandrew/*
1734281494Sandrew * free the pv_entry back to the free list
1735281494Sandrew */
1736281494Sandrewstatic void
1737281494Sandrewfree_pv_entry(pmap_t pmap, pv_entry_t pv)
1738281494Sandrew{
1739281494Sandrew	struct pv_chunk *pc;
1740281494Sandrew	int idx, field, bit;
1741281494Sandrew
1742281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1743281494Sandrew	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
1744281494Sandrew	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
1745281494Sandrew	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
1746281494Sandrew	pc = pv_to_chunk(pv);
1747281494Sandrew	idx = pv - &pc->pc_pventry[0];
1748281494Sandrew	field = idx / 64;
1749281494Sandrew	bit = idx % 64;
1750281494Sandrew	pc->pc_map[field] |= 1ul << bit;
1751281494Sandrew	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
1752281494Sandrew	    pc->pc_map[2] != PC_FREE2) {
1753281494Sandrew		/* 98% of the time, pc is already at the head of the list. */
1754281494Sandrew		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
1755281494Sandrew			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1756281494Sandrew			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1757281494Sandrew		}
1758281494Sandrew		return;
1759281494Sandrew	}
1760281494Sandrew	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1761281494Sandrew	free_pv_chunk(pc);
1762281494Sandrew}
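
/*
 * Worked example of the index decomposition in free_pv_entry() above
 * (informal): the entry at pc_pventry[100] gives idx = 100, so
 * field = 100 / 64 = 1 and bit = 100 % 64 = 36, i.e. bit 36 of
 * pc_map[1] is set when that entry is returned to its chunk.
 */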
1763281494Sandrew
1764281494Sandrewstatic void
1765281494Sandrewfree_pv_chunk(struct pv_chunk *pc)
1766281494Sandrew{
1767281494Sandrew	vm_page_t m;
1768281494Sandrew
1769281494Sandrew	mtx_lock(&pv_chunks_mutex);
1770281494Sandrew	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1771281494Sandrew	mtx_unlock(&pv_chunks_mutex);
1772281494Sandrew	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
1773281494Sandrew	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
1774281494Sandrew	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
1775281494Sandrew	/* entire chunk is free, return it */
1776281494Sandrew	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
1777281494Sandrew	dump_drop_page(m->phys_addr);
1778288256Salc	vm_page_unwire(m, PQ_NONE);
1779281494Sandrew	vm_page_free(m);
1780281494Sandrew}
1781281494Sandrew
1782281494Sandrew/*
1783281494Sandrew * Returns a new PV entry, allocating a new PV chunk from the system when
1784281494Sandrew * needed.  If this PV chunk allocation fails and a PV list lock pointer was
1785281494Sandrew * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
1786281494Sandrew * returned.
1787281494Sandrew *
1788281494Sandrew * The given PV list lock may be released.
1789281494Sandrew */
1790281494Sandrewstatic pv_entry_t
1791281494Sandrewget_pv_entry(pmap_t pmap, struct rwlock **lockp)
1792281494Sandrew{
1793281494Sandrew	int bit, field;
1794281494Sandrew	pv_entry_t pv;
1795281494Sandrew	struct pv_chunk *pc;
1796281494Sandrew	vm_page_t m;
1797281494Sandrew
1798281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1799281494Sandrew	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
1800281494Sandrewretry:
1801281494Sandrew	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1802281494Sandrew	if (pc != NULL) {
1803281494Sandrew		for (field = 0; field < _NPCM; field++) {
1804281494Sandrew			if (pc->pc_map[field]) {
1805281494Sandrew				bit = ffsl(pc->pc_map[field]) - 1;
1806281494Sandrew				break;
1807281494Sandrew			}
1808281494Sandrew		}
1809281494Sandrew		if (field < _NPCM) {
1810281494Sandrew			pv = &pc->pc_pventry[field * 64 + bit];
1811281494Sandrew			pc->pc_map[field] &= ~(1ul << bit);
1812281494Sandrew			/* If this was the last item, move it to tail */
1813281494Sandrew			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
1814281494Sandrew			    pc->pc_map[2] == 0) {
1815281494Sandrew				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1816281494Sandrew				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
1817281494Sandrew				    pc_list);
1818281494Sandrew			}
1819281494Sandrew			PV_STAT(atomic_add_long(&pv_entry_count, 1));
1820281494Sandrew			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
1821281494Sandrew			return (pv);
1822281494Sandrew		}
1823281494Sandrew	}
1824281494Sandrew	/* No free items, allocate another chunk */
1825281494Sandrew	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
1826281494Sandrew	    VM_ALLOC_WIRED);
1827281494Sandrew	if (m == NULL) {
1828281494Sandrew		if (lockp == NULL) {
1829281494Sandrew			PV_STAT(pc_chunk_tryfail++);
1830281494Sandrew			return (NULL);
1831281494Sandrew		}
1832281494Sandrew		m = reclaim_pv_chunk(pmap, lockp);
1833281494Sandrew		if (m == NULL)
1834281494Sandrew			goto retry;
1835281494Sandrew	}
1836281494Sandrew	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
1837281494Sandrew	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
1838281494Sandrew	dump_add_page(m->phys_addr);
1839281494Sandrew	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
1840281494Sandrew	pc->pc_pmap = pmap;
1841281494Sandrew	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
1842281494Sandrew	pc->pc_map[1] = PC_FREE1;
1843281494Sandrew	pc->pc_map[2] = PC_FREE2;
1844281494Sandrew	mtx_lock(&pv_chunks_mutex);
1845281494Sandrew	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1846281494Sandrew	mtx_unlock(&pv_chunks_mutex);
1847281494Sandrew	pv = &pc->pc_pventry[0];
1848281494Sandrew	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1849281494Sandrew	PV_STAT(atomic_add_long(&pv_entry_count, 1));
1850281494Sandrew	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
1851281494Sandrew	return (pv);
1852281494Sandrew}
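
/*
 * Sketch of a freshly carved chunk, following the code above (informal):
 * bit 0 of pc_map[0] is cleared because the first entry is handed
 * straight back to the caller,
 *
 *	pc->pc_map[0] = PC_FREE0 & ~1ul;
 *	pv = &pc->pc_pventry[0];
 *
 * leaving the remaining _NPCPV - 1 entries marked free for later calls.
 */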
1853281494Sandrew
1854281494Sandrew/*
1855281494Sandrew * First find and then remove the pv entry for the specified pmap and virtual
1856281494Sandrew * address from the specified pv list.  Returns the pv entry if found and NULL
1857281494Sandrew * otherwise.  This operation can be performed on pv lists for either 4KB or
1858281494Sandrew * 2MB page mappings.
1859281494Sandrew */
1860281494Sandrewstatic __inline pv_entry_t
1861281494Sandrewpmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1862281494Sandrew{
1863281494Sandrew	pv_entry_t pv;
1864281494Sandrew
1865281494Sandrew	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
1866281494Sandrew		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1867281494Sandrew			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
1868281494Sandrew			pvh->pv_gen++;
1869281494Sandrew			break;
1870281494Sandrew		}
1871281494Sandrew	}
1872281494Sandrew	return (pv);
1873281494Sandrew}
1874281494Sandrew
1875281494Sandrew/*
1876281494Sandrew * First find and then destroy the pv entry for the specified pmap and virtual
1877281494Sandrew * address.  This operation can be performed on pv lists for either 4KB or 2MB
1878281494Sandrew * page mappings.
1879281494Sandrew */
1880281494Sandrewstatic void
1881281494Sandrewpmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1882281494Sandrew{
1883281494Sandrew	pv_entry_t pv;
1884281494Sandrew
1885281494Sandrew	pv = pmap_pvh_remove(pvh, pmap, va);
1886281494Sandrew	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
1887281494Sandrew	free_pv_entry(pmap, pv);
1888281494Sandrew}
1889281494Sandrew
1890281494Sandrew/*
1891281494Sandrew * Conditionally create the PV entry for a 4KB page mapping if the required
1892281494Sandrew * memory can be allocated without resorting to reclamation.
1893281494Sandrew */
1894281494Sandrewstatic boolean_t
1895281494Sandrewpmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
1896281494Sandrew    struct rwlock **lockp)
1897281494Sandrew{
1898281494Sandrew	pv_entry_t pv;
1899281494Sandrew
1900281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1901281494Sandrew	/* Pass NULL instead of the lock pointer to disable reclamation. */
1902281494Sandrew	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
1903281494Sandrew		pv->pv_va = va;
1904281494Sandrew		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1905281494Sandrew		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
1906281494Sandrew		m->md.pv_gen++;
1907281494Sandrew		return (TRUE);
1908281494Sandrew	} else
1909281494Sandrew		return (FALSE);
1910281494Sandrew}
1911281494Sandrew
1912281494Sandrew/*
1913281494Sandrew * pmap_remove_l3: do the things to unmap a page in a process
1914281494Sandrew */
1915281494Sandrewstatic int
1916305531Sandrewpmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
1917281494Sandrew    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
1918281494Sandrew{
1919281494Sandrew	pt_entry_t old_l3;
1920281494Sandrew	vm_page_t m;
1921281494Sandrew
1922281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1923281494Sandrew	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
1924281494Sandrew		cpu_dcache_wb_range(va, L3_SIZE);
1925281494Sandrew	old_l3 = pmap_load_clear(l3);
1926281494Sandrew	PTE_SYNC(l3);
1927285212Sandrew	pmap_invalidate_page(pmap, va);
1928281494Sandrew	if (old_l3 & ATTR_SW_WIRED)
1929281494Sandrew		pmap->pm_stats.wired_count -= 1;
1930281494Sandrew	pmap_resident_count_dec(pmap, 1);
1931281494Sandrew	if (old_l3 & ATTR_SW_MANAGED) {
1932281494Sandrew		m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
1933281494Sandrew		if (pmap_page_dirty(old_l3))
1934281494Sandrew			vm_page_dirty(m);
1935281494Sandrew		if (old_l3 & ATTR_AF)
1936281494Sandrew			vm_page_aflag_set(m, PGA_REFERENCED);
1937281494Sandrew		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1938281494Sandrew		pmap_pvh_free(&m->md, pmap, va);
1939281494Sandrew	}
1940281494Sandrew	return (pmap_unuse_l3(pmap, va, l2e, free));
1941281494Sandrew}
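
/*
 * Informal note on the ordering in pmap_remove_l3() above: the data
 * cache is written back (for a current, cacheable mapping) before the
 * PTE is cleared, the cleared PTE is synced and the TLB entry is
 * invalidated, and only then are the dirty/referenced bits folded back
 * into the vm_page and the pv entry freed.
 */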
1942281494Sandrew
1943281494Sandrew/*
1944281494Sandrew *	Remove the given range of addresses from the specified map.
1945281494Sandrew *
1946281494Sandrew *	It is assumed that the start and end are properly
1947281494Sandrew *	rounded to the page size.
1948281494Sandrew */
1949281494Sandrewvoid
1950281494Sandrewpmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1951281494Sandrew{
1952281494Sandrew	struct rwlock *lock;
1953281494Sandrew	vm_offset_t va, va_next;
1954297446Sandrew	pd_entry_t *l0, *l1, *l2;
1955281494Sandrew	pt_entry_t l3_paddr, *l3;
1956281494Sandrew	struct spglist free;
1957281494Sandrew	int anyvalid;
1958281494Sandrew
1959281494Sandrew	/*
1960281494Sandrew	 * Perform an unsynchronized read.  This is, however, safe.
1961281494Sandrew	 */
1962281494Sandrew	if (pmap->pm_stats.resident_count == 0)
1963281494Sandrew		return;
1964281494Sandrew
1965281494Sandrew	anyvalid = 0;
1966281494Sandrew	SLIST_INIT(&free);
1967281494Sandrew
1968281494Sandrew	PMAP_LOCK(pmap);
1969281494Sandrew
1970281494Sandrew	lock = NULL;
1971281494Sandrew	for (; sva < eva; sva = va_next) {
1972281494Sandrew
1973281494Sandrew		if (pmap->pm_stats.resident_count == 0)
1974281494Sandrew			break;
1975281494Sandrew
1976297446Sandrew		l0 = pmap_l0(pmap, sva);
1977297446Sandrew		if (pmap_load(l0) == 0) {
1978297446Sandrew			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
1979297446Sandrew			if (va_next < sva)
1980297446Sandrew				va_next = eva;
1981297446Sandrew			continue;
1982297446Sandrew		}
1983297446Sandrew
1984297446Sandrew		l1 = pmap_l0_to_l1(l0, sva);
1985285045Sandrew		if (pmap_load(l1) == 0) {
1986281494Sandrew			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
1987281494Sandrew			if (va_next < sva)
1988281494Sandrew				va_next = eva;
1989281494Sandrew			continue;
1990281494Sandrew		}
1991281494Sandrew
1992281494Sandrew		/*
1993281494Sandrew		 * Calculate index for next page table.
1994281494Sandrew		 */
1995281494Sandrew		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
1996281494Sandrew		if (va_next < sva)
1997281494Sandrew			va_next = eva;
1998281494Sandrew
1999281494Sandrew		l2 = pmap_l1_to_l2(l1, sva);
2000281494Sandrew		if (l2 == NULL)
2001281494Sandrew			continue;
2002281494Sandrew
2003288445Sandrew		l3_paddr = pmap_load(l2);
2004281494Sandrew
2005281494Sandrew		/*
2006281494Sandrew		 * Weed out invalid mappings.
2007281494Sandrew		 */
2008281494Sandrew		if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
2009281494Sandrew			continue;
2010281494Sandrew
2011281494Sandrew		/*
2012281494Sandrew		 * Limit our scan to either the end of the va represented
2013281494Sandrew		 * by the current page table page, or to the end of the
2014281494Sandrew		 * range being removed.
2015281494Sandrew		 */
2016281494Sandrew		if (va_next > eva)
2017281494Sandrew			va_next = eva;
2018281494Sandrew
2019281494Sandrew		va = va_next;
2020281494Sandrew		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2021281494Sandrew		    sva += L3_SIZE) {
2022281494Sandrew			if (l3 == NULL)
2023281494Sandrew				panic("pmap_remove: l3 == NULL");
2024285045Sandrew			if (pmap_load(l3) == 0) {
2025281494Sandrew				if (va != va_next) {
2026281494Sandrew					pmap_invalidate_range(pmap, va, sva);
2027281494Sandrew					va = va_next;
2028281494Sandrew				}
2029281494Sandrew				continue;
2030281494Sandrew			}
2031281494Sandrew			if (va == va_next)
2032281494Sandrew				va = sva;
2033281494Sandrew			if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
2034281494Sandrew			    &lock)) {
2035281494Sandrew				sva += L3_SIZE;
2036281494Sandrew				break;
2037281494Sandrew			}
2038281494Sandrew		}
2039281494Sandrew		if (va != va_next)
2040281494Sandrew			pmap_invalidate_range(pmap, va, sva);
2041281494Sandrew	}
2042281494Sandrew	if (lock != NULL)
2043281494Sandrew		rw_wunlock(lock);
2044281494Sandrew	if (anyvalid)
2045281494Sandrew		pmap_invalidate_all(pmap);
2046281494Sandrew	PMAP_UNLOCK(pmap);
2047281494Sandrew	pmap_free_zero_pages(&free);
2048281494Sandrew}
2049281494Sandrew
2050281494Sandrew/*
2051281494Sandrew *	Routine:	pmap_remove_all
2052281494Sandrew *	Function:
2053281494Sandrew *		Removes this physical page from
2054281494Sandrew *		all physical maps in which it resides.
2055281494Sandrew *		Reflects back modify bits to the pager.
2056281494Sandrew *
2057281494Sandrew *	Notes:
2058281494Sandrew *		Original versions of this routine were very
2059281494Sandrew *		inefficient because they iteratively called
2060281494Sandrew *		pmap_remove (slow...)
2061281494Sandrew */
2062281494Sandrew
2063281494Sandrewvoid
2064281494Sandrewpmap_remove_all(vm_page_t m)
2065281494Sandrew{
2066281494Sandrew	pv_entry_t pv;
2067281494Sandrew	pmap_t pmap;
2068305879Sandrew	struct rwlock *lock;
2069297446Sandrew	pd_entry_t *pde, tpde;
2070297446Sandrew	pt_entry_t *pte, tpte;
2071281494Sandrew	struct spglist free;
2072305879Sandrew	int lvl, md_gen;
2073281494Sandrew
2074281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2075281494Sandrew	    ("pmap_remove_all: page %p is not managed", m));
2076281494Sandrew	SLIST_INIT(&free);
2077305879Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2078305879Sandrewretry:
2079305879Sandrew	rw_wlock(lock);
2080281494Sandrew	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
2081281494Sandrew		pmap = PV_PMAP(pv);
2082305879Sandrew		if (!PMAP_TRYLOCK(pmap)) {
2083305879Sandrew			md_gen = m->md.pv_gen;
2084305879Sandrew			rw_wunlock(lock);
2085305879Sandrew			PMAP_LOCK(pmap);
2086305879Sandrew			rw_wlock(lock);
2087305879Sandrew			if (md_gen != m->md.pv_gen) {
2088305879Sandrew				rw_wunlock(lock);
2089305879Sandrew				PMAP_UNLOCK(pmap);
2090305879Sandrew				goto retry;
2091305879Sandrew			}
2092305879Sandrew		}
2093281494Sandrew		pmap_resident_count_dec(pmap, 1);
2094297446Sandrew
2095297446Sandrew		pde = pmap_pde(pmap, pv->pv_va, &lvl);
2096297446Sandrew		KASSERT(pde != NULL,
2097297446Sandrew		    ("pmap_remove_all: no page directory entry found"));
2098297446Sandrew		KASSERT(lvl == 2,
2099297446Sandrew		    ("pmap_remove_all: invalid pde level %d", lvl));
2100297446Sandrew		tpde = pmap_load(pde);
2101297446Sandrew
2102297446Sandrew		pte = pmap_l2_to_l3(pde, pv->pv_va);
2103297446Sandrew		tpte = pmap_load(pte);
2104281494Sandrew		if (pmap_is_current(pmap) &&
2105297446Sandrew		    pmap_l3_valid_cacheable(tpte))
2106281494Sandrew			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
2107297446Sandrew		pmap_load_clear(pte);
2108297446Sandrew		PTE_SYNC(pte);
2109285212Sandrew		pmap_invalidate_page(pmap, pv->pv_va);
2110297446Sandrew		if (tpte & ATTR_SW_WIRED)
2111281494Sandrew			pmap->pm_stats.wired_count--;
2112297446Sandrew		if ((tpte & ATTR_AF) != 0)
2113281494Sandrew			vm_page_aflag_set(m, PGA_REFERENCED);
2114281494Sandrew
2115281494Sandrew		/*
2116281494Sandrew		 * Update the vm_page_t clean and reference bits.
2117281494Sandrew		 */
2118297446Sandrew		if (pmap_page_dirty(tpte))
2119281494Sandrew			vm_page_dirty(m);
2120297446Sandrew		pmap_unuse_l3(pmap, pv->pv_va, tpde, &free);
2121281494Sandrew		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2122281494Sandrew		m->md.pv_gen++;
2123281494Sandrew		free_pv_entry(pmap, pv);
2124281494Sandrew		PMAP_UNLOCK(pmap);
2125281494Sandrew	}
2126281494Sandrew	vm_page_aflag_clear(m, PGA_WRITEABLE);
2127305879Sandrew	rw_wunlock(lock);
2128281494Sandrew	pmap_free_zero_pages(&free);
2129281494Sandrew}
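
/*
 * Informal note on the locking in pmap_remove_all() above: the pv list
 * is scanned while holding its lock, and a pmap lock cannot be slept on
 * at that point, so a failed PMAP_TRYLOCK() drops the pv list lock,
 * takes the pmap lock the slow way, retakes the pv list lock, and uses
 * m->md.pv_gen to detect whether the list changed in the meantime,
 * restarting the scan if it did.
 */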
2130281494Sandrew
2131281494Sandrew/*
2132281494Sandrew *	Set the physical protection on the
2133281494Sandrew *	specified range of this map as requested.
2134281494Sandrew */
2135281494Sandrewvoid
2136281494Sandrewpmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
2137281494Sandrew{
2138281494Sandrew	vm_offset_t va, va_next;
2139297446Sandrew	pd_entry_t *l0, *l1, *l2;
2140281494Sandrew	pt_entry_t *l3p, l3;
2141281494Sandrew
2142281494Sandrew	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2143281494Sandrew		pmap_remove(pmap, sva, eva);
2144281494Sandrew		return;
2145281494Sandrew	}
2146281494Sandrew
2147281494Sandrew	if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
2148281494Sandrew		return;
2149281494Sandrew
2150281494Sandrew	PMAP_LOCK(pmap);
2151281494Sandrew	for (; sva < eva; sva = va_next) {
2152281494Sandrew
2153297446Sandrew		l0 = pmap_l0(pmap, sva);
2154297446Sandrew		if (pmap_load(l0) == 0) {
2155297446Sandrew			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
2156297446Sandrew			if (va_next < sva)
2157297446Sandrew				va_next = eva;
2158297446Sandrew			continue;
2159297446Sandrew		}
2160297446Sandrew
2161297446Sandrew		l1 = pmap_l0_to_l1(l0, sva);
2162285045Sandrew		if (pmap_load(l1) == 0) {
2163281494Sandrew			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2164281494Sandrew			if (va_next < sva)
2165281494Sandrew				va_next = eva;
2166281494Sandrew			continue;
2167281494Sandrew		}
2168281494Sandrew
2169281494Sandrew		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2170281494Sandrew		if (va_next < sva)
2171281494Sandrew			va_next = eva;
2172281494Sandrew
2173281494Sandrew		l2 = pmap_l1_to_l2(l1, sva);
2174288445Sandrew		if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
2175281494Sandrew			continue;
2176281494Sandrew
2177281494Sandrew		if (va_next > eva)
2178281494Sandrew			va_next = eva;
2179281494Sandrew
2180281494Sandrew		va = va_next;
2181281494Sandrew		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
2182281494Sandrew		    sva += L3_SIZE) {
2183281494Sandrew			l3 = pmap_load(l3p);
2184281494Sandrew			if (pmap_l3_valid(l3)) {
2185281494Sandrew				pmap_set(l3p, ATTR_AP(ATTR_AP_RO));
2186281494Sandrew				PTE_SYNC(l3p);
2187285212Sandrew				/* XXX: Use pmap_invalidate_range */
2188285212Sandrew				pmap_invalidate_page(pmap, va);
2189281494Sandrew			}
2190281494Sandrew		}
2191281494Sandrew	}
2192281494Sandrew	PMAP_UNLOCK(pmap);
2193281494Sandrew
2194281494Sandrew	/* TODO: Only invalidate entries we are touching */
2195281494Sandrew	pmap_invalidate_all(pmap);
2196281494Sandrew}
2197281494Sandrew
2198281494Sandrew/*
2199281494Sandrew *	Insert the given physical page (p) at
2200281494Sandrew *	the specified virtual address (v) in the
2201281494Sandrew *	target physical map with the protection requested.
2202281494Sandrew *
2203281494Sandrew *	If specified, the page will be wired down, meaning
2204281494Sandrew *	that the related pte can not be reclaimed.
2205281494Sandrew *
2206281494Sandrew *	NB:  This is the only routine which MAY NOT lazy-evaluate
2207281494Sandrew *	or lose information.  That is, this routine must actually
2208281494Sandrew *	insert this page into the given map NOW.
2209281494Sandrew */
2210281494Sandrewint
2211281494Sandrewpmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
2212281494Sandrew    u_int flags, int8_t psind __unused)
2213281494Sandrew{
2214281494Sandrew	struct rwlock *lock;
2215297446Sandrew	pd_entry_t *pde;
2216281494Sandrew	pt_entry_t new_l3, orig_l3;
2217281494Sandrew	pt_entry_t *l3;
2218281494Sandrew	pv_entry_t pv;
2219297446Sandrew	vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa;
2220297446Sandrew	vm_page_t mpte, om, l1_m, l2_m, l3_m;
2221281494Sandrew	boolean_t nosleep;
2222297446Sandrew	int lvl;
2223281494Sandrew
2224281494Sandrew	va = trunc_page(va);
2225281494Sandrew	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
2226281494Sandrew		VM_OBJECT_ASSERT_LOCKED(m->object);
2227281494Sandrew	pa = VM_PAGE_TO_PHYS(m);
2228285537Sandrew	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
2229285537Sandrew	    L3_PAGE);
2230281494Sandrew	if ((prot & VM_PROT_WRITE) == 0)
2231281494Sandrew		new_l3 |= ATTR_AP(ATTR_AP_RO);
2232281494Sandrew	if ((flags & PMAP_ENTER_WIRED) != 0)
2233281494Sandrew		new_l3 |= ATTR_SW_WIRED;
2234281494Sandrew	if ((va >> 63) == 0)
2235281494Sandrew		new_l3 |= ATTR_AP(ATTR_AP_USER);
2236281494Sandrew
2237285212Sandrew	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
2238285212Sandrew
2239281494Sandrew	mpte = NULL;
2240281494Sandrew
2241281494Sandrew	lock = NULL;
2242281494Sandrew	PMAP_LOCK(pmap);
2243281494Sandrew
2244281494Sandrew	if (va < VM_MAXUSER_ADDRESS) {
2245281494Sandrew		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
2246281494Sandrew		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
2247281494Sandrew		if (mpte == NULL && nosleep) {
2248285212Sandrew			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
2249281494Sandrew			if (lock != NULL)
2250281494Sandrew				rw_wunlock(lock);
2251281494Sandrew			PMAP_UNLOCK(pmap);
2252281494Sandrew			return (KERN_RESOURCE_SHORTAGE);
2253281494Sandrew		}
2254297446Sandrew		pde = pmap_pde(pmap, va, &lvl);
2255297446Sandrew		KASSERT(pde != NULL,
2256297446Sandrew		    ("pmap_enter: Invalid page entry, va: 0x%lx", va));
2257297446Sandrew		KASSERT(lvl == 2,
2258297446Sandrew		    ("pmap_enter: Invalid level %d", lvl));
2259297446Sandrew
2260297446Sandrew		l3 = pmap_l2_to_l3(pde, va);
2261281494Sandrew	} else {
2262297446Sandrew		pde = pmap_pde(pmap, va, &lvl);
2263297446Sandrew		/*
2264297446Sandrew		 * If we get a level 2 pde it must point to a level 3 entry
2265297446Sandrew		 * otherwise we will need to create the intermediate tables
2266297446Sandrew		 */
2267297446Sandrew		if (lvl < 2) {
2268297446Sandrew			switch(lvl) {
2269297446Sandrew			default:
2270297446Sandrew			case -1:
2271297446Sandrew				/* Get the l0 pde to update */
2272297446Sandrew				pde = pmap_l0(pmap, va);
2273297446Sandrew				KASSERT(pde != NULL, ("..."));
2274281494Sandrew
2275297446Sandrew				l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2276297446Sandrew				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2277297446Sandrew				    VM_ALLOC_ZERO);
2278297446Sandrew				if (l1_m == NULL)
2279297446Sandrew					panic("pmap_enter: l1 pte_m == NULL");
2280297446Sandrew				if ((l1_m->flags & PG_ZERO) == 0)
2281297446Sandrew					pmap_zero_page(l1_m);
2282297446Sandrew
2283297446Sandrew				l1_pa = VM_PAGE_TO_PHYS(l1_m);
2284297446Sandrew				pmap_load_store(pde, l1_pa | L0_TABLE);
2285297446Sandrew				PTE_SYNC(pde);
2286297446Sandrew				/* FALLTHROUGH */
2287297446Sandrew			case 0:
2288297446Sandrew				/* Get the l1 pde to update */
2289297446Sandrew				pde = pmap_l1_to_l2(pde, va);
2290297446Sandrew				KASSERT(pde != NULL, ("..."));
2291297446Sandrew
2292281494Sandrew				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2293281494Sandrew				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2294281494Sandrew				    VM_ALLOC_ZERO);
2295281494Sandrew				if (l2_m == NULL)
2296281494Sandrew					panic("pmap_enter: l2 pte_m == NULL");
2297281494Sandrew				if ((l2_m->flags & PG_ZERO) == 0)
2298281494Sandrew					pmap_zero_page(l2_m);
2299281494Sandrew
2300281494Sandrew				l2_pa = VM_PAGE_TO_PHYS(l2_m);
2301297446Sandrew				pmap_load_store(pde, l2_pa | L1_TABLE);
2302297446Sandrew				PTE_SYNC(pde);
2303297446Sandrew				/* FALLTHROUGH */
2304297446Sandrew			case 1:
2305297446Sandrew				/* Get the l2 pde to update */
2306297446Sandrew				pde = pmap_l1_to_l2(pde, va);
2307281494Sandrew
2308297446Sandrew				l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2309297446Sandrew				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2310297446Sandrew				    VM_ALLOC_ZERO);
2311297446Sandrew				if (l3_m == NULL)
2312297446Sandrew					panic("pmap_enter: l3 pte_m == NULL");
2313297446Sandrew				if ((l3_m->flags & PG_ZERO) == 0)
2314297446Sandrew					pmap_zero_page(l3_m);
2315281494Sandrew
2316297446Sandrew				l3_pa = VM_PAGE_TO_PHYS(l3_m);
2317297446Sandrew				pmap_load_store(pde, l3_pa | L2_TABLE);
2318297446Sandrew				PTE_SYNC(pde);
2319297446Sandrew				break;
2320297446Sandrew			}
2321281494Sandrew		}
2322297446Sandrew		l3 = pmap_l2_to_l3(pde, va);
2323285212Sandrew		pmap_invalidate_page(pmap, va);
2324281494Sandrew	}
2325281494Sandrew
2326281494Sandrew	om = NULL;
2327281494Sandrew	orig_l3 = pmap_load(l3);
2328281494Sandrew	opa = orig_l3 & ~ATTR_MASK;
2329281494Sandrew
2330281494Sandrew	/*
2331281494Sandrew	 * Is the specified virtual address already mapped?
2332281494Sandrew	 */
2333281494Sandrew	if (pmap_l3_valid(orig_l3)) {
2334281494Sandrew		/*
2335281494Sandrew		 * Wiring change, just update stats. We don't worry about
2336281494Sandrew		 * wiring PT pages as they remain resident as long as there
2337281494Sandrew		 * are valid mappings in them. Hence, if a user page is wired,
2338281494Sandrew		 * the PT page will be also.
2339281494Sandrew		 */
2340281494Sandrew		if ((flags & PMAP_ENTER_WIRED) != 0 &&
2341281494Sandrew		    (orig_l3 & ATTR_SW_WIRED) == 0)
2342281494Sandrew			pmap->pm_stats.wired_count++;
2343281494Sandrew		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
2344281494Sandrew		    (orig_l3 & ATTR_SW_WIRED) != 0)
2345281494Sandrew			pmap->pm_stats.wired_count--;
2346281494Sandrew
2347281494Sandrew		/*
2348281494Sandrew		 * Remove the extra PT page reference.
2349281494Sandrew		 */
2350281494Sandrew		if (mpte != NULL) {
2351281494Sandrew			mpte->wire_count--;
2352281494Sandrew			KASSERT(mpte->wire_count > 0,
2353281494Sandrew			    ("pmap_enter: missing reference to page table page,"
2354281494Sandrew			     " va: 0x%lx", va));
2355281494Sandrew		}
2356281494Sandrew
2357281494Sandrew		/*
2358281494Sandrew		 * Has the physical page changed?
2359281494Sandrew		 */
2360281494Sandrew		if (opa == pa) {
2361281494Sandrew			/*
2362281494Sandrew			 * No, might be a protection or wiring change.
2363281494Sandrew			 */
2364281494Sandrew			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2365281494Sandrew				new_l3 |= ATTR_SW_MANAGED;
2366281494Sandrew				if ((new_l3 & ATTR_AP(ATTR_AP_RW)) ==
2367281494Sandrew				    ATTR_AP(ATTR_AP_RW)) {
2368281494Sandrew					vm_page_aflag_set(m, PGA_WRITEABLE);
2369281494Sandrew				}
2370281494Sandrew			}
2371281494Sandrew			goto validate;
2372281494Sandrew		}
2373281494Sandrew
2374281494Sandrew		/* Flush the cache, there might be uncommitted data in it */
2375281494Sandrew		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
2376281494Sandrew			cpu_dcache_wb_range(va, L3_SIZE);
2377281494Sandrew	} else {
2378281494Sandrew		/*
2379281494Sandrew		 * Increment the counters.
2380281494Sandrew		 */
2381281494Sandrew		if ((new_l3 & ATTR_SW_WIRED) != 0)
2382281494Sandrew			pmap->pm_stats.wired_count++;
2383281494Sandrew		pmap_resident_count_inc(pmap, 1);
2384281494Sandrew	}
2385281494Sandrew	/*
2386281494Sandrew	 * Enter on the PV list if part of our managed memory.
2387281494Sandrew	 */
2388281494Sandrew	if ((m->oflags & VPO_UNMANAGED) == 0) {
2389281494Sandrew		new_l3 |= ATTR_SW_MANAGED;
2390281494Sandrew		pv = get_pv_entry(pmap, &lock);
2391281494Sandrew		pv->pv_va = va;
2392281494Sandrew		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
2393281494Sandrew		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2394281494Sandrew		m->md.pv_gen++;
2395281494Sandrew		if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
2396281494Sandrew			vm_page_aflag_set(m, PGA_WRITEABLE);
2397281494Sandrew	}
2398281494Sandrew
2399281494Sandrew	/*
2400281494Sandrew	 * Update the L3 entry.
2401281494Sandrew	 */
2402281494Sandrew	if (orig_l3 != 0) {
2403281494Sandrewvalidate:
2404281494Sandrew		orig_l3 = pmap_load_store(l3, new_l3);
2405281494Sandrew		PTE_SYNC(l3);
2406281494Sandrew		opa = orig_l3 & ~ATTR_MASK;
2407281494Sandrew
2408281494Sandrew		if (opa != pa) {
2409281494Sandrew			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2410281494Sandrew				om = PHYS_TO_VM_PAGE(opa);
2411281494Sandrew				if (pmap_page_dirty(orig_l3))
2412281494Sandrew					vm_page_dirty(om);
2413281494Sandrew				if ((orig_l3 & ATTR_AF) != 0)
2414281494Sandrew					vm_page_aflag_set(om, PGA_REFERENCED);
2415281494Sandrew				CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
2416281494Sandrew				pmap_pvh_free(&om->md, pmap, va);
2417281494Sandrew			}
2418281494Sandrew		} else if (pmap_page_dirty(orig_l3)) {
2419281494Sandrew			if ((orig_l3 & ATTR_SW_MANAGED) != 0)
2420281494Sandrew				vm_page_dirty(m);
2421281494Sandrew		}
2422281494Sandrew	} else {
2423281494Sandrew		pmap_load_store(l3, new_l3);
2424281494Sandrew		PTE_SYNC(l3);
2425281494Sandrew	}
2426285212Sandrew	pmap_invalidate_page(pmap, va);
2427281494Sandrew	if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
2428281494Sandrew	    cpu_icache_sync_range(va, PAGE_SIZE);
2429281494Sandrew
2430281494Sandrew	if (lock != NULL)
2431281494Sandrew		rw_wunlock(lock);
2432281494Sandrew	PMAP_UNLOCK(pmap);
2433281494Sandrew	return (KERN_SUCCESS);
2434281494Sandrew}
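
/*
 * Illustrative call (a sketch only, not taken from this file): wiring a
 * managed page read/write into a pmap might look like
 *
 *	error = pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE,
 *	    PMAP_ENTER_WIRED, 0);
 *
 * KERN_RESOURCE_SHORTAGE is only returned when PMAP_ENTER_NOSLEEP is
 * passed and a page table page cannot be allocated without sleeping;
 * otherwise the routine sleeps as needed and returns KERN_SUCCESS.
 */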
2435281494Sandrew
2436281494Sandrew/*
2437281494Sandrew * Maps a sequence of resident pages belonging to the same object.
2438281494Sandrew * The sequence begins with the given page m_start.  This page is
2439281494Sandrew * mapped at the given virtual address start.  Each subsequent page is
2440281494Sandrew * mapped at a virtual address that is offset from start by the same
2441281494Sandrew * amount as the page is offset from m_start within the object.  The
2442281494Sandrew * last page in the sequence is the page with the largest offset from
2443281494Sandrew * m_start that can be mapped at a virtual address less than the given
2444281494Sandrew * virtual address end.  Not every virtual page between start and end
2445281494Sandrew * is mapped; only those for which a resident page exists with the
2446281494Sandrew * corresponding offset from m_start are mapped.
2447281494Sandrew */
2448281494Sandrewvoid
2449281494Sandrewpmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2450281494Sandrew    vm_page_t m_start, vm_prot_t prot)
2451281494Sandrew{
2452281494Sandrew	struct rwlock *lock;
2453281494Sandrew	vm_offset_t va;
2454281494Sandrew	vm_page_t m, mpte;
2455281494Sandrew	vm_pindex_t diff, psize;
2456281494Sandrew
2457281494Sandrew	VM_OBJECT_ASSERT_LOCKED(m_start->object);
2458281494Sandrew
2459281494Sandrew	psize = atop(end - start);
2460281494Sandrew	mpte = NULL;
2461281494Sandrew	m = m_start;
2462281494Sandrew	lock = NULL;
2463281494Sandrew	PMAP_LOCK(pmap);
2464281494Sandrew	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2465281494Sandrew		va = start + ptoa(diff);
2466281494Sandrew		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
2467281494Sandrew		m = TAILQ_NEXT(m, listq);
2468281494Sandrew	}
2469281494Sandrew	if (lock != NULL)
2470281494Sandrew		rw_wunlock(lock);
2471281494Sandrew	PMAP_UNLOCK(pmap);
2472281494Sandrew}
2473281494Sandrew
2474281494Sandrew/*
2475281494Sandrew * this code makes some *MAJOR* assumptions:
2476281494Sandrew * 1. The current pmap and the given pmap exist.
2477281494Sandrew * 2. Not wired.
2478281494Sandrew * 3. Read access.
2479281494Sandrew * 4. No page table pages.
2480281494Sandrew * but is *MUCH* faster than pmap_enter...
2481281494Sandrew */
2482281494Sandrew
2483281494Sandrewvoid
2484281494Sandrewpmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2485281494Sandrew{
2486281494Sandrew	struct rwlock *lock;
2487281494Sandrew
2488281494Sandrew	lock = NULL;
2489281494Sandrew	PMAP_LOCK(pmap);
2490281494Sandrew	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
2491281494Sandrew	if (lock != NULL)
2492281494Sandrew		rw_wunlock(lock);
2493281494Sandrew	PMAP_UNLOCK(pmap);
2494281494Sandrew}
2495281494Sandrew
2496281494Sandrewstatic vm_page_t
2497281494Sandrewpmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2498281494Sandrew    vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
2499281494Sandrew{
2500281494Sandrew	struct spglist free;
2501297446Sandrew	pd_entry_t *pde;
2502281494Sandrew	pt_entry_t *l3;
2503281494Sandrew	vm_paddr_t pa;
2504297446Sandrew	int lvl;
2505281494Sandrew
2506281494Sandrew	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2507281494Sandrew	    (m->oflags & VPO_UNMANAGED) != 0,
2508281494Sandrew	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2509281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2510281494Sandrew
2511285212Sandrew	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
2512281494Sandrew	/*
2513281494Sandrew	 * In the case that a page table page is not
2514281494Sandrew	 * resident, we are creating it here.
2515281494Sandrew	 */
2516281494Sandrew	if (va < VM_MAXUSER_ADDRESS) {
2517281494Sandrew		vm_pindex_t l2pindex;
2518281494Sandrew
2519281494Sandrew		/*
2520281494Sandrew		 * Calculate pagetable page index
2521281494Sandrew		 */
2522281494Sandrew		l2pindex = pmap_l2_pindex(va);
2523281494Sandrew		if (mpte && (mpte->pindex == l2pindex)) {
2524281494Sandrew			mpte->wire_count++;
2525281494Sandrew		} else {
2526281494Sandrew			/*
2527281494Sandrew			 * Get the l2 entry
2528281494Sandrew			 */
2529297446Sandrew			pde = pmap_pde(pmap, va, &lvl);
2530281494Sandrew
2531281494Sandrew			/*
2532281494Sandrew			 * If the page table page is mapped, we just increment
2533281494Sandrew			 * the hold count, and activate it.  Otherwise, we
2534281494Sandrew			 * attempt to allocate a page table page.  If this
2535281494Sandrew			 * attempt fails, we don't retry.  Instead, we give up.
2536281494Sandrew			 */
2537297446Sandrew			if (lvl == 2 && pmap_load(pde) != 0) {
2538285045Sandrew				mpte =
2539297446Sandrew				    PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
2540281494Sandrew				mpte->wire_count++;
2541281494Sandrew			} else {
2542281494Sandrew				/*
2543281494Sandrew				 * Pass NULL instead of the PV list lock
2544281494Sandrew				 * pointer, because we don't intend to sleep.
2545281494Sandrew				 */
2546281494Sandrew				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
2547281494Sandrew				if (mpte == NULL)
2548281494Sandrew					return (mpte);
2549281494Sandrew			}
2550281494Sandrew		}
2551281494Sandrew		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
2552281494Sandrew		l3 = &l3[pmap_l3_index(va)];
2553281494Sandrew	} else {
2554281494Sandrew		mpte = NULL;
2555297446Sandrew		pde = pmap_pde(kernel_pmap, va, &lvl);
2556297446Sandrew		KASSERT(pde != NULL,
2557297446Sandrew		    ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
2558297446Sandrew		     va));
2559297446Sandrew		KASSERT(lvl == 2,
2560297446Sandrew		    ("pmap_enter_quick_locked: Invalid level %d", lvl));
2561297446Sandrew		l3 = pmap_l2_to_l3(pde, va);
2562281494Sandrew	}
2563297446Sandrew
2564285212Sandrew	if (pmap_load(l3) != 0) {
2565281494Sandrew		if (mpte != NULL) {
2566281494Sandrew			mpte->wire_count--;
2567281494Sandrew			mpte = NULL;
2568281494Sandrew		}
2569281494Sandrew		return (mpte);
2570281494Sandrew	}
2571281494Sandrew
2572281494Sandrew	/*
2573281494Sandrew	 * Enter on the PV list if part of our managed memory.
2574281494Sandrew	 */
2575281494Sandrew	if ((m->oflags & VPO_UNMANAGED) == 0 &&
2576281494Sandrew	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
2577281494Sandrew		if (mpte != NULL) {
2578281494Sandrew			SLIST_INIT(&free);
2579281494Sandrew			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
2580281494Sandrew				pmap_invalidate_page(pmap, va);
2581281494Sandrew				pmap_free_zero_pages(&free);
2582281494Sandrew			}
2583281494Sandrew			mpte = NULL;
2584281494Sandrew		}
2585281494Sandrew		return (mpte);
2586281494Sandrew	}
2587281494Sandrew
2588281494Sandrew	/*
2589281494Sandrew	 * Increment counters
2590281494Sandrew	 */
2591281494Sandrew	pmap_resident_count_inc(pmap, 1);
2592281494Sandrew
2593285537Sandrew	pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
2594281494Sandrew	    ATTR_AP(ATTR_AP_RW) | L3_PAGE;
2595281494Sandrew
2596281494Sandrew	/*
2597281494Sandrew	 * Now validate mapping with RO protection
2598281494Sandrew	 */
2599281494Sandrew	if ((m->oflags & VPO_UNMANAGED) == 0)
2600281494Sandrew		pa |= ATTR_SW_MANAGED;
2601281494Sandrew	pmap_load_store(l3, pa);
2602281494Sandrew	PTE_SYNC(l3);
2603281494Sandrew	pmap_invalidate_page(pmap, va);
2604281494Sandrew	return (mpte);
2605281494Sandrew}
2606281494Sandrew
2607281494Sandrew/*
2608281494Sandrew * This code maps large physical mmap regions into the
2609281494Sandrew * processor address space.  Note that some shortcuts
2610281494Sandrew * are taken, but the code works.
2611281494Sandrew */
2612281494Sandrewvoid
2613281494Sandrewpmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
2614281494Sandrew    vm_pindex_t pindex, vm_size_t size)
2615281494Sandrew{
2616281494Sandrew
2617281846Sandrew	VM_OBJECT_ASSERT_WLOCKED(object);
2618281846Sandrew	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2619281846Sandrew	    ("pmap_object_init_pt: non-device object"));
2620281494Sandrew}
2621281494Sandrew
2622281494Sandrew/*
2623281494Sandrew *	Clear the wired attribute from the mappings for the specified range of
2624281494Sandrew *	addresses in the given pmap.  Every valid mapping within that range
2625281494Sandrew *	must have the wired attribute set.  In contrast, invalid mappings
2626281494Sandrew *	cannot have the wired attribute set, so they are ignored.
2627281494Sandrew *
2628281494Sandrew *	The wired attribute of the page table entry is not a hardware feature,
2629281494Sandrew *	so there is no need to invalidate any TLB entries.
2630281494Sandrew */
2631281494Sandrewvoid
2632281494Sandrewpmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2633281494Sandrew{
2634281494Sandrew	vm_offset_t va_next;
2635297446Sandrew	pd_entry_t *l0, *l1, *l2;
2636281494Sandrew	pt_entry_t *l3;
2637281494Sandrew
2638281494Sandrew	PMAP_LOCK(pmap);
2639281494Sandrew	for (; sva < eva; sva = va_next) {
2640297446Sandrew		l0 = pmap_l0(pmap, sva);
2641297446Sandrew		if (pmap_load(l0) == 0) {
2642297446Sandrew			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
2643297446Sandrew			if (va_next < sva)
2644297446Sandrew				va_next = eva;
2645297446Sandrew			continue;
2646297446Sandrew		}
2647297446Sandrew
2648297446Sandrew		l1 = pmap_l0_to_l1(l0, sva);
2649285045Sandrew		if (pmap_load(l1) == 0) {
2650281494Sandrew			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2651281494Sandrew			if (va_next < sva)
2652281494Sandrew				va_next = eva;
2653281494Sandrew			continue;
2654281494Sandrew		}
2655281494Sandrew
2656281494Sandrew		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2657281494Sandrew		if (va_next < sva)
2658281494Sandrew			va_next = eva;
2659281494Sandrew
2660281494Sandrew		l2 = pmap_l1_to_l2(l1, sva);
2661285045Sandrew		if (pmap_load(l2) == 0)
2662281494Sandrew			continue;
2663281494Sandrew
2664281494Sandrew		if (va_next > eva)
2665281494Sandrew			va_next = eva;
2666281494Sandrew		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2667281494Sandrew		    sva += L3_SIZE) {
2668285045Sandrew			if (pmap_load(l3) == 0)
2669281494Sandrew				continue;
2670285045Sandrew			if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
2671281494Sandrew				panic("pmap_unwire: l3 %#jx is missing "
2672288445Sandrew				    "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));
2673281494Sandrew
2674281494Sandrew			/*
2675281494Sandrew			 * ATTR_SW_WIRED must be cleared atomically.  Although the
2676281494Sandrew			 * pmap lock synchronizes access to it, another processor
2677281494Sandrew			 * could be updating other bits of the entry concurrently.
2678281494Sandrew			 */
2679281494Sandrew			atomic_clear_long(l3, ATTR_SW_WIRED);
2680281494Sandrew			pmap->pm_stats.wired_count--;
2681281494Sandrew		}
2682281494Sandrew	}
2683281494Sandrew	PMAP_UNLOCK(pmap);
2684281494Sandrew}
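
/*
 * Illustrative sketch: a hypothetical pmap_count_wired_l3(), not part of
 * the pmap interface, that walks the page tables with the same
 * L0/L1/L2/L3 pattern as pmap_unwire() but only counts ATTR_SW_WIRED
 * leaf entries instead of clearing them.  Like pmap_unwire() at this
 * revision, it assumes that any valid L2 entry in the range references
 * an L3 table rather than a block mapping.
 */
static u_long
pmap_count_wired_l3(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va_next;
	pd_entry_t *l0, *l1, *l2;
	pt_entry_t *l3;
	u_long count;

	count = 0;
	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {
		l0 = pmap_l0(pmap, sva);
		if (pmap_load(l0) == 0) {
			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}
		l1 = pmap_l0_to_l1(l0, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}
		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;
		l2 = pmap_l1_to_l2(l1, sva);
		if (pmap_load(l2) == 0)
			continue;
		if (va_next > eva)
			va_next = eva;
		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
		    sva += L3_SIZE) {
			/* Wired is a software bit, so no TLB work is needed. */
			if ((pmap_load(l3) & ATTR_SW_WIRED) != 0)
				count++;
		}
	}
	PMAP_UNLOCK(pmap);
	return (count);
}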
2685281494Sandrew
2686281494Sandrew/*
2687281494Sandrew *	Copy the range specified by src_addr/len
2688281494Sandrew *	from the source map to the range dst_addr/len
2689281494Sandrew *	in the destination map.
2690281494Sandrew *
2691281494Sandrew *	This routine is only advisory and need not do anything.
2692281494Sandrew */
2693281494Sandrew
2694281494Sandrewvoid
2695281494Sandrewpmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2696281494Sandrew    vm_offset_t src_addr)
2697281494Sandrew{
2698281494Sandrew}
2699281494Sandrew
2700281494Sandrew/*
2701281494Sandrew *	pmap_zero_page zeros the specified hardware page through its
2702281494Sandrew *	direct map (DMAP) address.
2703281494Sandrew */
2704281494Sandrewvoid
2705281494Sandrewpmap_zero_page(vm_page_t m)
2706281494Sandrew{
2707281494Sandrew	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2708281494Sandrew
2709281494Sandrew	pagezero((void *)va);
2710281494Sandrew}
2711281494Sandrew
2712281494Sandrew/*
2713305531Sandrew *	pmap_zero_page_area zeros the specified area of the hardware page
2714281494Sandrew *	through its direct map (DMAP) address.
2715281494Sandrew *
2716281494Sandrew *	off and size may not cover an area beyond a single hardware page.
2717281494Sandrew */
2718281494Sandrewvoid
2719281494Sandrewpmap_zero_page_area(vm_page_t m, int off, int size)
2720281494Sandrew{
2721281494Sandrew	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2722281494Sandrew
2723281494Sandrew	if (off == 0 && size == PAGE_SIZE)
2724281494Sandrew		pagezero((void *)va);
2725281494Sandrew	else
2726281494Sandrew		bzero((char *)va + off, size);
2727281494Sandrew}
2728281494Sandrew
2729281494Sandrew/*
2730305531Sandrew *	pmap_zero_page_idle zeros the specified hardware page through its
2731281494Sandrew *	direct map (DMAP) address.  This
2732281494Sandrew *	is intended to be called from the vm_pagezero process only and
2733281494Sandrew *	outside of Giant.
2734281494Sandrew */
2735281494Sandrewvoid
2736281494Sandrewpmap_zero_page_idle(vm_page_t m)
2737281494Sandrew{
2738281494Sandrew	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2739281494Sandrew
2740281494Sandrew	pagezero((void *)va);
2741281494Sandrew}
2742281494Sandrew
2743281494Sandrew/*
2744281494Sandrew *	pmap_copy_page copies the specified (machine independent)
2745281494Sandrew *	page through the direct map, one machine dependent page at a
2746281494Sandrew *	time.
2748281494Sandrew */
2749281494Sandrewvoid
2750281494Sandrewpmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2751281494Sandrew{
2752281494Sandrew	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
2753281494Sandrew	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
2754281494Sandrew
2755281494Sandrew	pagecopy((void *)src, (void *)dst);
2756281494Sandrew}
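
/*
 * Illustrative sketch: the zero and copy helpers above depend on every
 * managed physical page being reachable through the direct map.  A
 * hypothetical pmap_fill_page(), not part of the pmap interface, would
 * use the same PHYS_TO_DMAP() translation; memset() is assumed to be
 * available as elsewhere in the kernel.
 */
static void
pmap_fill_page(vm_page_t m, uint8_t pattern)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	/* Fill the whole page through its direct map address. */
	memset((void *)va, pattern, PAGE_SIZE);
}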
2757281494Sandrew
2758281494Sandrewint unmapped_buf_allowed = 1;
2759281494Sandrew
2760281494Sandrewvoid
2761281494Sandrewpmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2762281494Sandrew    vm_offset_t b_offset, int xfersize)
2763281494Sandrew{
2764281494Sandrew	void *a_cp, *b_cp;
2765281494Sandrew	vm_page_t m_a, m_b;
2766281494Sandrew	vm_paddr_t p_a, p_b;
2767281494Sandrew	vm_offset_t a_pg_offset, b_pg_offset;
2768281494Sandrew	int cnt;
2769281494Sandrew
2770281494Sandrew	while (xfersize > 0) {
2771281494Sandrew		a_pg_offset = a_offset & PAGE_MASK;
2772281494Sandrew		m_a = ma[a_offset >> PAGE_SHIFT];
2773281494Sandrew		p_a = m_a->phys_addr;
2774281494Sandrew		b_pg_offset = b_offset & PAGE_MASK;
2775281494Sandrew		m_b = mb[b_offset >> PAGE_SHIFT];
2776281494Sandrew		p_b = m_b->phys_addr;
2777281494Sandrew		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2778281494Sandrew		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2779281494Sandrew		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
2780281494Sandrew			panic("!DMAP a %lx", p_a);
2781281494Sandrew		} else {
2782281494Sandrew			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
2783281494Sandrew		}
2784281494Sandrew		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
2785281494Sandrew			panic("!DMAP b %lx", p_b);
2786281494Sandrew		} else {
2787281494Sandrew			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
2788281494Sandrew		}
2789281494Sandrew		bcopy(a_cp, b_cp, cnt);
2790281494Sandrew		a_offset += cnt;
2791281494Sandrew		b_offset += cnt;
2792281494Sandrew		xfersize -= cnt;
2793281494Sandrew	}
2794281494Sandrew}
2795281494Sandrew
2796286296Sjahvm_offset_t
2797286296Sjahpmap_quick_enter_page(vm_page_t m)
2798286296Sjah{
2799286296Sjah
2800286296Sjah	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
2801286296Sjah}
2802286296Sjah
2803286296Sjahvoid
2804286296Sjahpmap_quick_remove_page(vm_offset_t addr)
2805286296Sjah{
2806286296Sjah}
2807286296Sjah
2808281494Sandrew/*
2809281494Sandrew * Returns true if the pmap's pv is one of the first
2810281494Sandrew * 16 pvs linked to from this page.  This count may
2811281494Sandrew * be changed upwards or downwards in the future; it
2812281494Sandrew * is only necessary that true be returned for a small
2813281494Sandrew * subset of pmaps for proper page aging.
2814281494Sandrew */
2815281494Sandrewboolean_t
2816281494Sandrewpmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2817281494Sandrew{
2818281494Sandrew	struct rwlock *lock;
2819281494Sandrew	pv_entry_t pv;
2820281494Sandrew	int loops = 0;
2821281494Sandrew	boolean_t rv;
2822281494Sandrew
2823281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2824281494Sandrew	    ("pmap_page_exists_quick: page %p is not managed", m));
2825281494Sandrew	rv = FALSE;
2826281494Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2827281494Sandrew	rw_rlock(lock);
2828281494Sandrew	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2829281494Sandrew		if (PV_PMAP(pv) == pmap) {
2830281494Sandrew			rv = TRUE;
2831281494Sandrew			break;
2832281494Sandrew		}
2833281494Sandrew		loops++;
2834281494Sandrew		if (loops >= 16)
2835281494Sandrew			break;
2836281494Sandrew	}
2837281494Sandrew	rw_runlock(lock);
2838281494Sandrew	return (rv);
2839281494Sandrew}
2840281494Sandrew
2841281494Sandrew/*
2842281494Sandrew *	pmap_page_wired_mappings:
2843281494Sandrew *
2844281494Sandrew *	Return the number of managed mappings to the given physical page
2845281494Sandrew *	that are wired.
2846281494Sandrew */
2847281494Sandrewint
2848281494Sandrewpmap_page_wired_mappings(vm_page_t m)
2849281494Sandrew{
2850281494Sandrew	struct rwlock *lock;
2851281494Sandrew	pmap_t pmap;
2852297446Sandrew	pt_entry_t *pte;
2853281494Sandrew	pv_entry_t pv;
2854297446Sandrew	int count, lvl, md_gen;
2855281494Sandrew
2856281494Sandrew	if ((m->oflags & VPO_UNMANAGED) != 0)
2857281494Sandrew		return (0);
2858281494Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2859281494Sandrew	rw_rlock(lock);
2860281494Sandrewrestart:
2861281494Sandrew	count = 0;
2862281494Sandrew	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2863281494Sandrew		pmap = PV_PMAP(pv);
2864281494Sandrew		if (!PMAP_TRYLOCK(pmap)) {
2865281494Sandrew			md_gen = m->md.pv_gen;
2866281494Sandrew			rw_runlock(lock);
2867281494Sandrew			PMAP_LOCK(pmap);
2868281494Sandrew			rw_rlock(lock);
2869281494Sandrew			if (md_gen != m->md.pv_gen) {
2870281494Sandrew				PMAP_UNLOCK(pmap);
2871281494Sandrew				goto restart;
2872281494Sandrew			}
2873281494Sandrew		}
2874297446Sandrew		pte = pmap_pte(pmap, pv->pv_va, &lvl);
2875297446Sandrew		if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
2876281494Sandrew			count++;
2877281494Sandrew		PMAP_UNLOCK(pmap);
2878281494Sandrew	}
2879281494Sandrew	rw_runlock(lock);
2880281494Sandrew	return (count);
2881281494Sandrew}
2882281494Sandrew
2883281494Sandrew/*
2884281494Sandrew * Destroy all managed, non-wired mappings in the given user-space
2885281494Sandrew * pmap.  This pmap cannot be active on any processor besides the
2886281494Sandrew * caller.
2887305531Sandrew *
2888281494Sandrew * This function cannot be applied to the kernel pmap.  Moreover, it
2889281494Sandrew * is not intended for general use.  It is only to be used during
2890281494Sandrew * process termination.  Consequently, it can be implemented in ways
2891281494Sandrew * that make it faster than pmap_remove().  First, it can more quickly
2892281494Sandrew * destroy mappings by iterating over the pmap's collection of PV
2893281494Sandrew * entries, rather than searching the page table.  Second, it doesn't
2894281494Sandrew * have to test and clear the page table entries atomically, because
2895281494Sandrew * no processor is currently accessing the user address space.  In
2896281494Sandrew * particular, a page table entry's dirty bit won't change state once
2897281494Sandrew * this function starts.
2898281494Sandrew */
2899281494Sandrewvoid
2900281494Sandrewpmap_remove_pages(pmap_t pmap)
2901281494Sandrew{
2902297446Sandrew	pd_entry_t *pde;
2903297446Sandrew	pt_entry_t *pte, tpte;
2904281494Sandrew	struct spglist free;
2905281494Sandrew	vm_page_t m;
2906281494Sandrew	pv_entry_t pv;
2907281494Sandrew	struct pv_chunk *pc, *npc;
2908281494Sandrew	struct rwlock *lock;
2909281494Sandrew	int64_t bit;
2910281494Sandrew	uint64_t inuse, bitmask;
2911297446Sandrew	int allfree, field, freed, idx, lvl;
2912281494Sandrew	vm_paddr_t pa;
2913281494Sandrew
2914281494Sandrew	lock = NULL;
2915281494Sandrew
2916281494Sandrew	SLIST_INIT(&free);
2917281494Sandrew	PMAP_LOCK(pmap);
2918281494Sandrew	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2919281494Sandrew		allfree = 1;
2920281494Sandrew		freed = 0;
2921281494Sandrew		for (field = 0; field < _NPCM; field++) {
2922281494Sandrew			inuse = ~pc->pc_map[field] & pc_freemask[field];
2923281494Sandrew			while (inuse != 0) {
2924281494Sandrew				bit = ffsl(inuse) - 1;
2925281494Sandrew				bitmask = 1UL << bit;
2926281494Sandrew				idx = field * 64 + bit;
2927281494Sandrew				pv = &pc->pc_pventry[idx];
2928281494Sandrew				inuse &= ~bitmask;
2929281494Sandrew
2930297446Sandrew				pde = pmap_pde(pmap, pv->pv_va, &lvl);
2931297446Sandrew				KASSERT(pde != NULL,
2932297446Sandrew				    ("Attempting to remove an unmapped page"));
2933297446Sandrew				KASSERT(lvl == 2,
2934297446Sandrew				    ("Invalid page directory level: %d", lvl));
2935281494Sandrew
2936297446Sandrew				pte = pmap_l2_to_l3(pde, pv->pv_va);
2937297446Sandrew				KASSERT(pte != NULL,
2938297446Sandrew				    ("Attempting to remove an unmapped page"));
2939297446Sandrew
2940297446Sandrew				tpte = pmap_load(pte);
2941297446Sandrew
2942281494Sandrew/*
2943281494Sandrew * We cannot remove wired pages from a process' mapping at this time
2944281494Sandrew */
2945297446Sandrew				if (tpte & ATTR_SW_WIRED) {
2946281494Sandrew					allfree = 0;
2947281494Sandrew					continue;
2948281494Sandrew				}
2949281494Sandrew
2950297446Sandrew				pa = tpte & ~ATTR_MASK;
2951281494Sandrew
2952281494Sandrew				m = PHYS_TO_VM_PAGE(pa);
2953281494Sandrew				KASSERT(m->phys_addr == pa,
2954281494Sandrew				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
2955281494Sandrew				    m, (uintmax_t)m->phys_addr,
2956297446Sandrew				    (uintmax_t)tpte));
2957281494Sandrew
2958281494Sandrew				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
2959281494Sandrew				    m < &vm_page_array[vm_page_array_size],
2960297446Sandrew				    ("pmap_remove_pages: bad pte %#jx",
2961297446Sandrew				    (uintmax_t)tpte));
2962281494Sandrew
2963297446Sandrew				/* XXX: assumes tpte is level 3 */
2964281494Sandrew				if (pmap_is_current(pmap) &&
2965297446Sandrew				    pmap_l3_valid_cacheable(tpte))
2966281494Sandrew					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
2967297446Sandrew				pmap_load_clear(pte);
2968297446Sandrew				PTE_SYNC(pte);
2969285212Sandrew				pmap_invalidate_page(pmap, pv->pv_va);
2970281494Sandrew
2971281494Sandrew				/*
2972281494Sandrew				 * Update the vm_page_t clean/reference bits.
2973281494Sandrew				 */
2974297446Sandrew				if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
2975281494Sandrew					vm_page_dirty(m);
2976281494Sandrew
2977281494Sandrew				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
2978281494Sandrew
2979281494Sandrew				/* Mark free */
2980281494Sandrew				pc->pc_map[field] |= bitmask;
2981281494Sandrew
2982281494Sandrew				pmap_resident_count_dec(pmap, 1);
2983281494Sandrew				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2984281494Sandrew				m->md.pv_gen++;
2985281494Sandrew
2986297446Sandrew				pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde),
2987297446Sandrew				    &free);
2988281494Sandrew				freed++;
2989281494Sandrew			}
2990281494Sandrew		}
2991281494Sandrew		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
2992281494Sandrew		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
2993281494Sandrew		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
2994281494Sandrew		if (allfree) {
2995281494Sandrew			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2996281494Sandrew			free_pv_chunk(pc);
2997281494Sandrew		}
2998281494Sandrew	}
2999281494Sandrew	pmap_invalidate_all(pmap);
3000281494Sandrew	if (lock != NULL)
3001281494Sandrew		rw_wunlock(lock);
3002281494Sandrew	PMAP_UNLOCK(pmap);
3003281494Sandrew	pmap_free_zero_pages(&free);
3004281494Sandrew}
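
/*
 * Illustrative sketch: within a pv chunk, set bits in pc_map mark free pv
 * entries, so ~pc_map[field] & pc_freemask[field] yields the in-use
 * entries iterated by pmap_remove_pages() above.  A hypothetical helper,
 * not part of the pmap interface, counting a chunk's in-use entries:
 */
static int
pv_chunk_count_inuse(struct pv_chunk *pc)
{
	uint64_t inuse;
	int count, field;

	count = 0;
	for (field = 0; field < _NPCM; field++) {
		inuse = ~pc->pc_map[field] & pc_freemask[field];
		while (inuse != 0) {
			/* Clear the lowest set bit and count it. */
			inuse &= inuse - 1;
			count++;
		}
	}
	return (count);
}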
3005281494Sandrew
3006281494Sandrew/*
3007281494Sandrew * This is used to check if a page has been accessed or modified.  Since
3008281494Sandrew * there is no hardware dirty bit, a mapping is assumed to have been
3009281494Sandrew * modified if it is mapped read/write.
3010281494Sandrew */
3011281494Sandrewstatic boolean_t
3012281494Sandrewpmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
3013281494Sandrew{
3014281494Sandrew	struct rwlock *lock;
3015281494Sandrew	pv_entry_t pv;
3016297446Sandrew	pt_entry_t *pte, mask, value;
3017281494Sandrew	pmap_t pmap;
3018297446Sandrew	int lvl, md_gen;
3019281494Sandrew	boolean_t rv;
3020281494Sandrew
3021281494Sandrew	rv = FALSE;
3022281494Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
3023281494Sandrew	rw_rlock(lock);
3024281494Sandrewrestart:
3025281494Sandrew	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
3026281494Sandrew		pmap = PV_PMAP(pv);
3027281494Sandrew		if (!PMAP_TRYLOCK(pmap)) {
3028281494Sandrew			md_gen = m->md.pv_gen;
3029281494Sandrew			rw_runlock(lock);
3030281494Sandrew			PMAP_LOCK(pmap);
3031281494Sandrew			rw_rlock(lock);
3032281494Sandrew			if (md_gen != m->md.pv_gen) {
3033281494Sandrew				PMAP_UNLOCK(pmap);
3034281494Sandrew				goto restart;
3035281494Sandrew			}
3036281494Sandrew		}
3037297446Sandrew		pte = pmap_pte(pmap, pv->pv_va, &lvl);
3038297446Sandrew		KASSERT(lvl == 3,
3039297446Sandrew		    ("pmap_page_test_mappings: Invalid level %d", lvl));
3040281494Sandrew		mask = 0;
3041281494Sandrew		value = 0;
3042281494Sandrew		if (modified) {
3043281494Sandrew			mask |= ATTR_AP_RW_BIT;
3044281494Sandrew			value |= ATTR_AP(ATTR_AP_RW);
3045281494Sandrew		}
3046281494Sandrew		if (accessed) {
3047281494Sandrew			mask |= ATTR_AF | ATTR_DESCR_MASK;
3048281494Sandrew			value |= ATTR_AF | L3_PAGE;
3049281494Sandrew		}
3050297446Sandrew		rv = (pmap_load(pte) & mask) == value;
3051281494Sandrew		PMAP_UNLOCK(pmap);
3052281494Sandrew		if (rv)
3053281494Sandrew			goto out;
3054281494Sandrew	}
3055281494Sandrewout:
3056281494Sandrew	rw_runlock(lock);
3057281494Sandrew	return (rv);
3058281494Sandrew}
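
/*
 * Illustrative sketch: the mask/value test in pmap_page_test_mappings()
 * reduces to the following hypothetical per-entry predicates, which are
 * not part of the pmap interface.  "Modified" is inferred from the entry
 * being writable, since there is no hardware dirty bit at this revision,
 * and "referenced" requires both the access flag and a valid L3 page
 * descriptor.
 */
static __inline boolean_t
pmap_l3_test_modified(pt_entry_t pte)
{

	return ((pte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW));
}

static __inline boolean_t
pmap_l3_test_referenced(pt_entry_t pte)
{

	return ((pte & (ATTR_AF | ATTR_DESCR_MASK)) == (ATTR_AF | L3_PAGE));
}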
3059281494Sandrew
3060281494Sandrew/*
3061281494Sandrew *	pmap_is_modified:
3062281494Sandrew *
3063281494Sandrew *	Return whether or not the specified physical page was modified
3064281494Sandrew *	in any physical maps.
3065281494Sandrew */
3066281494Sandrewboolean_t
3067281494Sandrewpmap_is_modified(vm_page_t m)
3068281494Sandrew{
3069281494Sandrew
3070281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3071281494Sandrew	    ("pmap_is_modified: page %p is not managed", m));
3072281494Sandrew
3073281494Sandrew	/*
3074281494Sandrew	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
3075281494Sandrew	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
3076281494Sandrew	 * is clear, no PTEs can have PG_M set.
3077281494Sandrew	 */
3078281494Sandrew	VM_OBJECT_ASSERT_WLOCKED(m->object);
3079281494Sandrew	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
3080281494Sandrew		return (FALSE);
3081281494Sandrew	return (pmap_page_test_mappings(m, FALSE, TRUE));
3082281494Sandrew}
3083281494Sandrew
3084281494Sandrew/*
3085281494Sandrew *	pmap_is_prefaultable:
3086281494Sandrew *
3087281494Sandrew *	Return whether or not the specified virtual address is eligible
3088281494Sandrew *	for prefault.
3089281494Sandrew */
3090281494Sandrewboolean_t
3091281494Sandrewpmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
3092281494Sandrew{
3093297446Sandrew	pt_entry_t *pte;
3094281494Sandrew	boolean_t rv;
3095297446Sandrew	int lvl;
3096281494Sandrew
3097281494Sandrew	rv = FALSE;
3098281494Sandrew	PMAP_LOCK(pmap);
3099297446Sandrew	pte = pmap_pte(pmap, addr, &lvl);
3100297446Sandrew	if (pte != NULL && pmap_load(pte) != 0)
3101281494Sandrew		rv = TRUE;
3103281494Sandrew	PMAP_UNLOCK(pmap);
3104281494Sandrew	return (rv);
3105281494Sandrew}
3106281494Sandrew
3107281494Sandrew/*
3108281494Sandrew *	pmap_is_referenced:
3109281494Sandrew *
3110281494Sandrew *	Return whether or not the specified physical page was referenced
3111281494Sandrew *	in any physical maps.
3112281494Sandrew */
3113281494Sandrewboolean_t
3114281494Sandrewpmap_is_referenced(vm_page_t m)
3115281494Sandrew{
3116281494Sandrew
3117281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3118281494Sandrew	    ("pmap_is_referenced: page %p is not managed", m));
3119281494Sandrew	return (pmap_page_test_mappings(m, TRUE, FALSE));
3120281494Sandrew}
3121281494Sandrew
3122281494Sandrew/*
3123281494Sandrew * Clear the write and modified bits in each of the given page's mappings.
3124281494Sandrew */
3125281494Sandrewvoid
3126281494Sandrewpmap_remove_write(vm_page_t m)
3127281494Sandrew{
3128281494Sandrew	pmap_t pmap;
3129281494Sandrew	struct rwlock *lock;
3130281494Sandrew	pv_entry_t pv;
3131297446Sandrew	pt_entry_t oldpte, *pte;
3132297446Sandrew	int lvl, md_gen;
3133281494Sandrew
3134281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3135281494Sandrew	    ("pmap_remove_write: page %p is not managed", m));
3136281494Sandrew
3137281494Sandrew	/*
3138281494Sandrew	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
3139281494Sandrew	 * set by another thread while the object is locked.  Thus,
3140281494Sandrew	 * if PGA_WRITEABLE is clear, no page table entries need updating.
3141281494Sandrew	 */
3142281494Sandrew	VM_OBJECT_ASSERT_WLOCKED(m->object);
3143281494Sandrew	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
3144281494Sandrew		return;
3145281494Sandrew	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
3146281494Sandrewretry_pv_loop:
3147281494Sandrew	rw_wlock(lock);
3148281494Sandrew	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
3149281494Sandrew		pmap = PV_PMAP(pv);
3150281494Sandrew		if (!PMAP_TRYLOCK(pmap)) {
3151281494Sandrew			md_gen = m->md.pv_gen;
3152281494Sandrew			rw_wunlock(lock);
3153281494Sandrew			PMAP_LOCK(pmap);
3154281494Sandrew			rw_wlock(lock);
3155281494Sandrew			if (md_gen != m->md.pv_gen) {
3156281494Sandrew				PMAP_UNLOCK(pmap);
3157281494Sandrew				rw_wunlock(lock);
3158281494Sandrew				goto retry_pv_loop;
3159281494Sandrew			}
3160281494Sandrew		}
3161297446Sandrew		pte = pmap_pte(pmap, pv->pv_va, &lvl);
3162281494Sandrewretry:
3163297446Sandrew		oldpte = pmap_load(pte);
3164297446Sandrew		if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
3165297446Sandrew			if (!atomic_cmpset_long(pte, oldpte,
3166297446Sandrew			    oldpte | ATTR_AP(ATTR_AP_RO)))
3167281494Sandrew				goto retry;
3168297446Sandrew			if ((oldpte & ATTR_AF) != 0)
3169281494Sandrew				vm_page_dirty(m);
3170281494Sandrew			pmap_invalidate_page(pmap, pv->pv_va);
3171281494Sandrew		}
3172281494Sandrew		PMAP_UNLOCK(pmap);
3173281494Sandrew	}
3174281494Sandrew	rw_wunlock(lock);
3175281494Sandrew	vm_page_aflag_clear(m, PGA_WRITEABLE);
3176281494Sandrew}
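
/*
 * Illustrative sketch: the heart of pmap_remove_write() is a
 * compare-and-set loop that downgrades a writable L3 entry to read-only
 * without losing concurrent updates to other bits.  A hypothetical helper
 * for a single entry, not part of the pmap interface; it returns whether
 * the entry was writable with the access flag set (i.e. would be reported
 * dirty), and the caller remains responsible for pmap_invalidate_page().
 */
static boolean_t
pmap_l3_downgrade_write(pt_entry_t *l3)
{
	pt_entry_t old;

	do {
		old = pmap_load(l3);
		/* Nothing to do if the entry is already read-only. */
		if ((old & ATTR_AP_RW_BIT) != ATTR_AP(ATTR_AP_RW))
			return (FALSE);
	} while (!atomic_cmpset_long(l3, old, old | ATTR_AP(ATTR_AP_RO)));
	return ((old & ATTR_AF) != 0);
}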
3177281494Sandrew
3178281494Sandrewstatic __inline boolean_t
3179281494Sandrewsafe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
3180281494Sandrew{
3181281494Sandrew
3182281494Sandrew	return (FALSE);
3183281494Sandrew}
3184281494Sandrew
3185281494Sandrew#define	PMAP_TS_REFERENCED_MAX	5
3186281494Sandrew
3187281494Sandrew/*
3188281494Sandrew *	pmap_ts_referenced:
3189281494Sandrew *
3190281494Sandrew *	Return a count of reference bits for a page, clearing those bits.
3191281494Sandrew *	It is not necessary for every reference bit to be cleared, but it
3192281494Sandrew *	is necessary that 0 only be returned when there are truly no
3193281494Sandrew *	reference bits set.
3194281494Sandrew *
3195281494Sandrew *	XXX: The exact number of bits to check and clear is a matter that
3196281494Sandrew *	should be tested and standardized at some point in the future for
3197281494Sandrew *	optimal aging of shared pages.
3198281494Sandrew */
3199281494Sandrewint
3200281494Sandrewpmap_ts_referenced(vm_page_t m)
3201281494Sandrew{
3202281494Sandrew	pv_entry_t pv, pvf;
3203281494Sandrew	pmap_t pmap;
3204281494Sandrew	struct rwlock *lock;
3205297446Sandrew	pd_entry_t *pde, tpde;
3206297446Sandrew	pt_entry_t *pte, tpte;
3207281494Sandrew	vm_paddr_t pa;
3208297446Sandrew	int cleared, md_gen, not_cleared, lvl;
3209281494Sandrew	struct spglist free;
3210281494Sandrew
3211281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3212281494Sandrew	    ("pmap_ts_referenced: page %p is not managed", m));
3213281494Sandrew	SLIST_INIT(&free);
3214281494Sandrew	cleared = 0;
3215281494Sandrew	pa = VM_PAGE_TO_PHYS(m);
3216281494Sandrew	lock = PHYS_TO_PV_LIST_LOCK(pa);
3217281494Sandrew	rw_wlock(lock);
3218281494Sandrewretry:
3219281494Sandrew	not_cleared = 0;
3220281494Sandrew	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
3221281494Sandrew		goto out;
3222281494Sandrew	pv = pvf;
3223281494Sandrew	do {
3224281494Sandrew		if (pvf == NULL)
3225281494Sandrew			pvf = pv;
3226281494Sandrew		pmap = PV_PMAP(pv);
3227281494Sandrew		if (!PMAP_TRYLOCK(pmap)) {
3228281494Sandrew			md_gen = m->md.pv_gen;
3229281494Sandrew			rw_wunlock(lock);
3230281494Sandrew			PMAP_LOCK(pmap);
3231281494Sandrew			rw_wlock(lock);
3232281494Sandrew			if (md_gen != m->md.pv_gen) {
3233281494Sandrew				PMAP_UNLOCK(pmap);
3234281494Sandrew				goto retry;
3235281494Sandrew			}
3236281494Sandrew		}
3237297446Sandrew		pde = pmap_pde(pmap, pv->pv_va, &lvl);
3238297446Sandrew		KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
3239297446Sandrew		KASSERT(lvl == 2,
3240297446Sandrew		    ("pmap_ts_referenced: invalid pde level %d", lvl));
3241297446Sandrew		tpde = pmap_load(pde);
3242297446Sandrew		KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
3243281494Sandrew		    ("pmap_ts_referenced: found an invalid l2 table"));
3244297446Sandrew		pte = pmap_l2_to_l3(pde, pv->pv_va);
3245297446Sandrew		tpte = pmap_load(pte);
3246297446Sandrew		if ((tpte & ATTR_AF) != 0) {
3247297446Sandrew			if (safe_to_clear_referenced(pmap, tpte)) {
3248281494Sandrew				/*
3249281494Sandrew				 * TODO: We don't handle the access flag
3250281494Sandrew				 * at all. We need to be able to set it in
3251281494Sandrew				 * the exception handler.
3252281494Sandrew				 */
3253286073Semaste				panic("ARM64TODO: safe_to_clear_referenced\n");
3254297446Sandrew			} else if ((tpte & ATTR_SW_WIRED) == 0) {
3255281494Sandrew				/*
3256281494Sandrew				 * Wired pages cannot be paged out so
3257281494Sandrew				 * doing accessed bit emulation for
3258281494Sandrew				 * them is wasted effort. We do the
3259281494Sandrew				 * hard work for unwired pages only.
3260281494Sandrew				 */
3261297446Sandrew				pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
3262288445Sandrew				    &free, &lock);
3263281494Sandrew				pmap_invalidate_page(pmap, pv->pv_va);
3264281494Sandrew				cleared++;
3265281494Sandrew				if (pvf == pv)
3266281494Sandrew					pvf = NULL;
3267281494Sandrew				pv = NULL;
3268281494Sandrew				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
3269281494Sandrew				    ("inconsistent pv lock %p %p for page %p",
3270281494Sandrew				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
3271281494Sandrew			} else
3272281494Sandrew				not_cleared++;
3273281494Sandrew		}
3274281494Sandrew		PMAP_UNLOCK(pmap);
3275281494Sandrew		/* Rotate the PV list if it has more than one entry. */
3276281494Sandrew		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
3277281494Sandrew			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
3278281494Sandrew			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
3279281494Sandrew			m->md.pv_gen++;
3280281494Sandrew		}
3281281494Sandrew	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
3282281494Sandrew	    not_cleared < PMAP_TS_REFERENCED_MAX);
3283281494Sandrewout:
3284281494Sandrew	rw_wunlock(lock);
3285281494Sandrew	pmap_free_zero_pages(&free);
3286281494Sandrew	return (cleared + not_cleared);
3287281494Sandrew}
3288281494Sandrew
3289281494Sandrew/*
3290281494Sandrew *	Apply the given advice to the specified range of addresses within the
3291281494Sandrew *	given pmap.  Depending on the advice, clear the referenced and/or
3292281494Sandrew *	modified flags in each mapping and set the mapped page's dirty field.
3293281494Sandrew */
3294281494Sandrewvoid
3295281494Sandrewpmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
3296281494Sandrew{
3297281494Sandrew}
3298281494Sandrew
3299281494Sandrew/*
3300281494Sandrew *	Clear the modify bits on the specified physical page.
3301281494Sandrew */
3302281494Sandrewvoid
3303281494Sandrewpmap_clear_modify(vm_page_t m)
3304281494Sandrew{
3305281494Sandrew
3306281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3307281494Sandrew	    ("pmap_clear_modify: page %p is not managed", m));
3308281494Sandrew	VM_OBJECT_ASSERT_WLOCKED(m->object);
3309281494Sandrew	KASSERT(!vm_page_xbusied(m),
3310281494Sandrew	    ("pmap_clear_modify: page %p is exclusive busied", m));
3311281494Sandrew
3312281494Sandrew	/*
3313281494Sandrew	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
3314281494Sandrew	 * If the object containing the page is locked and the page is not
3315281494Sandrew	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
3316281494Sandrew	 */
3317281494Sandrew	if ((m->aflags & PGA_WRITEABLE) == 0)
3318281494Sandrew		return;
3319281846Sandrew
3320286073Semaste	/* ARM64TODO: We lack support for tracking if a page is modified */
3321281494Sandrew}
3322281494Sandrew
3323282221Sandrewvoid *
3324282221Sandrewpmap_mapbios(vm_paddr_t pa, vm_size_t size)
3325282221Sandrew{
3326282221Sandrew
3327282221Sandrew	return ((void *)PHYS_TO_DMAP(pa));
3328282221Sandrew}
3329282221Sandrew
3330282221Sandrewvoid
3331282221Sandrewpmap_unmapbios(vm_paddr_t pa, vm_size_t size)
3332282221Sandrew{
3333282221Sandrew}
3334282221Sandrew
3335281494Sandrew/*
3336281494Sandrew * Sets the memory attribute for the specified page.
3337281494Sandrew */
3338281494Sandrewvoid
3339281494Sandrewpmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
3340281494Sandrew{
3341281494Sandrew
3342286080Sandrew	m->md.pv_memattr = ma;
3343286080Sandrew
3344286080Sandrew	/*
3345286080Sandrew	 * ARM64TODO: Implement the below (from the amd64 pmap)
3346286080Sandrew	 * If "m" is a normal page, update its direct mapping.  This update
3347286080Sandrew	 * can be relied upon to perform any cache operations that are
3348286080Sandrew	 * required for data coherence.
3349286080Sandrew	 */
3350286080Sandrew	if ((m->flags & PG_FICTITIOUS) == 0 &&
3351286080Sandrew	    PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m)))
3352286080Sandrew		panic("ARM64TODO: pmap_page_set_memattr");
3353281494Sandrew}
3354281494Sandrew
3355281494Sandrew/*
3356281494Sandrew * perform the pmap work for mincore
3357281494Sandrew */
3358281494Sandrewint
3359281494Sandrewpmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
3360281494Sandrew{
3361287570Sandrew	pd_entry_t *l1p, l1;
3362287570Sandrew	pd_entry_t *l2p, l2;
3363287570Sandrew	pt_entry_t *l3p, l3;
3364287570Sandrew	vm_paddr_t pa;
3365287570Sandrew	bool managed;
3366287570Sandrew	int val;
3367281494Sandrew
3368287570Sandrew	PMAP_LOCK(pmap);
3369287570Sandrewretry:
3370287570Sandrew	pa = 0;
3371287570Sandrew	val = 0;
3372287570Sandrew	managed = false;
3373287570Sandrew
3374287570Sandrew	l1p = pmap_l1(pmap, addr);
3375287570Sandrew	if (l1p == NULL) /* No l1 */
3376287570Sandrew		goto done;
3377295425Swma
3378287570Sandrew	l1 = pmap_load(l1p);
3379295425Swma	if ((l1 & ATTR_DESCR_MASK) == L1_INVAL)
3380295425Swma		goto done;
3381295425Swma
3382287570Sandrew	if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
3383287570Sandrew		pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET);
3384287570Sandrew		managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3385287570Sandrew		val = MINCORE_SUPER | MINCORE_INCORE;
3386287570Sandrew		if (pmap_page_dirty(l1))
3387287570Sandrew			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3388287570Sandrew		if ((l1 & ATTR_AF) == ATTR_AF)
3389287570Sandrew			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3390287570Sandrew		goto done;
3391287570Sandrew	}
3392287570Sandrew
3393287570Sandrew	l2p = pmap_l1_to_l2(l1p, addr);
3394287570Sandrew	if (l2p == NULL) /* No l2 */
3395287570Sandrew		goto done;
3396295425Swma
3397287570Sandrew	l2 = pmap_load(l2p);
3398295425Swma	if ((l2 & ATTR_DESCR_MASK) == L2_INVAL)
3399295425Swma		goto done;
3400295425Swma
3401287570Sandrew	if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
3402287570Sandrew		pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET);
3403287570Sandrew		managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3404287570Sandrew		val = MINCORE_SUPER | MINCORE_INCORE;
3405287570Sandrew		if (pmap_page_dirty(l2))
3406287570Sandrew			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3407287570Sandrew		if ((l2 & ATTR_AF) == ATTR_AF)
3408287570Sandrew			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3409287570Sandrew		goto done;
3410287570Sandrew	}
3411287570Sandrew
3412287570Sandrew	l3p = pmap_l2_to_l3(l2p, addr);
3413287570Sandrew	if (l3p == NULL) /* No l3 */
3414287570Sandrew		goto done;
3415295425Swma
3416287570Sandrew	l3 = pmap_load(l3p);
3417295425Swma	if ((l3 & ATTR_DESCR_MASK) == L3_INVAL)
3418295425Swma		goto done;
3419295425Swma
3420287570Sandrew	if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) {
3421287570Sandrew		pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET);
3422287570Sandrew		managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3423287570Sandrew		val = MINCORE_INCORE;
3424287570Sandrew		if (pmap_page_dirty(l3))
3425287570Sandrew			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3426287570Sandrew		if ((l3 & ATTR_AF) == ATTR_AF)
3427287570Sandrew			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3428287570Sandrew	}
3429287570Sandrew
3430287570Sandrewdone:
3431287570Sandrew	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
3432287570Sandrew	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
3433287570Sandrew		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
3434287570Sandrew		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
3435287570Sandrew			goto retry;
3436287570Sandrew	} else
3437287570Sandrew		PA_UNLOCK_COND(*locked_pa);
3438287570Sandrew	PMAP_UNLOCK(pmap);
3439287570Sandrew
3440287570Sandrew	return (val);
3441281494Sandrew}
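
/*
 * Illustrative sketch: the three leaf cases in pmap_mincore() apply the
 * same access-flag and dirty checks.  A hypothetical helper, not part of
 * the pmap interface, converting a leaf entry into mincore(2) flags;
 * "superpage" is true for L1 and L2 block mappings.
 */
static int
pmap_mincore_flags(pt_entry_t tpte, bool superpage)
{
	int val;

	val = MINCORE_INCORE;
	if (superpage)
		val |= MINCORE_SUPER;
	if (pmap_page_dirty(tpte))
		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
	if ((tpte & ATTR_AF) == ATTR_AF)
		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	return (val);
}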
3442281494Sandrew
3443281494Sandrewvoid
3444281494Sandrewpmap_activate(struct thread *td)
3445281494Sandrew{
3446281494Sandrew	pmap_t	pmap;
3447281494Sandrew
3448281494Sandrew	critical_enter();
3449281494Sandrew	pmap = vmspace_pmap(td->td_proc->p_vmspace);
3450297446Sandrew	td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0);
3451297446Sandrew	__asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr));
3452285212Sandrew	pmap_invalidate_all(pmap);
3453281494Sandrew	critical_exit();
3454281494Sandrew}
3455281494Sandrew
3456281494Sandrewvoid
3457287105Sandrewpmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
3458281494Sandrew{
3459281494Sandrew
3460287105Sandrew	if (va >= VM_MIN_KERNEL_ADDRESS) {
3461287105Sandrew		cpu_icache_sync_range(va, sz);
3462287105Sandrew	} else {
3463287105Sandrew		u_int len, offset;
3464287105Sandrew		vm_paddr_t pa;
3465287105Sandrew
3466287105Sandrew		/* Find the length of data in this page to flush */
3467287105Sandrew		offset = va & PAGE_MASK;
3468287105Sandrew		len = imin(PAGE_SIZE - offset, sz);
3469287105Sandrew
3470287105Sandrew		while (sz != 0) {
3471287105Sandrew			/* Extract the physical address & find it in the DMAP */
3472287105Sandrew			pa = pmap_extract(pmap, va);
3473287105Sandrew			if (pa != 0)
3474287105Sandrew				cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
3475287105Sandrew
3476287105Sandrew			/* Move to the next page */
3477287105Sandrew			sz -= len;
3478287105Sandrew			va += len;
3479287105Sandrew			/* Set the length for the next iteration */
3480287105Sandrew			len = imin(PAGE_SIZE, sz);
3481287105Sandrew		}
3482287105Sandrew	}
3483281494Sandrew}
3484281494Sandrew
3485281494Sandrew/*
3486281494Sandrew *	Increase the starting virtual address of the given mapping if a
3487281494Sandrew *	different alignment might result in more superpage mappings.
3488281494Sandrew */
3489281494Sandrewvoid
3490281494Sandrewpmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3491281494Sandrew    vm_offset_t *addr, vm_size_t size)
3492281494Sandrew{
3493281494Sandrew}
3494281494Sandrew
3495281494Sandrew/**
3496281494Sandrew * Get the kernel virtual address of a set of physical pages. If there are
3497281494Sandrew * physical addresses not covered by the DMAP perform a transient mapping
3498281494Sandrew * that will be removed when calling pmap_unmap_io_transient.
3499281494Sandrew *
3500281494Sandrew * \param page        The pages the caller wishes to obtain the virtual
3501281494Sandrew *                    address on the kernel memory map.
3502281494Sandrew * \param vaddr       On return contains the kernel virtual memory address
3503281494Sandrew *                    of the pages passed in the page parameter.
3504281494Sandrew * \param count       Number of pages passed in.
3505281494Sandrew * \param can_fault   TRUE if the thread using the mapped pages can take
3506281494Sandrew *                    page faults, FALSE otherwise.
3507281494Sandrew *
3508281494Sandrew * \returns TRUE if the caller must call pmap_unmap_io_transient when
3509281494Sandrew *          finished or FALSE otherwise.
3510281494Sandrew *
3511281494Sandrew */
3512281494Sandrewboolean_t
3513281494Sandrewpmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3514281494Sandrew    boolean_t can_fault)
3515281494Sandrew{
3516281494Sandrew	vm_paddr_t paddr;
3517281494Sandrew	boolean_t needs_mapping;
3518281494Sandrew	int error, i;
3519281494Sandrew
3520281494Sandrew	/*
3521281494Sandrew	 * Allocate any KVA space that we need, this is done in a separate
3522281494Sandrew	 * loop to prevent calling vmem_alloc while pinned.
3523281494Sandrew	 */
3524281494Sandrew	needs_mapping = FALSE;
3525281494Sandrew	for (i = 0; i < count; i++) {
3526281494Sandrew		paddr = VM_PAGE_TO_PHYS(page[i]);
3527297617Sandrew		if (__predict_false(!PHYS_IN_DMAP(paddr))) {
3528281494Sandrew			error = vmem_alloc(kernel_arena, PAGE_SIZE,
3529281494Sandrew			    M_BESTFIT | M_WAITOK, &vaddr[i]);
3530281494Sandrew			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
3531281494Sandrew			needs_mapping = TRUE;
3532281494Sandrew		} else {
3533281494Sandrew			vaddr[i] = PHYS_TO_DMAP(paddr);
3534281494Sandrew		}
3535281494Sandrew	}
3536281494Sandrew
3537281494Sandrew	/* Exit early if everything is covered by the DMAP */
3538281494Sandrew	if (!needs_mapping)
3539281494Sandrew		return (FALSE);
3540281494Sandrew
3541281494Sandrew	if (!can_fault)
3542281494Sandrew		sched_pin();
3543281494Sandrew	for (i = 0; i < count; i++) {
3544281494Sandrew		paddr = VM_PAGE_TO_PHYS(page[i]);
3545297617Sandrew		if (!PHYS_IN_DMAP(paddr)) {
3546281494Sandrew			panic(
3547281494Sandrew			   "pmap_map_io_transient: TODO: Map out of DMAP data");
3548281494Sandrew		}
3549281494Sandrew	}
3550281494Sandrew
3551281494Sandrew	return (needs_mapping);
3552281494Sandrew}
3553281494Sandrew
3554281494Sandrewvoid
3555281494Sandrewpmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3556281494Sandrew    boolean_t can_fault)
3557281494Sandrew{
3558281494Sandrew	vm_paddr_t paddr;
3559281494Sandrew	int i;
3560281494Sandrew
3561281494Sandrew	if (!can_fault)
3562281494Sandrew		sched_unpin();
3563281494Sandrew	for (i = 0; i < count; i++) {
3564281494Sandrew		paddr = VM_PAGE_TO_PHYS(page[i]);
3565297617Sandrew		if (!PHYS_IN_DMAP(paddr)) {
3566286073Semaste			panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
3567281494Sandrew		}
3568281494Sandrew	}
3569281494Sandrew}
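
/*
 * Illustrative sketch: the expected calling convention for
 * pmap_map_io_transient() and pmap_unmap_io_transient() above.  The page,
 * the zeroing performed on it, and the helper name are hypothetical.
 */
static void
pmap_io_transient_example(vm_page_t m)
{
	vm_page_t pages[1];
	vm_offset_t vaddr[1];
	boolean_t mapped;

	pages[0] = m;
	/*
	 * can_fault == FALSE keeps the thread pinned while a transient
	 * mapping exists.
	 */
	mapped = pmap_map_io_transient(pages, vaddr, 1, FALSE);
	memset((void *)vaddr[0], 0, PAGE_SIZE);
	if (mapped)
		pmap_unmap_io_transient(pages, vaddr, 1, FALSE);
}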
3570