1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 2003 Peter Wemm
9 * All rights reserved.
10 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
11 * All rights reserved.
12 * Copyright (c) 2014 Andrew Turner
13 * All rights reserved.
14 * Copyright (c) 2014 The FreeBSD Foundation
15 * All rights reserved.
16 *
17 * This code is derived from software contributed to Berkeley by
18 * the Systems Programming Group of the University of Utah Computer
19 * Science Department and William Jolitz of UUNET Technologies Inc.
20 *
21 * This software was developed by Andrew Turner under sponsorship from
22 * the FreeBSD Foundation.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 *    notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 *    notice, this list of conditions and the following disclaimer in the
31 *    documentation and/or other materials provided with the distribution.
32 * 3. All advertising materials mentioning features or use of this software
33 *    must display the following acknowledgement:
34 *	This product includes software developed by the University of
35 *	California, Berkeley and its contributors.
36 * 4. Neither the name of the University nor the names of its contributors
37 *    may be used to endorse or promote products derived from this software
38 *    without specific prior written permission.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
53 */
54/*-
55 * Copyright (c) 2003 Networks Associates Technology, Inc.
56 * All rights reserved.
57 *
58 * This software was developed for the FreeBSD Project by Jake Burkholder,
59 * Safeport Network Services, and Network Associates Laboratories, the
60 * Security Research Division of Network Associates, Inc. under
61 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
62 * CHATS research program.
63 *
64 * Redistribution and use in source and binary forms, with or without
65 * modification, are permitted provided that the following conditions
66 * are met:
67 * 1. Redistributions of source code must retain the above copyright
68 *    notice, this list of conditions and the following disclaimer.
69 * 2. Redistributions in binary form must reproduce the above copyright
70 *    notice, this list of conditions and the following disclaimer in the
71 *    documentation and/or other materials provided with the distribution.
72 *
73 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83 * SUCH DAMAGE.
84 */
85
86#include <sys/cdefs.h>
87__FBSDID("$FreeBSD: head/sys/arm64/arm64/pmap.c 295425 2016-02-09 06:26:27Z wma $");
88
89/*
90 *	Manages physical address maps.
91 *
92 *	Since the information managed by this module is
93 *	also stored by the logical address mapping module,
94 *	this module may throw away valid virtual-to-physical
95 *	mappings at almost any time.  However, invalidations
96 *	of virtual-to-physical mappings must be done as
97 *	requested.
98 *
99 *	In order to cope with hardware architectures which
100 *	make virtual-to-physical map invalidates expensive,
101 *	this module may delay invalidation or reduced-protection
102 *	operations until such time as they are actually
103 *	necessary.  This module is given full information as
104 *	to which processors are currently using which maps,
105 *	and to when physical maps must be made correct.
106 */
107
108#include <sys/param.h>
109#include <sys/bus.h>
110#include <sys/systm.h>
111#include <sys/kernel.h>
112#include <sys/ktr.h>
113#include <sys/lock.h>
114#include <sys/malloc.h>
115#include <sys/mman.h>
116#include <sys/msgbuf.h>
117#include <sys/mutex.h>
118#include <sys/proc.h>
119#include <sys/rwlock.h>
120#include <sys/sx.h>
121#include <sys/vmem.h>
122#include <sys/vmmeter.h>
123#include <sys/sched.h>
124#include <sys/sysctl.h>
125#include <sys/_unrhdr.h>
126#include <sys/smp.h>
127
128#include <vm/vm.h>
129#include <vm/vm_param.h>
130#include <vm/vm_kern.h>
131#include <vm/vm_page.h>
132#include <vm/vm_map.h>
133#include <vm/vm_object.h>
134#include <vm/vm_extern.h>
135#include <vm/vm_pageout.h>
136#include <vm/vm_pager.h>
137#include <vm/vm_radix.h>
138#include <vm/vm_reserv.h>
139#include <vm/uma.h>
140
141#include <machine/machdep.h>
142#include <machine/md_var.h>
143#include <machine/pcb.h>
144
145#define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
146#define	NUPDE			(NPDEPG * NPDEPG)
147#define	NUSERPGTBLS		(NUPDE + NPDEPG)
148
149#if !defined(DIAGNOSTIC)
150#ifdef __GNUC_GNU_INLINE__
151#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
152#else
153#define PMAP_INLINE	extern inline
154#endif
155#else
156#define PMAP_INLINE
157#endif
158
159/*
160 * These are configured by the mair_el1 register, which is set up in locore.S.
161 */
162#define	DEVICE_MEMORY	0
163#define	UNCACHED_MEMORY	1
164#define	CACHED_MEMORY	2
165
166
167#ifdef PV_STATS
168#define PV_STAT(x)	do { x ; } while (0)
169#else
170#define PV_STAT(x)	do { } while (0)
171#endif
172
173#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
174
175#define	NPV_LIST_LOCKS	MAXCPU
176
177#define	PHYS_TO_PV_LIST_LOCK(pa)	\
178			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
179
180#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
181	struct rwlock **_lockp = (lockp);		\
182	struct rwlock *_new_lock;			\
183							\
184	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
185	if (_new_lock != *_lockp) {			\
186		if (*_lockp != NULL)			\
187			rw_wunlock(*_lockp);		\
188		*_lockp = _new_lock;			\
189		rw_wlock(*_lockp);			\
190	}						\
191} while (0)
192
193#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
194			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
195
196#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
197	struct rwlock **_lockp = (lockp);		\
198							\
199	if (*_lockp != NULL) {				\
200		rw_wunlock(*_lockp);			\
201		*_lockp = NULL;				\
202	}						\
203} while (0)
204
205#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
206			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
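
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): callers hold at most one pv list lock at a time and
 * switch it with the macros above as they move between pages.  The
 * function name and its argument are hypothetical.
 */
#if 0
static void
pv_list_lock_example(vm_page_t m)
{
	struct rwlock *lock;

	lock = NULL;
	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
	/* ... walk or modify m's pv list here ... */
	RELEASE_PV_LIST_LOCK(&lock);
}
#endif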
207
208struct pmap kernel_pmap_store;
209
210vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
211vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
212vm_offset_t kernel_vm_end = 0;
213
214struct msgbuf *msgbufp = NULL;
215
216static struct rwlock_padalign pvh_global_lock;
217
218vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
219
220/*
221 * Data for the pv entry allocation mechanism
222 */
223static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
224static struct mtx pv_chunks_mutex;
225static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
226
227static void	free_pv_chunk(struct pv_chunk *pc);
228static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
229static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
230static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
231static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
232static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
233		    vm_offset_t va);
234static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
235    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
236static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
237    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
238static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
239    vm_page_t m, struct rwlock **lockp);
240
241static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
242		struct rwlock **lockp);
243
244static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
245    struct spglist *free);
246static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
247
248/*
249 * These load the old table data and store the new value.
250 * They need to be atomic as the System MMU may write to the table at
251 * the same time as the CPU.
252 */
253#define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
254#define	pmap_set(table, mask) atomic_set_64(table, mask)
255#define	pmap_load_clear(table) atomic_swap_64(table, 0)
256#define	pmap_load(table) (*table)
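
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): a typical table update pairs pmap_load_store() with a
 * cache write-back of the entry and a TLB invalidation.  The function
 * name and its arguments are hypothetical; PTE_SYNC() and pmap_l3() are
 * defined further below.
 */
#if 0
static void
pmap_update_entry_example(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *l3, old_l3;

	l3 = pmap_l3(kernel_pmap, va);
	/* Atomically install the new entry and fetch the old one. */
	old_l3 = pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
	    ATTR_IDX(CACHED_MEMORY) | L3_PAGE);
	/* "old_l3" holds the previous entry, e.g. for accessed/dirty checks. */
	PTE_SYNC(l3);
	pmap_invalidate_page(kernel_pmap, va);
}
#endif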
257
258/********************/
259/* Inline functions */
260/********************/
261
262static __inline void
263pagecopy(void *s, void *d)
264{
265
266	memcpy(d, s, PAGE_SIZE);
267}
268
269static __inline void
270pagezero(void *p)
271{
272
273	bzero(p, PAGE_SIZE);
274}
275
276#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
277#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
278#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)
279
280static __inline pd_entry_t *
281pmap_l1(pmap_t pmap, vm_offset_t va)
282{
283
284	return (&pmap->pm_l1[pmap_l1_index(va)]);
285}
286
287static __inline pd_entry_t *
288pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
289{
290	pd_entry_t *l2;
291
292	l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
293	return (&l2[pmap_l2_index(va)]);
294}
295
296static __inline pd_entry_t *
297pmap_l2(pmap_t pmap, vm_offset_t va)
298{
299	pd_entry_t *l1;
300
301	l1 = pmap_l1(pmap, va);
302	if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
303		return (NULL);
304
305	return (pmap_l1_to_l2(l1, va));
306}
307
308static __inline pt_entry_t *
309pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
310{
311	pt_entry_t *l3;
312
313	l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
314	return (&l3[pmap_l3_index(va)]);
315}
316
317static __inline pt_entry_t *
318pmap_l3(pmap_t pmap, vm_offset_t va)
319{
320	pd_entry_t *l2;
321
322	l2 = pmap_l2(pmap, va);
323	if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
324		return (NULL);
325
326	return (pmap_l2_to_l3(l2, va));
327}
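
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): a full walk from the L1 table down to an L3 entry
 * using the helpers above.  pmap_l2() and pmap_l3() return NULL when the
 * next level is not a table descriptor, so each step must be checked.
 * The function name is hypothetical; pmap_extract() below performs
 * essentially this walk with the pmap lock held.
 */
#if 0
static vm_paddr_t
pmap_walk_example(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l2;
	pt_entry_t *l3;

	l2 = pmap_l2(pmap, va);
	if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
		return (0);
	l3 = pmap_l2_to_l3(l2, va);
	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
		return (0);
	return ((pmap_load(l3) & ~ATTR_MASK) | (va & L3_OFFSET));
}
#endif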
328
329bool
330pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2,
331    pt_entry_t **l3)
332{
333	pd_entry_t *l1p, *l2p;
334
335	if (pmap->pm_l1 == NULL)
336		return (false);
337
338	l1p = pmap_l1(pmap, va);
339	*l1 = l1p;
340
341	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
342		*l2 = NULL;
343		*l3 = NULL;
344		return (true);
345	}
346
347	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
348		return (false);
349
350	l2p = pmap_l1_to_l2(l1p, va);
351	*l2 = l2p;
352
353	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
354		*l3 = NULL;
355		return (true);
356	}
357
358	*l3 = pmap_l2_to_l3(l2p, va);
359
360	return (true);
361}
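
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): pmap_get_tables() reports how far the walk reached.
 * With 4KB granules an L1 block maps 1GB and an L2 block maps 2MB, so a
 * NULL lower-level pointer tells the caller which mapping size it hit.
 * The function name is hypothetical and invalid entries are ignored.
 */
#if 0
static vm_size_t
pmap_mapping_size_example(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1, *l2;
	pt_entry_t *l3;

	if (!pmap_get_tables(pmap, va, &l1, &l2, &l3))
		return (0);
	if (l2 == NULL)
		return (L1_SIZE);	/* 1GB block at L1 */
	if (l3 == NULL)
		return (L2_SIZE);	/* 2MB block at L2 */
	return (L3_SIZE);		/* 4KB page at L3 */
}
#endif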
362
363static __inline int
364pmap_is_current(pmap_t pmap)
365{
366
367	return ((pmap == pmap_kernel()) ||
368	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
369}
370
371static __inline int
372pmap_l3_valid(pt_entry_t l3)
373{
374
375	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
376}
377
378static __inline int
379pmap_l3_valid_cacheable(pt_entry_t l3)
380{
381
382	return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
383	    ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
384}
385
386#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
387
388/*
389 * Checks if the page is dirty. We currently lack proper tracking of this on
390 * arm64 so for now assume is a page mapped as rw was accessed it is.
391 */
392static inline int
393pmap_page_dirty(pt_entry_t pte)
394{
395
396	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
397	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
398}
399
400static __inline void
401pmap_resident_count_inc(pmap_t pmap, int count)
402{
403
404	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
405	pmap->pm_stats.resident_count += count;
406}
407
408static __inline void
409pmap_resident_count_dec(pmap_t pmap, int count)
410{
411
412	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
413	KASSERT(pmap->pm_stats.resident_count >= count,
414	    ("pmap %p resident count underflow %ld %d", pmap,
415	    pmap->pm_stats.resident_count, count));
416	pmap->pm_stats.resident_count -= count;
417}
418
419static pt_entry_t *
420pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
421    u_int *l2_slot)
422{
423	pt_entry_t *l2;
424	pd_entry_t *l1;
425
426	l1 = (pd_entry_t *)l1pt;
427	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
428
429	/* Check that locore used an L1 table mapping */
430	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
431	   ("Invalid bootstrap L1 table"));
432	/* Find the address of the L2 table */
433	l2 = (pt_entry_t *)init_pt_va;
434	*l2_slot = pmap_l2_index(va);
435
436	return (l2);
437}
438
439static vm_paddr_t
440pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
441{
442	u_int l1_slot, l2_slot;
443	pt_entry_t *l2;
444
445	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
446
447	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
448}
449
450static void
451pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart)
452{
453	vm_offset_t va;
454	vm_paddr_t pa;
455	pd_entry_t *l1;
456	u_int l1_slot;
457
458	pa = dmap_phys_base = kernstart & ~L1_OFFSET;
459	va = DMAP_MIN_ADDRESS;
460	l1 = (pd_entry_t *)l1pt;
461	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);
462
463	for (; va < DMAP_MAX_ADDRESS;
464	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
465		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
466
467		pmap_load_store(&l1[l1_slot],
468		    (pa & ~L1_OFFSET) | ATTR_DEFAULT |
469		    ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
470	}
471
472	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
473	cpu_tlb_flushID();
474}
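
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): once the direct map built above is in place, physical
 * addresses covered by it translate to kernel virtual addresses (and
 * back) by simple arithmetic, with no page table walk.  The function
 * name is hypothetical.
 */
#if 0
static void *
pmap_dmap_example(vm_paddr_t pa)
{
	void *p;

	p = (void *)PHYS_TO_DMAP(pa);
	KASSERT(DMAP_TO_PHYS((vm_offset_t)p) == pa, ("dmap round trip"));
	return (p);
}
#endif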
475
476static vm_offset_t
477pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
478{
479	vm_offset_t l2pt;
480	vm_paddr_t pa;
481	pd_entry_t *l1;
482	u_int l1_slot;
483
484	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));
485
486	l1 = (pd_entry_t *)l1pt;
487	l1_slot = pmap_l1_index(va);
488	l2pt = l2_start;
489
490	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
491		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
492
493		pa = pmap_early_vtophys(l1pt, l2pt);
494		pmap_load_store(&l1[l1_slot],
495		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
496		l2pt += PAGE_SIZE;
497	}
498
499	/* Clean the L2 page table */
500	memset((void *)l2_start, 0, l2pt - l2_start);
501	cpu_dcache_wb_range(l2_start, l2pt - l2_start);
502
503	/* Flush the l1 table to ram */
504	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
505
506	return (l2pt);
507}
508
509static vm_offset_t
510pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
511{
512	vm_offset_t l2pt, l3pt;
513	vm_paddr_t pa;
514	pd_entry_t *l2;
515	u_int l2_slot;
516
517	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
518
519	l2 = pmap_l2(kernel_pmap, va);
520	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
521	l2pt = (vm_offset_t)l2;
522	l2_slot = pmap_l2_index(va);
523	l3pt = l3_start;
524
525	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
526		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
527
528		pa = pmap_early_vtophys(l1pt, l3pt);
529		pmap_load_store(&l2[l2_slot],
530		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
531		l3pt += PAGE_SIZE;
532	}
533
534	/* Clean the L3 page table */
535	memset((void *)l3_start, 0, l3pt - l3_start);
536	cpu_dcache_wb_range(l3_start, l3pt - l3_start);
537
538	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
539
540	return (l3pt);
541}
542
543/*
544 *	Bootstrap the system enough to run with virtual memory.
545 */
546void
547pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
548{
549	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
550	uint64_t kern_delta;
551	pt_entry_t *l2;
552	vm_offset_t va, freemempos;
553	vm_offset_t dpcpu, msgbufpv;
554	vm_paddr_t pa, min_pa;
555	int i;
556
557	kern_delta = KERNBASE - kernstart;
558	physmem = 0;
559
560	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
561	printf("%lx\n", l1pt);
562	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
563
564	/* Set this early so we can use the pagetable walking functions */
565	kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
566	PMAP_LOCK_INIT(kernel_pmap);
567
568 	/*
569	 * Initialize the global pv list lock.
570	 */
571	rw_init(&pvh_global_lock, "pmap pv global");
572
573	/* Assume the address we were loaded to is a valid physical address */
574	min_pa = KERNBASE - kern_delta;
575
576	/*
577	 * Find the minimum physical address. physmap is sorted,
578	 * but may contain empty ranges.
579	 */
580	for (i = 0; i < (physmap_idx * 2); i += 2) {
581		if (physmap[i] == physmap[i + 1])
582			continue;
583		if (physmap[i] <= min_pa)
584			min_pa = physmap[i];
585		break;
586	}
587
588	/* Create a direct map region early so we can use it for pa -> va */
589	pmap_bootstrap_dmap(l1pt, min_pa);
590
591	va = KERNBASE;
592	pa = KERNBASE - kern_delta;
593
594	/*
595	 * Start to initialise phys_avail by copying from physmap
596	 * up to the physical address KERNBASE points at.
597	 */
598	map_slot = avail_slot = 0;
599	for (; map_slot < (physmap_idx * 2) &&
600	    avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) {
601		if (physmap[map_slot] == physmap[map_slot + 1])
602			continue;
603
604		if (physmap[map_slot] <= pa &&
605		    physmap[map_slot + 1] > pa)
606			break;
607
608		phys_avail[avail_slot] = physmap[map_slot];
609		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
610		physmem += (phys_avail[avail_slot + 1] -
611		    phys_avail[avail_slot]) >> PAGE_SHIFT;
612		avail_slot += 2;
613	}
614
615	/* Add the memory before the kernel */
616	if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) {
617		phys_avail[avail_slot] = physmap[map_slot];
618		phys_avail[avail_slot + 1] = pa;
619		physmem += (phys_avail[avail_slot + 1] -
620		    phys_avail[avail_slot]) >> PAGE_SHIFT;
621		avail_slot += 2;
622	}
623	used_map_slot = map_slot;
624
625	/*
626	 * Read the page table to find out what is already mapped.
627	 * This assumes we have mapped a block of memory from KERNBASE
628	 * using a single L1 entry.
629	 */
630	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
631
632	/* Sanity check the index, KERNBASE should be the first VA */
633	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
634
635	/* Find how many pages we have mapped */
636	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
637		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
638			break;
639
640		/* Check locore used L2 blocks */
641		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
642		    ("Invalid bootstrap L2 table"));
643		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
644		    ("Incorrect PA in L2 table"));
645
646		va += L2_SIZE;
647		pa += L2_SIZE;
648	}
649
650	va = roundup2(va, L1_SIZE);
651
652	freemempos = KERNBASE + kernlen;
653	freemempos = roundup2(freemempos, PAGE_SIZE);
654	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
655	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
656	/* And the l3 tables for the early devmap */
657	freemempos = pmap_bootstrap_l3(l1pt,
658	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
659
660	cpu_tlb_flushID();
661
662#define alloc_pages(var, np)						\
663	(var) = freemempos;						\
664	freemempos += (np * PAGE_SIZE);					\
665	memset((char *)(var), 0, ((np) * PAGE_SIZE));
666
667	/* Allocate dynamic per-cpu area. */
668	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
669	dpcpu_init((void *)dpcpu, 0);
670
671	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
672	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
673	msgbufp = (void *)msgbufpv;
674
675	virtual_avail = roundup2(freemempos, L1_SIZE);
676	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
677	kernel_vm_end = virtual_avail;
678
679	pa = pmap_early_vtophys(l1pt, freemempos);
680
681	/* Finish initialising phys_avail */
682	map_slot = used_map_slot;
683	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
684	    map_slot < (physmap_idx * 2); map_slot += 2) {
685		if (physmap[map_slot] == physmap[map_slot + 1])
686			continue;
687
688		/* Have we used the current range? */
689		if (physmap[map_slot + 1] <= pa)
690			continue;
691
692		/* Do we need to split the entry? */
693		if (physmap[map_slot] < pa) {
694			phys_avail[avail_slot] = pa;
695			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
696		} else {
697			phys_avail[avail_slot] = physmap[map_slot];
698			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
699		}
700		physmem += (phys_avail[avail_slot + 1] -
701		    phys_avail[avail_slot]) >> PAGE_SHIFT;
702
703		avail_slot += 2;
704	}
705	phys_avail[avail_slot] = 0;
706	phys_avail[avail_slot + 1] = 0;
707
708	/*
709	 * Maxmem isn't the "maximum memory", it's one larger than the
710	 * highest page of the physical address space.  It should be
711	 * called something like "Maxphyspage".
712	 */
713	Maxmem = atop(phys_avail[avail_slot - 1]);
714
715	cpu_tlb_flushID();
716}
717
718/*
719 *	Initialize a vm_page's machine-dependent fields.
720 */
721void
722pmap_page_init(vm_page_t m)
723{
724
725	TAILQ_INIT(&m->md.pv_list);
726	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
727}
728
729/*
730 *	Initialize the pmap module.
731 *	Called by vm_init, to initialize any structures that the pmap
732 *	system needs to map virtual memory.
733 */
734void
735pmap_init(void)
736{
737	int i;
738
739	/*
740	 * Initialize the pv chunk list mutex.
741	 */
742	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
743
744	/*
745	 * Initialize the pool of pv list locks.
746	 */
747	for (i = 0; i < NPV_LIST_LOCKS; i++)
748		rw_init(&pv_list_locks[i], "pmap pv list");
749}
750
751/*
752 * Normal, non-SMP, invalidation functions.
753 * We inline these within pmap.c for speed.
754 */
755PMAP_INLINE void
756pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
757{
758
759	sched_pin();
760	__asm __volatile(
761	    "dsb  sy		\n"
762	    "tlbi vaae1is, %0	\n"
763	    "dsb  sy		\n"
764	    "isb		\n"
765	    : : "r"(va >> PAGE_SHIFT));
766	sched_unpin();
767}
768
769PMAP_INLINE void
770pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
771{
772	vm_offset_t addr;
773
774	sched_pin();
775	sva >>= PAGE_SHIFT;
776	eva >>= PAGE_SHIFT;
777	__asm __volatile("dsb	sy");
778	for (addr = sva; addr < eva; addr++) {
779		__asm __volatile(
780		    "tlbi vaae1is, %0" : : "r"(addr));
781	}
782	__asm __volatile(
783	    "dsb  sy	\n"
784	    "isb	\n");
785	sched_unpin();
786}
787
788PMAP_INLINE void
789pmap_invalidate_all(pmap_t pmap)
790{
791
792	sched_pin();
793	__asm __volatile(
794	    "dsb  sy		\n"
795	    "tlbi vmalle1is	\n"
796	    "dsb  sy		\n"
797	    "isb		\n");
798	sched_unpin();
799}
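
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): after changing a run of L3 entries, callers in this
 * file invalidate either each page, the whole range, or the entire pmap.
 * The function name is hypothetical and the size threshold below is an
 * arbitrary example, not a tuned value.
 */
#if 0
static void
pmap_invalidate_example(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{

	if (eva - sva < L2_SIZE)
		pmap_invalidate_range(pmap, sva, eva);
	else
		pmap_invalidate_all(pmap);
}
#endif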
800
801/*
802 *	Routine:	pmap_extract
803 *	Function:
804 *		Extract the physical page address associated
805 *		with the given map/virtual_address pair.
806 */
807vm_paddr_t
808pmap_extract(pmap_t pmap, vm_offset_t va)
809{
810	pd_entry_t *l2p, l2;
811	pt_entry_t *l3p, l3;
812	vm_paddr_t pa;
813
814	pa = 0;
815	PMAP_LOCK(pmap);
816	/*
817	 * Start with the l2 table. We are unable to allocate
818	 * pages in the l1 table.
819	 */
820	l2p = pmap_l2(pmap, va);
821	if (l2p != NULL) {
822		l2 = pmap_load(l2p);
823		if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) {
824			l3p = pmap_l2_to_l3(l2p, va);
825			if (l3p != NULL) {
826				l3 = pmap_load(l3p);
827
828				if ((l3 & ATTR_DESCR_MASK) == L3_PAGE)
829					pa = (l3 & ~ATTR_MASK) |
830					    (va & L3_OFFSET);
831			}
832		} else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
833			pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET);
834	}
835	PMAP_UNLOCK(pmap);
836	return (pa);
837}
838
839/*
840 *	Routine:	pmap_extract_and_hold
841 *	Function:
842 *		Atomically extract and hold the physical page
843 *		with the given pmap and virtual address pair
844 *		if that mapping permits the given protection.
845 */
846vm_page_t
847pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
848{
849	pt_entry_t *l3p, l3;
850	vm_paddr_t pa;
851	vm_page_t m;
852
853	pa = 0;
854	m = NULL;
855	PMAP_LOCK(pmap);
856retry:
857	l3p = pmap_l3(pmap, va);
858	if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
859		if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
860		    ((prot & VM_PROT_WRITE) == 0)) {
861			if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa))
862				goto retry;
863			m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK);
864			vm_page_hold(m);
865		}
866	}
867	PA_UNLOCK_COND(pa);
868	PMAP_UNLOCK(pmap);
869	return (m);
870}
871
872vm_paddr_t
873pmap_kextract(vm_offset_t va)
874{
875	pd_entry_t *l2p, l2;
876	pt_entry_t *l3;
877	vm_paddr_t pa;
878
879	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
880		pa = DMAP_TO_PHYS(va);
881	} else {
882		l2p = pmap_l2(kernel_pmap, va);
883		if (l2p == NULL)
884			panic("pmap_kextract: No l2");
885		l2 = pmap_load(l2p);
886		if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
887			return ((l2 & ~ATTR_MASK) |
888			    (va & L2_OFFSET));
889
890		l3 = pmap_l2_to_l3(l2p, va);
891		if (l3 == NULL)
892			panic("pmap_kextract: No l3...");
893		pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK);
894	}
895	return (pa);
896}
897
898/***************************************************
899 * Low level mapping routines.....
900 ***************************************************/
901
902void
903pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
904{
905	pt_entry_t *l3;
906	vm_offset_t va;
907
908	KASSERT((pa & L3_OFFSET) == 0,
909	   ("pmap_kenter_device: Invalid physical address"));
910	KASSERT((sva & L3_OFFSET) == 0,
911	   ("pmap_kenter_device: Invalid virtual address"));
912	KASSERT((size & PAGE_MASK) == 0,
913	    ("pmap_kenter_device: Mapping is not page-sized"));
914
915	va = sva;
916	while (size != 0) {
917		l3 = pmap_l3(kernel_pmap, va);
918		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
919		pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
920		    ATTR_IDX(DEVICE_MEMORY) | L3_PAGE);
921		PTE_SYNC(l3);
922
923		va += PAGE_SIZE;
924		pa += PAGE_SIZE;
925		size -= PAGE_SIZE;
926	}
927	pmap_invalidate_range(kernel_pmap, sva, va);
928}
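
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): a typical use of the device mapping helpers.  The
 * function name and the use of kva_alloc()/kva_free() for the virtual
 * range are hypothetical.
 */
#if 0
static void *
pmap_map_device_example(vm_paddr_t pa, vm_size_t size)
{
	vm_offset_t va;

	size = round_page(size);
	va = kva_alloc(size);
	if (va == 0)
		return (NULL);
	pmap_kenter_device(va, size, trunc_page(pa));
	return ((void *)va);
	/* Later: pmap_kremove_device(va, size); kva_free(va, size); */
}
#endif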
929
930/*
931 * Remove a page from the kernel pagetables.
932 * Note: not SMP coherent.
933 */
934PMAP_INLINE void
935pmap_kremove(vm_offset_t va)
936{
937	pt_entry_t *l3;
938
939	l3 = pmap_l3(kernel_pmap, va);
940	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
941
942	if (pmap_l3_valid_cacheable(pmap_load(l3)))
943		cpu_dcache_wb_range(va, L3_SIZE);
944	pmap_load_clear(l3);
945	PTE_SYNC(l3);
946	pmap_invalidate_page(kernel_pmap, va);
947}
948
949void
950pmap_kremove_device(vm_offset_t sva, vm_size_t size)
951{
952	pt_entry_t *l3;
953	vm_offset_t va;
954
955	KASSERT((sva & L3_OFFSET) == 0,
956	   ("pmap_kremove_device: Invalid virtual address"));
957	KASSERT((size & PAGE_MASK) == 0,
958	    ("pmap_kremove_device: Mapping is not page-sized"));
959
960	va = sva;
961	while (size != 0) {
962		l3 = pmap_l3(kernel_pmap, va);
963		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
964		pmap_load_clear(l3);
965		PTE_SYNC(l3);
966
967		va += PAGE_SIZE;
968		size -= PAGE_SIZE;
969	}
970	pmap_invalidate_range(kernel_pmap, sva, va);
971}
972
973/*
974 *	Used to map a range of physical addresses into kernel
975 *	virtual address space.
976 *
977 *	The value passed in '*virt' is a suggested virtual address for
978 *	the mapping. Architectures which can support a direct-mapped
979 *	physical to virtual region can return the appropriate address
980 *	within that region, leaving '*virt' unchanged. Other
981 *	architectures should map the pages starting at '*virt' and
982 *	update '*virt' with the first usable address after the mapped
983 *	region.
984 */
985vm_offset_t
986pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
987{
988	return PHYS_TO_DMAP(start);
989}
990
991
992/*
993 * Add a list of wired pages to the kva.
994 * This routine is only used for temporary
995 * kernel mappings that do not need to have
996 * page modification or references recorded.
997 * Note that old mappings are simply written
998 * over.  The page *must* be wired.
999 * Note: SMP coherent.  Uses a ranged shootdown IPI.
1000 */
1001void
1002pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
1003{
1004	pt_entry_t *l3, pa;
1005	vm_offset_t va;
1006	vm_page_t m;
1007	int i;
1008
1009	va = sva;
1010	for (i = 0; i < count; i++) {
1011		m = ma[i];
1012		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
1013		    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
1014		l3 = pmap_l3(kernel_pmap, va);
1015		pmap_load_store(l3, pa);
1016		PTE_SYNC(l3);
1017
1018		va += L3_SIZE;
1019	}
1020	pmap_invalidate_range(kernel_pmap, sva, va);
1021}
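
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): pmap_qenter() and pmap_qremove() are used as a pair
 * for short-lived mappings of wired pages.  The function name, page
 * array and KVA allocation are hypothetical.
 */
#if 0
static void
pmap_qenter_example(vm_page_t *ma, int npages)
{
	vm_offset_t va;

	va = kva_alloc(ptoa(npages));
	pmap_qenter(va, ma, npages);
	/* ... access the pages through the linear mapping at "va" ... */
	pmap_qremove(va, npages);
	kva_free(va, ptoa(npages));
}
#endif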
1022
1023/*
1024 * This routine tears out page mappings from the
1025 * kernel -- it is meant only for temporary mappings.
1026 * Note: SMP coherent.  Uses a ranged shootdown IPI.
1027 */
1028void
1029pmap_qremove(vm_offset_t sva, int count)
1030{
1031	pt_entry_t *l3;
1032	vm_offset_t va;
1033
1034	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
1035
1036	va = sva;
1037	while (count-- > 0) {
1038		l3 = pmap_l3(kernel_pmap, va);
1039		KASSERT(l3 != NULL, ("pmap_qremove: Invalid address"));
1040
1041		if (pmap_l3_valid_cacheable(pmap_load(l3)))
1042			cpu_dcache_wb_range(va, L3_SIZE);
1043		pmap_load_clear(l3);
1044		PTE_SYNC(l3);
1045
1046		va += PAGE_SIZE;
1047	}
1048	pmap_invalidate_range(kernel_pmap, sva, va);
1049}
1050
1051/***************************************************
1052 * Page table page management routines.....
1053 ***************************************************/
1054static __inline void
1055pmap_free_zero_pages(struct spglist *free)
1056{
1057	vm_page_t m;
1058
1059	while ((m = SLIST_FIRST(free)) != NULL) {
1060		SLIST_REMOVE_HEAD(free, plinks.s.ss);
1061		/* Preserve the page's PG_ZERO setting. */
1062		vm_page_free_toq(m);
1063	}
1064}
1065
1066/*
1067 * Schedule the specified unused page table page to be freed.  Specifically,
1068 * add the page to the specified list of pages that will be released to the
1069 * physical memory manager after the TLB has been updated.
1070 */
1071static __inline void
1072pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
1073    boolean_t set_PG_ZERO)
1074{
1075
1076	if (set_PG_ZERO)
1077		m->flags |= PG_ZERO;
1078	else
1079		m->flags &= ~PG_ZERO;
1080	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
1081}
1082
1083/*
1084 * Decrements a page table page's wire count, which is used to record the
1085 * number of valid page table entries within the page.  If the wire count
1086 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
1087 * page table page was unmapped and FALSE otherwise.
1088 */
1089static inline boolean_t
1090pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1091{
1092
1093	--m->wire_count;
1094	if (m->wire_count == 0) {
1095		_pmap_unwire_l3(pmap, va, m, free);
1096		return (TRUE);
1097	} else
1098		return (FALSE);
1099}
1100
1101static void
1102_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1103{
1104
1105	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1106	/*
1107	 * unmap the page table page
1108	 */
1109	if (m->pindex >= NUPDE) {
1110		/* PD page */
1111		pd_entry_t *l1;
1112		l1 = pmap_l1(pmap, va);
1113		pmap_load_clear(l1);
1114		PTE_SYNC(l1);
1115	} else {
1116		/* PTE page */
1117		pd_entry_t *l2;
1118		l2 = pmap_l2(pmap, va);
1119		pmap_load_clear(l2);
1120		PTE_SYNC(l2);
1121	}
1122	pmap_resident_count_dec(pmap, 1);
1123	if (m->pindex < NUPDE) {
1124		/* We just released a PT, unhold the matching PD */
1125		vm_page_t pdpg;
1126
1127		pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK);
1128		pmap_unwire_l3(pmap, va, pdpg, free);
1129	}
1130	pmap_invalidate_page(pmap, va);
1131
1132	/*
1133	 * This is a release store so that the ordinary store unmapping
1134	 * the page table page is globally performed before TLB shoot-
1135	 * down is begun.
1136	 */
1137	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
1138
1139	/*
1140	 * Put page on a list so that it is released after
1141	 * *ALL* TLB shootdown is done
1142	 */
1143	pmap_add_delayed_free_list(m, free, TRUE);
1144}
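
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): the delayed-free pattern used by the remove paths.
 * Freed page table pages are collected on a local list and only handed
 * back to the page allocator once the TLB invalidations have been
 * issued.  The function name and arguments are hypothetical.
 */
#if 0
static void
pmap_delayed_free_example(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{
	struct spglist free;

	SLIST_INIT(&free);
	PMAP_LOCK(pmap);
	(void)pmap_unwire_l3(pmap, va, mpte, &free);
	PMAP_UNLOCK(pmap);
	/* Only release the pages after the TLB invalidations above. */
	pmap_free_zero_pages(&free);
}
#endif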
1145
1146/*
1147 * After removing an l3 entry, this routine is used to
1148 * conditionally free the page, and manage the hold/wire counts.
1149 */
1150static int
1151pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
1152    struct spglist *free)
1153{
1154	vm_page_t mpte;
1155
1156	if (va >= VM_MAXUSER_ADDRESS)
1157		return (0);
1158	KASSERT(ptepde != 0, ("pmap_unuse_l3: ptepde != 0"));
1159	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
1160	return (pmap_unwire_l3(pmap, va, mpte, free));
1161}
1162
1163void
1164pmap_pinit0(pmap_t pmap)
1165{
1166
1167	PMAP_LOCK_INIT(pmap);
1168	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1169	pmap->pm_l1 = kernel_pmap->pm_l1;
1170}
1171
1172int
1173pmap_pinit(pmap_t pmap)
1174{
1175	vm_paddr_t l1phys;
1176	vm_page_t l1pt;
1177
1178	/*
1179	 * allocate the l1 page
1180	 */
1181	while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
1182	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
1183		VM_WAIT;
1184
1185	l1phys = VM_PAGE_TO_PHYS(l1pt);
1186	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
1187
1188	if ((l1pt->flags & PG_ZERO) == 0)
1189		pagezero(pmap->pm_l1);
1190
1191	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1192
1193	return (1);
1194}
1195
1196/*
1197 * This routine is called if the desired page table page does not exist.
1198 *
1199 * If page table page allocation fails, this routine may sleep before
1200 * returning NULL.  It sleeps only if a lock pointer was given.
1201 *
1202 * Note: If a page allocation fails at page table level two or three,
1203 * one or two pages may be held during the wait, only to be released
1204 * afterwards.  This conservative approach is easily argued to avoid
1205 * race conditions.
1206 */
1207static vm_page_t
1208_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
1209{
1210	vm_page_t m, /*pdppg, */pdpg;
1211
1212	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1213
1214	/*
1215	 * Allocate a page table page.
1216	 */
1217	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
1218	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
1219		if (lockp != NULL) {
1220			RELEASE_PV_LIST_LOCK(lockp);
1221			PMAP_UNLOCK(pmap);
1222			rw_runlock(&pvh_global_lock);
1223			VM_WAIT;
1224			rw_rlock(&pvh_global_lock);
1225			PMAP_LOCK(pmap);
1226		}
1227
1228		/*
1229		 * Indicate the need to retry.  While waiting, the page table
1230		 * page may have been allocated.
1231		 */
1232		return (NULL);
1233	}
1234	if ((m->flags & PG_ZERO) == 0)
1235		pmap_zero_page(m);
1236
1237	/*
1238	 * Map the pagetable page into the process address space, if
1239	 * it isn't already there.
1240	 */
1241
1242	if (ptepindex >= NUPDE) {
1243		pd_entry_t *l1;
1244		vm_pindex_t l1index;
1245
1246		l1index = ptepindex - NUPDE;
1247		l1 = &pmap->pm_l1[l1index];
1248		pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
1249		PTE_SYNC(l1);
1250
1251	} else {
1252		vm_pindex_t l1index;
1253		pd_entry_t *l1, *l2;
1254
1255		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
1256		l1 = &pmap->pm_l1[l1index];
1257		if (pmap_load(l1) == 0) {
1258			/* recurse for allocating page dir */
1259			if (_pmap_alloc_l3(pmap, NUPDE + l1index,
1260			    lockp) == NULL) {
1261				--m->wire_count;
1262				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1263				vm_page_free_zero(m);
1264				return (NULL);
1265			}
1266		} else {
1267			pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK);
1268			pdpg->wire_count++;
1269		}
1270
1271		l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
1272		l2 = &l2[ptepindex & Ln_ADDR_MASK];
1273		pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
1274		PTE_SYNC(l2);
1275	}
1276
1277	pmap_resident_count_inc(pmap, 1);
1278
1279	return (m);
1280}
1281
1282static vm_page_t
1283pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
1284{
1285	vm_pindex_t ptepindex;
1286	pd_entry_t *l2;
1287	vm_page_t m;
1288
1289	/*
1290	 * Calculate pagetable page index
1291	 */
1292	ptepindex = pmap_l2_pindex(va);
1293retry:
1294	/*
1295	 * Get the page directory entry
1296	 */
1297	l2 = pmap_l2(pmap, va);
1298
1299	/*
1300	 * If the page table page is mapped, we just increment the
1301	 * hold count, and activate it.
1302	 */
1303	if (l2 != NULL && pmap_load(l2) != 0) {
1304		m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
1305		m->wire_count++;
1306	} else {
1307		/*
1308		 * Here if the pte page isn't mapped, or if it has been
1309		 * deallocated.
1310		 */
1311		m = _pmap_alloc_l3(pmap, ptepindex, lockp);
1312		if (m == NULL && lockp != NULL)
1313			goto retry;
1314	}
1315	return (m);
1316}
1317
1318
1319/***************************************************
1320 * Pmap allocation/deallocation routines.
1321 ***************************************************/
1322
1323/*
1324 * Release any resources held by the given physical map.
1325 * Called when a pmap initialized by pmap_pinit is being released.
1326 * Should only be called if the map contains no valid mappings.
1327 */
1328void
1329pmap_release(pmap_t pmap)
1330{
1331	vm_page_t m;
1332
1333	KASSERT(pmap->pm_stats.resident_count == 0,
1334	    ("pmap_release: pmap resident count %ld != 0",
1335	    pmap->pm_stats.resident_count));
1336
1337	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
1338
1339	m->wire_count--;
1340	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1341	vm_page_free_zero(m);
1342}
1343
1344#if 0
1345static int
1346kvm_size(SYSCTL_HANDLER_ARGS)
1347{
1348	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
1349
1350	return sysctl_handle_long(oidp, &ksize, 0, req);
1351}
1352SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
1353    0, 0, kvm_size, "LU", "Size of KVM");
1354
1355static int
1356kvm_free(SYSCTL_HANDLER_ARGS)
1357{
1358	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
1359
1360	return sysctl_handle_long(oidp, &kfree, 0, req);
1361}
1362SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
1363    0, 0, kvm_free, "LU", "Amount of KVM free");
1364#endif /* 0 */
1365
1366/*
1367 * grow the number of kernel page table entries, if needed
1368 */
1369void
1370pmap_growkernel(vm_offset_t addr)
1371{
1372	vm_paddr_t paddr;
1373	vm_page_t nkpg;
1374	pd_entry_t *l1, *l2;
1375
1376	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1377
1378	addr = roundup2(addr, L2_SIZE);
1379	if (addr - 1 >= kernel_map->max_offset)
1380		addr = kernel_map->max_offset;
1381	while (kernel_vm_end < addr) {
1382		l1 = pmap_l1(kernel_pmap, kernel_vm_end);
1383		if (pmap_load(l1) == 0) {
1384			/* We need a new PDP entry */
1385			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
1386			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
1387			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1388			if (nkpg == NULL)
1389				panic("pmap_growkernel: no memory to grow kernel");
1390			if ((nkpg->flags & PG_ZERO) == 0)
1391				pmap_zero_page(nkpg);
1392			paddr = VM_PAGE_TO_PHYS(nkpg);
1393			pmap_load_store(l1, paddr | L1_TABLE);
1394			PTE_SYNC(l1);
1395			continue; /* try again */
1396		}
1397		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
1398		if ((pmap_load(l2) & ATTR_AF) != 0) {
1399			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1400			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1401				kernel_vm_end = kernel_map->max_offset;
1402				break;
1403			}
1404			continue;
1405		}
1406
1407		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
1408		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
1409		    VM_ALLOC_ZERO);
1410		if (nkpg == NULL)
1411			panic("pmap_growkernel: no memory to grow kernel");
1412		if ((nkpg->flags & PG_ZERO) == 0)
1413			pmap_zero_page(nkpg);
1414		paddr = VM_PAGE_TO_PHYS(nkpg);
1415		pmap_load_store(l2, paddr | L2_TABLE);
1416		PTE_SYNC(l2);
1417		pmap_invalidate_page(kernel_pmap, kernel_vm_end);
1418
1419		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1420		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1421			kernel_vm_end = kernel_map->max_offset;
1422			break;
1423		}
1424	}
1425}
1426
1427
1428/***************************************************
1429 * page management routines.
1430 ***************************************************/
1431
1432CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1433CTASSERT(_NPCM == 3);
1434CTASSERT(_NPCPV == 168);
1435
1436static __inline struct pv_chunk *
1437pv_to_chunk(pv_entry_t pv)
1438{
1439
1440	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1441}
1442
1443#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1444
1445#define	PC_FREE0	0xfffffffffffffffful
1446#define	PC_FREE1	0xfffffffffffffffful
1447#define	PC_FREE2	0x000000fffffffffful
1448
1449static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
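
/*
 * Illustrative sketch only (kept under "#if 0", not part of the pmap
 * implementation): each pv entry occupies one bit of pc_map[]; the two
 * full 64-bit words plus the 40 low bits of PC_FREE2 account for the 168
 * entries asserted above.  The function name is hypothetical;
 * free_pv_entry() below performs this same index-to-bit conversion.
 */
#if 0
static void
pv_chunk_bit_example(pv_entry_t pv, int *field, int *bit)
{
	struct pv_chunk *pc;
	int idx;

	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	*field = idx / 64;
	*bit = idx % 64;
}
#endif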
1450
1451#if 0
1452#ifdef PV_STATS
1453static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1454
1455SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1456	"Current number of pv entry chunks");
1457SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1458	"Current number of pv entry chunks allocated");
1459SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1460	"Current number of pv entry chunks frees");
1461SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1462	"Number of times tried to get a chunk page but failed.");
1463
1464static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
1465static int pv_entry_spare;
1466
1467SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1468	"Current number of pv entry frees");
1469SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1470	"Current number of pv entry allocs");
1471SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1472	"Current number of pv entries");
1473SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1474	"Current number of spare pv entries");
1475#endif
1476#endif /* 0 */
1477
1478/*
1479 * We are in a serious low memory condition.  Resort to
1480 * drastic measures to free some pages so we can allocate
1481 * another pv entry chunk.
1482 *
1483 * Returns NULL if PV entries were reclaimed from the specified pmap.
1484 *
1485 * We do not, however, unmap 2mpages because subsequent accesses will
1486 * allocate per-page pv entries until repromotion occurs, thereby
1487 * exacerbating the shortage of free pv entries.
1488 */
1489static vm_page_t
1490reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
1491{
1492
1493	panic("ARM64TODO: reclaim_pv_chunk");
1494}
1495
1496/*
1497 * free the pv_entry back to the free list
1498 */
1499static void
1500free_pv_entry(pmap_t pmap, pv_entry_t pv)
1501{
1502	struct pv_chunk *pc;
1503	int idx, field, bit;
1504
1505	rw_assert(&pvh_global_lock, RA_LOCKED);
1506	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1507	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
1508	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
1509	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
1510	pc = pv_to_chunk(pv);
1511	idx = pv - &pc->pc_pventry[0];
1512	field = idx / 64;
1513	bit = idx % 64;
1514	pc->pc_map[field] |= 1ul << bit;
1515	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
1516	    pc->pc_map[2] != PC_FREE2) {
1517		/* 98% of the time, pc is already at the head of the list. */
1518		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
1519			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1520			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1521		}
1522		return;
1523	}
1524	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1525	free_pv_chunk(pc);
1526}
1527
1528static void
1529free_pv_chunk(struct pv_chunk *pc)
1530{
1531	vm_page_t m;
1532
1533	mtx_lock(&pv_chunks_mutex);
1534 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1535	mtx_unlock(&pv_chunks_mutex);
1536	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
1537	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
1538	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
1539	/* entire chunk is free, return it */
1540	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
1541	dump_drop_page(m->phys_addr);
1542	vm_page_unwire(m, PQ_NONE);
1543	vm_page_free(m);
1544}
1545
1546/*
1547 * Returns a new PV entry, allocating a new PV chunk from the system when
1548 * needed.  If this PV chunk allocation fails and a PV list lock pointer was
1549 * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
1550 * returned.
1551 *
1552 * The given PV list lock may be released.
1553 */
1554static pv_entry_t
1555get_pv_entry(pmap_t pmap, struct rwlock **lockp)
1556{
1557	int bit, field;
1558	pv_entry_t pv;
1559	struct pv_chunk *pc;
1560	vm_page_t m;
1561
1562	rw_assert(&pvh_global_lock, RA_LOCKED);
1563	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1564	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
1565retry:
1566	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1567	if (pc != NULL) {
1568		for (field = 0; field < _NPCM; field++) {
1569			if (pc->pc_map[field]) {
1570				bit = ffsl(pc->pc_map[field]) - 1;
1571				break;
1572			}
1573		}
1574		if (field < _NPCM) {
1575			pv = &pc->pc_pventry[field * 64 + bit];
1576			pc->pc_map[field] &= ~(1ul << bit);
1577			/* If this was the last item, move it to tail */
1578			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
1579			    pc->pc_map[2] == 0) {
1580				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1581				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
1582				    pc_list);
1583			}
1584			PV_STAT(atomic_add_long(&pv_entry_count, 1));
1585			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
1586			return (pv);
1587		}
1588	}
1589	/* No free items, allocate another chunk */
1590	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
1591	    VM_ALLOC_WIRED);
1592	if (m == NULL) {
1593		if (lockp == NULL) {
1594			PV_STAT(pc_chunk_tryfail++);
1595			return (NULL);
1596		}
1597		m = reclaim_pv_chunk(pmap, lockp);
1598		if (m == NULL)
1599			goto retry;
1600	}
1601	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
1602	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
1603	dump_add_page(m->phys_addr);
1604	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
1605	pc->pc_pmap = pmap;
1606	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
1607	pc->pc_map[1] = PC_FREE1;
1608	pc->pc_map[2] = PC_FREE2;
1609	mtx_lock(&pv_chunks_mutex);
1610	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1611	mtx_unlock(&pv_chunks_mutex);
1612	pv = &pc->pc_pventry[0];
1613	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1614	PV_STAT(atomic_add_long(&pv_entry_count, 1));
1615	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
1616	return (pv);
1617}
1618
1619/*
1620 * First find and then remove the pv entry for the specified pmap and virtual
1621 * address from the specified pv list.  Returns the pv entry if found and NULL
1622 * otherwise.  This operation can be performed on pv lists for either 4KB or
1623 * 2MB page mappings.
1624 */
1625static __inline pv_entry_t
1626pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1627{
1628	pv_entry_t pv;
1629
1630	rw_assert(&pvh_global_lock, RA_LOCKED);
1631	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
1632		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1633			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
1634			pvh->pv_gen++;
1635			break;
1636		}
1637	}
1638	return (pv);
1639}
1640
1641/*
1642 * First find and then destroy the pv entry for the specified pmap and virtual
1643 * address.  This operation can be performed on pv lists for either 4KB or 2MB
1644 * page mappings.
1645 */
1646static void
1647pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1648{
1649	pv_entry_t pv;
1650
1651	pv = pmap_pvh_remove(pvh, pmap, va);
1652	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
1653	free_pv_entry(pmap, pv);
1654}
1655
1656/*
1657 * Conditionally create the PV entry for a 4KB page mapping if the required
1658 * memory can be allocated without resorting to reclamation.
1659 */
1660static boolean_t
1661pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
1662    struct rwlock **lockp)
1663{
1664	pv_entry_t pv;
1665
1666	rw_assert(&pvh_global_lock, RA_LOCKED);
1667	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1668	/* Pass NULL instead of the lock pointer to disable reclamation. */
1669	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
1670		pv->pv_va = va;
1671		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1672		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
1673		m->md.pv_gen++;
1674		return (TRUE);
1675	} else
1676		return (FALSE);
1677}
1678
1679/*
1680 * pmap_remove_l3: do the things to unmap a page in a process
1681 */
1682static int
1683pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
1684    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
1685{
1686	pt_entry_t old_l3;
1687	vm_page_t m;
1688
1689	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1690	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
1691		cpu_dcache_wb_range(va, L3_SIZE);
1692	old_l3 = pmap_load_clear(l3);
1693	PTE_SYNC(l3);
1694	pmap_invalidate_page(pmap, va);
1695	if (old_l3 & ATTR_SW_WIRED)
1696		pmap->pm_stats.wired_count -= 1;
1697	pmap_resident_count_dec(pmap, 1);
1698	if (old_l3 & ATTR_SW_MANAGED) {
1699		m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
1700		if (pmap_page_dirty(old_l3))
1701			vm_page_dirty(m);
1702		if (old_l3 & ATTR_AF)
1703			vm_page_aflag_set(m, PGA_REFERENCED);
1704		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1705		pmap_pvh_free(&m->md, pmap, va);
1706	}
1707	return (pmap_unuse_l3(pmap, va, l2e, free));
1708}
1709
1710/*
1711 *	Remove the given range of addresses from the specified map.
1712 *
1713 *	It is assumed that the start and end are properly
1714 *	rounded to the page size.
1715 */
1716void
1717pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1718{
1719	struct rwlock *lock;
1720	vm_offset_t va, va_next;
1721	pd_entry_t *l1, *l2;
1722	pt_entry_t l3_paddr, *l3;
1723	struct spglist free;
1724	int anyvalid;
1725
1726	/*
1727	 * Perform an unsynchronized read.  This is, however, safe.
1728	 */
1729	if (pmap->pm_stats.resident_count == 0)
1730		return;
1731
1732	anyvalid = 0;
1733	SLIST_INIT(&free);
1734
1735	rw_rlock(&pvh_global_lock);
1736	PMAP_LOCK(pmap);
1737
1738	lock = NULL;
1739	for (; sva < eva; sva = va_next) {
1740
1741		if (pmap->pm_stats.resident_count == 0)
1742			break;
1743
1744		l1 = pmap_l1(pmap, sva);
1745		if (pmap_load(l1) == 0) {
1746			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
1747			if (va_next < sva)
1748				va_next = eva;
1749			continue;
1750		}
1751
1752		/*
1753		 * Calculate index for next page table.
1754		 */
1755		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
1756		if (va_next < sva)
1757			va_next = eva;
1758
1759		l2 = pmap_l1_to_l2(l1, sva);
1760		if (l2 == NULL)
1761			continue;
1762
1763		l3_paddr = pmap_load(l2);
1764
1765		/*
1766		 * Weed out invalid mappings.
1767		 */
1768		if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
1769			continue;
1770
1771		/*
1772		 * Limit our scan to either the end of the va represented
1773		 * by the current page table page, or to the end of the
1774		 * range being removed.
1775		 */
1776		if (va_next > eva)
1777			va_next = eva;
1778
1779		va = va_next;
1780		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
1781		    sva += L3_SIZE) {
1782			if (l3 == NULL)
1783				panic("l3 == NULL");
1784			if (pmap_load(l3) == 0) {
1785				if (va != va_next) {
1786					pmap_invalidate_range(pmap, va, sva);
1787					va = va_next;
1788				}
1789				continue;
1790			}
1791			if (va == va_next)
1792				va = sva;
1793			if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
1794			    &lock)) {
1795				sva += L3_SIZE;
1796				break;
1797			}
1798		}
1799		if (va != va_next)
1800			pmap_invalidate_range(pmap, va, sva);
1801	}
1802	if (lock != NULL)
1803		rw_wunlock(lock);
1804	if (anyvalid)
1805		pmap_invalidate_all(pmap);
1806	rw_runlock(&pvh_global_lock);
1807	PMAP_UNLOCK(pmap);
1808	pmap_free_zero_pages(&free);
1809}
1810
1811/*
1812 *	Routine:	pmap_remove_all
1813 *	Function:
1814 *		Removes this physical page from
1815 *		all physical maps in which it resides.
1816 *		Reflects back modify bits to the pager.
1817 *
1818 *	Notes:
1819 *		Original versions of this routine were very
1820 *		inefficient because they iteratively called
1821 *		pmap_remove (slow...)
1822 */
1823
1824void
1825pmap_remove_all(vm_page_t m)
1826{
1827	pv_entry_t pv;
1828	pmap_t pmap;
1829	pt_entry_t *l3, tl3;
1830	pd_entry_t *l2, tl2;
1831	struct spglist free;
1832
1833	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1834	    ("pmap_remove_all: page %p is not managed", m));
1835	SLIST_INIT(&free);
1836	rw_wlock(&pvh_global_lock);
1837	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1838		pmap = PV_PMAP(pv);
1839		PMAP_LOCK(pmap);
1840		pmap_resident_count_dec(pmap, 1);
1841		l2 = pmap_l2(pmap, pv->pv_va);
1842		KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
1843		tl2 = pmap_load(l2);
1844		KASSERT((tl2 & ATTR_DESCR_MASK) == L2_TABLE,
1845		    ("pmap_remove_all: found a table when expecting "
1846		     "a block in %p's pv list", m));
1847		l3 = pmap_l2_to_l3(l2, pv->pv_va);
1848		if (pmap_is_current(pmap) &&
1849		    pmap_l3_valid_cacheable(pmap_load(l3)))
1850			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
1851		tl3 = pmap_load_clear(l3);
1852		PTE_SYNC(l3);
1853		pmap_invalidate_page(pmap, pv->pv_va);
1854		if (tl3 & ATTR_SW_WIRED)
1855			pmap->pm_stats.wired_count--;
1856		if ((tl3 & ATTR_AF) != 0)
1857			vm_page_aflag_set(m, PGA_REFERENCED);
1858
1859		/*
1860		 * Update the vm_page_t clean and reference bits.
1861		 */
1862		if (pmap_page_dirty(tl3))
1863			vm_page_dirty(m);
1864		pmap_unuse_l3(pmap, pv->pv_va, tl2, &free);
1865		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
1866		m->md.pv_gen++;
1867		free_pv_entry(pmap, pv);
1868		PMAP_UNLOCK(pmap);
1869	}
1870	vm_page_aflag_clear(m, PGA_WRITEABLE);
1871	rw_wunlock(&pvh_global_lock);
1872	pmap_free_zero_pages(&free);
1873}
1874
1875/*
1876 *	Set the physical protection on the
1877 *	specified range of this map as requested.
1878 */
1879void
1880pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1881{
1882	vm_offset_t va, va_next;
1883	pd_entry_t *l1, *l2;
1884	pt_entry_t *l3p, l3;
1885
1886	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1887		pmap_remove(pmap, sva, eva);
1888		return;
1889	}
1890
1891	if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
1892		return;
1893
1894	PMAP_LOCK(pmap);
1895	for (; sva < eva; sva = va_next) {
1896
1897		l1 = pmap_l1(pmap, sva);
1898		if (pmap_load(l1) == 0) {
1899			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
1900			if (va_next < sva)
1901				va_next = eva;
1902			continue;
1903		}
1904
1905		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
1906		if (va_next < sva)
1907			va_next = eva;
1908
1909		l2 = pmap_l1_to_l2(l1, sva);
1910		if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
1911			continue;
1912
1913		if (va_next > eva)
1914			va_next = eva;
1915
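		/*
		 * Walk the L3 entries for this range and clear write
		 * permission on each valid mapping.
		 */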
1916		va = va_next;
1917		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
1918		    sva += L3_SIZE) {
1919			l3 = pmap_load(l3p);
1920			if (pmap_l3_valid(l3)) {
1921				pmap_set(l3p, ATTR_AP(ATTR_AP_RO));
1922				PTE_SYNC(l3p);
1923				/* XXX: Use pmap_invalidate_range */
1924				pmap_invalidate_page(pmap, sva);
1925			}
1926		}
1927	}
1928	PMAP_UNLOCK(pmap);
1929
1930	/* TODO: Only invalidate entries we are touching */
1931	pmap_invalidate_all(pmap);
1932}
1933
1934/*
1935 *	Insert the given physical page (p) at
1936 *	the specified virtual address (v) in the
1937 *	target physical map with the protection requested.
1938 *
1939 *	If specified, the page will be wired down, meaning
1940 *	that the related pte can not be reclaimed.
1941 *
1942 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1943 *	or lose information.  That is, this routine must actually
1944 *	insert this page into the given map NOW.
1945 */
1946int
1947pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1948    u_int flags, int8_t psind __unused)
1949{
1950	struct rwlock *lock;
1951	pd_entry_t *l1, *l2;
1952	pt_entry_t new_l3, orig_l3;
1953	pt_entry_t *l3;
1954	pv_entry_t pv;
1955	vm_paddr_t opa, pa, l2_pa, l3_pa;
1956	vm_page_t mpte, om, l2_m, l3_m;
1957	boolean_t nosleep;
1958
1959	va = trunc_page(va);
1960	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1961		VM_OBJECT_ASSERT_LOCKED(m->object);
1962	pa = VM_PAGE_TO_PHYS(m);
1963	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
1964	    L3_PAGE);
1965	if ((prot & VM_PROT_WRITE) == 0)
1966		new_l3 |= ATTR_AP(ATTR_AP_RO);
1967	if ((flags & PMAP_ENTER_WIRED) != 0)
1968		new_l3 |= ATTR_SW_WIRED;
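	/*
	 * Addresses with bit 63 clear are in the lower, user half of the
	 * address space; only those mappings are made accessible to EL0.
	 */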
1969	if ((va >> 63) == 0)
1970		new_l3 |= ATTR_AP(ATTR_AP_USER);
1971
1972	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
1973
1974	mpte = NULL;
1975
1976	lock = NULL;
1977	rw_rlock(&pvh_global_lock);
1978	PMAP_LOCK(pmap);
1979
1980	if (va < VM_MAXUSER_ADDRESS) {
1981		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
1982		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
1983		if (mpte == NULL && nosleep) {
1984			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
1985			if (lock != NULL)
1986				rw_wunlock(lock);
1987			rw_runlock(&pvh_global_lock);
1988			PMAP_UNLOCK(pmap);
1989			return (KERN_RESOURCE_SHORTAGE);
1990		}
1991		l3 = pmap_l3(pmap, va);
1992	} else {
1993		l3 = pmap_l3(pmap, va);
1994		/* TODO: This is not optimal, but should mostly work */
1995		if (l3 == NULL) {
1996			l2 = pmap_l2(pmap, va);
1997
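			/*
			 * The kernel page tables are incomplete here;
			 * allocate any missing L2 and L3 table pages and
			 * link them in.
			 */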
1998			if (l2 == NULL) {
1999				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2000				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2001				    VM_ALLOC_ZERO);
2002				if (l2_m == NULL)
2003					panic("pmap_enter: l2 pte_m == NULL");
2004				if ((l2_m->flags & PG_ZERO) == 0)
2005					pmap_zero_page(l2_m);
2006
2007				l2_pa = VM_PAGE_TO_PHYS(l2_m);
2008				l1 = pmap_l1(pmap, va);
2009				pmap_load_store(l1, l2_pa | L1_TABLE);
2010				PTE_SYNC(l1);
2011				l2 = pmap_l1_to_l2(l1, va);
2012			}
2013
2014			KASSERT(l2 != NULL,
2015			    ("No l2 table after allocating one"));
2016
2017			l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2018			    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
2019			if (l3_m == NULL)
2020				panic("pmap_enter: l3 pte_m == NULL");
2021			if ((l3_m->flags & PG_ZERO) == 0)
2022				pmap_zero_page(l3_m);
2023
2024			l3_pa = VM_PAGE_TO_PHYS(l3_m);
2025			pmap_load_store(l2, l3_pa | L2_TABLE);
2026			PTE_SYNC(l2);
2027			l3 = pmap_l2_to_l3(l2, va);
2028		}
2029		pmap_invalidate_page(pmap, va);
2030	}
2031
2032	om = NULL;
2033	orig_l3 = pmap_load(l3);
2034	opa = orig_l3 & ~ATTR_MASK;
2035
2036	/*
2037	 * Is the specified virtual address already mapped?
2038	 */
2039	if (pmap_l3_valid(orig_l3)) {
2040		/*
2041		 * Wiring change, just update stats. We don't worry about
2042		 * wiring PT pages as they remain resident as long as there
2043		 * are valid mappings in them. Hence, if a user page is wired,
2044		 * the PT page will be also.
2045		 */
2046		if ((flags & PMAP_ENTER_WIRED) != 0 &&
2047		    (orig_l3 & ATTR_SW_WIRED) == 0)
2048			pmap->pm_stats.wired_count++;
2049		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
2050		    (orig_l3 & ATTR_SW_WIRED) != 0)
2051			pmap->pm_stats.wired_count--;
2052
2053		/*
2054		 * Remove the extra PT page reference.
2055		 */
2056		if (mpte != NULL) {
2057			mpte->wire_count--;
2058			KASSERT(mpte->wire_count > 0,
2059			    ("pmap_enter: missing reference to page table page,"
2060			     " va: 0x%lx", va));
2061		}
2062
2063		/*
2064		 * Has the physical page changed?
2065		 */
2066		if (opa == pa) {
2067			/*
2068			 * No, might be a protection or wiring change.
2069			 */
2070			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2071				new_l3 |= ATTR_SW_MANAGED;
2072				if ((new_l3 & ATTR_AP(ATTR_AP_RW)) ==
2073				    ATTR_AP(ATTR_AP_RW)) {
2074					vm_page_aflag_set(m, PGA_WRITEABLE);
2075				}
2076			}
2077			goto validate;
2078		}
2079
2080		/* Flush the data cache; there may be uncommitted data in it. */
2081		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
2082			cpu_dcache_wb_range(va, L3_SIZE);
2083	} else {
2084		/*
2085		 * Increment the counters.
2086		 */
2087		if ((new_l3 & ATTR_SW_WIRED) != 0)
2088			pmap->pm_stats.wired_count++;
2089		pmap_resident_count_inc(pmap, 1);
2090	}
2091	/*
2092	 * Enter on the PV list if part of our managed memory.
2093	 */
2094	if ((m->oflags & VPO_UNMANAGED) == 0) {
2095		new_l3 |= ATTR_SW_MANAGED;
2096		pv = get_pv_entry(pmap, &lock);
2097		pv->pv_va = va;
2098		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
2099		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2100		m->md.pv_gen++;
2101		if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
2102			vm_page_aflag_set(m, PGA_WRITEABLE);
2103	}
2104
2105	/*
2106	 * Update the L3 entry.
2107	 */
2108	if (orig_l3 != 0) {
2109validate:
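		/*
		 * Swap in the new entry.  The previous PTE value is examined
		 * below to preserve the old mapping's reference and dirty
		 * state.
		 */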
2110		orig_l3 = pmap_load_store(l3, new_l3);
2111		PTE_SYNC(l3);
2112		opa = orig_l3 & ~ATTR_MASK;
2113
2114		if (opa != pa) {
2115			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2116				om = PHYS_TO_VM_PAGE(opa);
2117				if (pmap_page_dirty(orig_l3))
2118					vm_page_dirty(om);
2119				if ((orig_l3 & ATTR_AF) != 0)
2120					vm_page_aflag_set(om, PGA_REFERENCED);
2121				CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
2122				pmap_pvh_free(&om->md, pmap, va);
2123			}
2124		} else if (pmap_page_dirty(orig_l3)) {
2125			if ((orig_l3 & ATTR_SW_MANAGED) != 0)
2126				vm_page_dirty(m);
2127		}
2128	} else {
2129		pmap_load_store(l3, new_l3);
2130		PTE_SYNC(l3);
2131	}
2132	pmap_invalidate_page(pmap, va);
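	/*
	 * For a user mapping in the current process, synchronize the
	 * I-cache so that stale instruction-cache contents are not
	 * executed from the new mapping.
	 */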
2133	if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
2134		cpu_icache_sync_range(va, PAGE_SIZE);
2135
2136	if (lock != NULL)
2137		rw_wunlock(lock);
2138	rw_runlock(&pvh_global_lock);
2139	PMAP_UNLOCK(pmap);
2140	return (KERN_SUCCESS);
2141}
2142
2143/*
2144 * Maps a sequence of resident pages belonging to the same object.
2145 * The sequence begins with the given page m_start.  This page is
2146 * mapped at the given virtual address start.  Each subsequent page is
2147 * mapped at a virtual address that is offset from start by the same
2148 * amount as the page is offset from m_start within the object.  The
2149 * last page in the sequence is the page with the largest offset from
2150 * m_start that can be mapped at a virtual address less than the given
2151 * virtual address end.  Not every virtual page between start and end
2152 * is mapped; only those for which a resident page exists with the
2153 * corresponding offset from m_start are mapped.
2154 */
2155void
2156pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2157    vm_page_t m_start, vm_prot_t prot)
2158{
2159	struct rwlock *lock;
2160	vm_offset_t va;
2161	vm_page_t m, mpte;
2162	vm_pindex_t diff, psize;
2163
2164	VM_OBJECT_ASSERT_LOCKED(m_start->object);
2165
2166	psize = atop(end - start);
2167	mpte = NULL;
2168	m = m_start;
2169	lock = NULL;
2170	rw_rlock(&pvh_global_lock);
2171	PMAP_LOCK(pmap);
2172	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2173		va = start + ptoa(diff);
2174		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
2175		m = TAILQ_NEXT(m, listq);
2176	}
2177	if (lock != NULL)
2178		rw_wunlock(lock);
2179	rw_runlock(&pvh_global_lock);
2180	PMAP_UNLOCK(pmap);
2181}
2182
2183/*
2184 * This code makes some *MAJOR* assumptions:
2185 * 1. The current pmap and the given pmap exist.
2186 * 2. Not wired.
2187 * 3. Read access.
2188 * 4. No page table pages.
2189 * In exchange, it is *MUCH* faster than pmap_enter...
2190 */
2191
2192void
2193pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2194{
2195	struct rwlock *lock;
2196
2197	lock = NULL;
2198	rw_rlock(&pvh_global_lock);
2199	PMAP_LOCK(pmap);
2200	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
2201	if (lock != NULL)
2202		rw_wunlock(lock);
2203	rw_runlock(&pvh_global_lock);
2204	PMAP_UNLOCK(pmap);
2205}
2206
2207static vm_page_t
2208pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2209    vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
2210{
2211	struct spglist free;
2212	pd_entry_t *l2;
2213	pt_entry_t *l3;
2214	vm_paddr_t pa;
2215
2216	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2217	    (m->oflags & VPO_UNMANAGED) != 0,
2218	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2219	rw_assert(&pvh_global_lock, RA_LOCKED);
2220	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2221
2222	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
2223	/*
2224	 * In the case that a page table page is not
2225	 * resident, we are creating it here.
2226	 */
2227	if (va < VM_MAXUSER_ADDRESS) {
2228		vm_pindex_t l2pindex;
2229
2230		/*
2231		 * Calculate pagetable page index
2232		 */
2233		l2pindex = pmap_l2_pindex(va);
2234		if (mpte && (mpte->pindex == l2pindex)) {
2235			mpte->wire_count++;
2236		} else {
2237			/*
2238			 * Get the l2 entry
2239			 */
2240			l2 = pmap_l2(pmap, va);
2241
2242			/*
2243			 * If the page table page is mapped, we just increment
2244			 * the hold count, and activate it.  Otherwise, we
2245			 * attempt to allocate a page table page.  If this
2246			 * attempt fails, we don't retry.  Instead, we give up.
2247			 */
2248			if (l2 != NULL && pmap_load(l2) != 0) {
2249				mpte =
2250				    PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
2251				mpte->wire_count++;
2252			} else {
2253				/*
2254				 * Pass NULL instead of the PV list lock
2255				 * pointer, because we don't intend to sleep.
2256				 */
2257				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
2258				if (mpte == NULL)
2259					return (mpte);
2260			}
2261		}
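		/* Locate the L3 entry in the page table page via the DMAP. */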
2262		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
2263		l3 = &l3[pmap_l3_index(va)];
2264	} else {
2265		mpte = NULL;
2266		l3 = pmap_l3(kernel_pmap, va);
2267	}
2268	if (l3 == NULL)
2269		panic("pmap_enter_quick_locked: No l3");
2270	if (pmap_load(l3) != 0) {
2271		if (mpte != NULL) {
2272			mpte->wire_count--;
2273			mpte = NULL;
2274		}
2275		return (mpte);
2276	}
2277
2278	/*
2279	 * Enter on the PV list if part of our managed memory.
2280	 */
2281	if ((m->oflags & VPO_UNMANAGED) == 0 &&
2282	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
2283		if (mpte != NULL) {
2284			SLIST_INIT(&free);
2285			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
2286				pmap_invalidate_page(pmap, va);
2287				pmap_free_zero_pages(&free);
2288			}
2289			mpte = NULL;
2290		}
2291		return (mpte);
2292	}
2293
2294	/*
2295	 * Increment counters
2296	 */
2297	pmap_resident_count_inc(pmap, 1);
2298
2299	pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
2300	    ATTR_AP(ATTR_AP_RW) | L3_PAGE;
2301
2302	/*
2303	 * Now validate mapping with RO protection
2304	 */
2305	if ((m->oflags & VPO_UNMANAGED) == 0)
2306		pa |= ATTR_SW_MANAGED;
2307	pmap_load_store(l3, pa);
2308	PTE_SYNC(l3);
2309	pmap_invalidate_page(pmap, va);
2310	return (mpte);
2311}
2312
2313/*
2314 * This code maps large physical mmap regions into the
2315 * processor address space.  Note that some shortcuts
2316 * are taken, but the code works.
2317 */
2318void
2319pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
2320    vm_pindex_t pindex, vm_size_t size)
2321{
2322
2323	VM_OBJECT_ASSERT_WLOCKED(object);
2324	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2325	    ("pmap_object_init_pt: non-device object"));
2326}
2327
2328/*
2329 *	Clear the wired attribute from the mappings for the specified range of
2330 *	addresses in the given pmap.  Every valid mapping within that range
2331 *	must have the wired attribute set.  In contrast, invalid mappings
2332 *	cannot have the wired attribute set, so they are ignored.
2333 *
2334 *	The wired attribute of the page table entry is not a hardware feature,
2335 *	so there is no need to invalidate any TLB entries.
2336 */
2337void
2338pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2339{
2340	vm_offset_t va_next;
2341	pd_entry_t *l1, *l2;
2342	pt_entry_t *l3;
2343	boolean_t pv_lists_locked;
2344
2345	pv_lists_locked = FALSE;
2346	PMAP_LOCK(pmap);
2347	for (; sva < eva; sva = va_next) {
2348		l1 = pmap_l1(pmap, sva);
2349		if (pmap_load(l1) == 0) {
2350			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2351			if (va_next < sva)
2352				va_next = eva;
2353			continue;
2354		}
2355
2356		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2357		if (va_next < sva)
2358			va_next = eva;
2359
2360		l2 = pmap_l1_to_l2(l1, sva);
2361		if (pmap_load(l2) == 0)
2362			continue;
2363
2364		if (va_next > eva)
2365			va_next = eva;
2366		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2367		    sva += L3_SIZE) {
2368			if (pmap_load(l3) == 0)
2369				continue;
2370			if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
2371				panic("pmap_unwire: l3 %#jx is missing "
2372				    "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));
2373
2374			/*
2375			 * ATTR_SW_WIRED must be cleared atomically.  Although
2376			 * the pmap lock synchronizes access to it, another
2377			 * processor could be updating other bits concurrently.
2378			 */
2379			atomic_clear_long(l3, ATTR_SW_WIRED);
2380			pmap->pm_stats.wired_count--;
2381		}
2382	}
2383	if (pv_lists_locked)
2384		rw_runlock(&pvh_global_lock);
2385	PMAP_UNLOCK(pmap);
2386}
2387
2388/*
2389 *	Copy the range specified by src_addr/len
2390 *	from the source map to the range dst_addr/len
2391 *	in the destination map.
2392 *
2393 *	This routine is only advisory and need not do anything.
2394 */
2395
2396void
2397pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2398    vm_offset_t src_addr)
2399{
2400}
2401
2402/*
2403 *	pmap_zero_page zeros the specified hardware page by mapping
2404 *	the page into KVM and using bzero to clear its contents.
2405 */
2406void
2407pmap_zero_page(vm_page_t m)
2408{
2409	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2410
2411	pagezero((void *)va);
2412}
2413
2414/*
2415 *	pmap_zero_page_area zeros the specified hardware page by mapping
2416 *	the page into KVM and using bzero to clear its contents.
2417 *
2418 *	off and size may not cover an area beyond a single hardware page.
2419 */
2420void
2421pmap_zero_page_area(vm_page_t m, int off, int size)
2422{
2423	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2424
2425	if (off == 0 && size == PAGE_SIZE)
2426		pagezero((void *)va);
2427	else
2428		bzero((char *)va + off, size);
2429}
2430
2431/*
2432 *	pmap_zero_page_idle zeros the specified hardware page by mapping
2433 *	the page into KVM and using bzero to clear its contents.  This
2434 *	is intended to be called from the vm_pagezero process only and
2435 *	outside of Giant.
2436 */
2437void
2438pmap_zero_page_idle(vm_page_t m)
2439{
2440	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2441
2442	pagezero((void *)va);
2443}
2444
2445/*
2446 *	pmap_copy_page copies the specified (machine independent)
2447 *	page by mapping the page into virtual memory and using
2448 *	bcopy to copy the page, one machine dependent page at a
2449 *	time.
2450 */
2451void
2452pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2453{
2454	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
2455	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
2456
2457	pagecopy((void *)src, (void *)dst);
2458}
2459
2460int unmapped_buf_allowed = 1;
2461
2462void
2463pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2464    vm_offset_t b_offset, int xfersize)
2465{
2466	void *a_cp, *b_cp;
2467	vm_page_t m_a, m_b;
2468	vm_paddr_t p_a, p_b;
2469	vm_offset_t a_pg_offset, b_pg_offset;
2470	int cnt;
2471
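	/*
	 * Copy through the direct map in chunks that stay within a single
	 * page of both the source and the destination.
	 */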
2472	while (xfersize > 0) {
2473		a_pg_offset = a_offset & PAGE_MASK;
2474		m_a = ma[a_offset >> PAGE_SHIFT];
2475		p_a = m_a->phys_addr;
2476		b_pg_offset = b_offset & PAGE_MASK;
2477		m_b = mb[b_offset >> PAGE_SHIFT];
2478		p_b = m_b->phys_addr;
2479		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2480		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2481		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
2482			panic("!DMAP a %lx", p_a);
2483		} else {
2484			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
2485		}
2486		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
2487			panic("!DMAP b %lx", p_b);
2488		} else {
2489			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
2490		}
2491		bcopy(a_cp, b_cp, cnt);
2492		a_offset += cnt;
2493		b_offset += cnt;
2494		xfersize -= cnt;
2495	}
2496}
2497
2498vm_offset_t
2499pmap_quick_enter_page(vm_page_t m)
2500{
2501
2502	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
2503}
2504
2505void
2506pmap_quick_remove_page(vm_offset_t addr)
2507{
2508}
2509
2510/*
2511 * Returns true if the pmap's pv is one of the first
2512 * 16 pvs linked to from this page.  This count may
2513 * be changed upwards or downwards in the future; it
2514 * is only necessary that true be returned for a small
2515 * subset of pmaps for proper page aging.
2516 */
2517boolean_t
2518pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2519{
2520	struct rwlock *lock;
2521	pv_entry_t pv;
2522	int loops = 0;
2523	boolean_t rv;
2524
2525	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2526	    ("pmap_page_exists_quick: page %p is not managed", m));
2527	rv = FALSE;
2528	rw_rlock(&pvh_global_lock);
2529	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2530	rw_rlock(lock);
2531	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2532		if (PV_PMAP(pv) == pmap) {
2533			rv = TRUE;
2534			break;
2535		}
2536		loops++;
2537		if (loops >= 16)
2538			break;
2539	}
2540	rw_runlock(lock);
2541	rw_runlock(&pvh_global_lock);
2542	return (rv);
2543}
2544
2545/*
2546 *	pmap_page_wired_mappings:
2547 *
2548 *	Return the number of managed mappings to the given physical page
2549 *	that are wired.
2550 */
2551int
2552pmap_page_wired_mappings(vm_page_t m)
2553{
2554	struct rwlock *lock;
2555	pmap_t pmap;
2556	pt_entry_t *l3;
2557	pv_entry_t pv;
2558	int count, md_gen;
2559
2560	if ((m->oflags & VPO_UNMANAGED) != 0)
2561		return (0);
2562	rw_rlock(&pvh_global_lock);
2563	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2564	rw_rlock(lock);
2565restart:
2566	count = 0;
2567	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2568		pmap = PV_PMAP(pv);
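		/*
		 * If the pmap lock is contended, drop the PV list lock to
		 * preserve the lock order and revalidate the list generation
		 * afterwards.
		 */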
2569		if (!PMAP_TRYLOCK(pmap)) {
2570			md_gen = m->md.pv_gen;
2571			rw_runlock(lock);
2572			PMAP_LOCK(pmap);
2573			rw_rlock(lock);
2574			if (md_gen != m->md.pv_gen) {
2575				PMAP_UNLOCK(pmap);
2576				goto restart;
2577			}
2578		}
2579		l3 = pmap_l3(pmap, pv->pv_va);
2580		if (l3 != NULL && (pmap_load(l3) & ATTR_SW_WIRED) != 0)
2581			count++;
2582		PMAP_UNLOCK(pmap);
2583	}
2584	rw_runlock(lock);
2585	rw_runlock(&pvh_global_lock);
2586	return (count);
2587}
2588
2589/*
2590 * Destroy all managed, non-wired mappings in the given user-space
2591 * pmap.  This pmap cannot be active on any processor besides the
2592 * caller.
2593 *
2594 * This function cannot be applied to the kernel pmap.  Moreover, it
2595 * is not intended for general use.  It is only to be used during
2596 * process termination.  Consequently, it can be implemented in ways
2597 * that make it faster than pmap_remove().  First, it can more quickly
2598 * destroy mappings by iterating over the pmap's collection of PV
2599 * entries, rather than searching the page table.  Second, it doesn't
2600 * have to test and clear the page table entries atomically, because
2601 * no processor is currently accessing the user address space.  In
2602 * particular, a page table entry's dirty bit won't change state once
2603 * this function starts.
2604 */
2605void
2606pmap_remove_pages(pmap_t pmap)
2607{
2608	pd_entry_t ptepde, *l2;
2609	pt_entry_t *l3, tl3;
2610	struct spglist free;
2611	vm_page_t m;
2612	pv_entry_t pv;
2613	struct pv_chunk *pc, *npc;
2614	struct rwlock *lock;
2615	int64_t bit;
2616	uint64_t inuse, bitmask;
2617	int allfree, field, freed, idx;
2618	vm_paddr_t pa;
2619
2620	lock = NULL;
2621
2622	SLIST_INIT(&free);
2623	rw_rlock(&pvh_global_lock);
2624	PMAP_LOCK(pmap);
2625	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2626		allfree = 1;
2627		freed = 0;
2628		for (field = 0; field < _NPCM; field++) {
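			/*
			 * A clear bit in pc_map marks a PV entry that is in
			 * use; visit each such entry via ffsl().
			 */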
2629			inuse = ~pc->pc_map[field] & pc_freemask[field];
2630			while (inuse != 0) {
2631				bit = ffsl(inuse) - 1;
2632				bitmask = 1UL << bit;
2633				idx = field * 64 + bit;
2634				pv = &pc->pc_pventry[idx];
2635				inuse &= ~bitmask;
2636
2637				l2 = pmap_l2(pmap, pv->pv_va);
2638				ptepde = pmap_load(l2);
2639				l3 = pmap_l2_to_l3(l2, pv->pv_va);
2640				tl3 = pmap_load(l3);
2641
2642/*
2643 * We cannot remove wired pages from a process' mapping at this time
2644 */
2645				if (tl3 & ATTR_SW_WIRED) {
2646					allfree = 0;
2647					continue;
2648				}
2649
2650				pa = tl3 & ~ATTR_MASK;
2651
2652				m = PHYS_TO_VM_PAGE(pa);
2653				KASSERT(m->phys_addr == pa,
2654				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
2655				    m, (uintmax_t)m->phys_addr,
2656				    (uintmax_t)tl3));
2657
2658				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
2659				    m < &vm_page_array[vm_page_array_size],
2660				    ("pmap_remove_pages: bad l3 %#jx",
2661				    (uintmax_t)tl3));
2662
2663				if (pmap_is_current(pmap) &&
2664				    pmap_l3_valid_cacheable(pmap_load(l3)))
2665					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
2666				pmap_load_clear(l3);
2667				PTE_SYNC(l3);
2668				pmap_invalidate_page(pmap, pv->pv_va);
2669
2670				/*
2671				 * Update the vm_page_t clean/reference bits.
2672				 */
2673				if ((tl3 & ATTR_AP_RW_BIT) ==
2674				    ATTR_AP(ATTR_AP_RW))
2675					vm_page_dirty(m);
2676
2677				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
2678
2679				/* Mark free */
2680				pc->pc_map[field] |= bitmask;
2681
2682				pmap_resident_count_dec(pmap, 1);
2683				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2684				m->md.pv_gen++;
2685
2686				pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free);
2687				freed++;
2688			}
2689		}
2690		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
2691		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
2692		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
2693		if (allfree) {
2694			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2695			free_pv_chunk(pc);
2696		}
2697	}
2698	pmap_invalidate_all(pmap);
2699	if (lock != NULL)
2700		rw_wunlock(lock);
2701	rw_runlock(&pvh_global_lock);
2702	PMAP_UNLOCK(pmap);
2703	pmap_free_zero_pages(&free);
2704}
2705
2706/*
2707 * This is used to check if a page has been accessed or modified. As we
2708 * don't have a bit to see if it has been modified we have to assume it
2709 * has been if the page is read/write.
2710 */
2711static boolean_t
2712pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
2713{
2714	struct rwlock *lock;
2715	pv_entry_t pv;
2716	pt_entry_t *l3, mask, value;
2717	pmap_t pmap;
2718	int md_gen;
2719	boolean_t rv;
2720
2721	rv = FALSE;
2722	rw_rlock(&pvh_global_lock);
2723	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2724	rw_rlock(lock);
2725restart:
2726	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2727		pmap = PV_PMAP(pv);
2728		if (!PMAP_TRYLOCK(pmap)) {
2729			md_gen = m->md.pv_gen;
2730			rw_runlock(lock);
2731			PMAP_LOCK(pmap);
2732			rw_rlock(lock);
2733			if (md_gen != m->md.pv_gen) {
2734				PMAP_UNLOCK(pmap);
2735				goto restart;
2736			}
2737		}
2738		l3 = pmap_l3(pmap, pv->pv_va);
2739		mask = 0;
2740		value = 0;
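		/*
		 * A writable entry is treated as modified, since there is no
		 * hardware dirty bit; a valid page with ATTR_AF set is
		 * treated as referenced.
		 */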
2741		if (modified) {
2742			mask |= ATTR_AP_RW_BIT;
2743			value |= ATTR_AP(ATTR_AP_RW);
2744		}
2745		if (accessed) {
2746			mask |= ATTR_AF | ATTR_DESCR_MASK;
2747			value |= ATTR_AF | L3_PAGE;
2748		}
2749		rv = (pmap_load(l3) & mask) == value;
2750		PMAP_UNLOCK(pmap);
2751		if (rv)
2752			goto out;
2753	}
2754out:
2755	rw_runlock(lock);
2756	rw_runlock(&pvh_global_lock);
2757	return (rv);
2758}
2759
2760/*
2761 *	pmap_is_modified:
2762 *
2763 *	Return whether or not the specified physical page was modified
2764 *	in any physical maps.
2765 */
2766boolean_t
2767pmap_is_modified(vm_page_t m)
2768{
2769
2770	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2771	    ("pmap_is_modified: page %p is not managed", m));
2772
2773	/*
2774	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2775	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2776	 * is clear, no PTEs can have PG_M set.
2777	 */
2778	VM_OBJECT_ASSERT_WLOCKED(m->object);
2779	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2780		return (FALSE);
2781	return (pmap_page_test_mappings(m, FALSE, TRUE));
2782}
2783
2784/*
2785 *	pmap_is_prefaultable:
2786 *
2787 *	Return whether or not the specified virtual address is eligible
2788 *	for prefault.
2789 */
2790boolean_t
2791pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2792{
2793	pt_entry_t *l3;
2794	boolean_t rv;
2795
2796	rv = FALSE;
2797	PMAP_LOCK(pmap);
2798	l3 = pmap_l3(pmap, addr);
2799	if (l3 != NULL && pmap_load(l3) != 0) {
2800		rv = TRUE;
2801	}
2802	PMAP_UNLOCK(pmap);
2803	return (rv);
2804}
2805
2806/*
2807 *	pmap_is_referenced:
2808 *
2809 *	Return whether or not the specified physical page was referenced
2810 *	in any physical maps.
2811 */
2812boolean_t
2813pmap_is_referenced(vm_page_t m)
2814{
2815
2816	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2817	    ("pmap_is_referenced: page %p is not managed", m));
2818	return (pmap_page_test_mappings(m, TRUE, FALSE));
2819}
2820
2821/*
2822 * Clear the write and modified bits in each of the given page's mappings.
2823 */
2824void
2825pmap_remove_write(vm_page_t m)
2826{
2827	pmap_t pmap;
2828	struct rwlock *lock;
2829	pv_entry_t pv;
2830	pt_entry_t *l3, oldl3;
2831	int md_gen;
2832
2833	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2834	    ("pmap_remove_write: page %p is not managed", m));
2835
2836	/*
2837	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2838	 * set by another thread while the object is locked.  Thus,
2839	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2840	 */
2841	VM_OBJECT_ASSERT_WLOCKED(m->object);
2842	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2843		return;
2844	rw_rlock(&pvh_global_lock);
2845	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2846retry_pv_loop:
2847	rw_wlock(lock);
2848	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2849		pmap = PV_PMAP(pv);
2850		if (!PMAP_TRYLOCK(pmap)) {
2851			md_gen = m->md.pv_gen;
2852			rw_wunlock(lock);
2853			PMAP_LOCK(pmap);
2854			rw_wlock(lock);
2855			if (md_gen != m->md.pv_gen) {
2856				PMAP_UNLOCK(pmap);
2857				rw_wunlock(lock);
2858				goto retry_pv_loop;
2859			}
2860		}
2861		l3 = pmap_l3(pmap, pv->pv_va);
2862retry:
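		/*
		 * Downgrade the mapping to read-only with a compare-and-set
		 * so that a concurrent update of the PTE is not lost.
		 */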
2863		oldl3 = pmap_load(l3);
2864		if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
2865			if (!atomic_cmpset_long(l3, oldl3,
2866			    oldl3 | ATTR_AP(ATTR_AP_RO)))
2867				goto retry;
2868			if ((oldl3 & ATTR_AF) != 0)
2869				vm_page_dirty(m);
2870			pmap_invalidate_page(pmap, pv->pv_va);
2871		}
2872		PMAP_UNLOCK(pmap);
2873	}
2874	rw_wunlock(lock);
2875	vm_page_aflag_clear(m, PGA_WRITEABLE);
2876	rw_runlock(&pvh_global_lock);
2877}
2878
2879static __inline boolean_t
2880safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
2881{
2882
2883	return (FALSE);
2884}
2885
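/*
 * Upper bound on the number of mappings whose reference state is processed
 * in a single call to pmap_ts_referenced().
 */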
2886#define	PMAP_TS_REFERENCED_MAX	5
2887
2888/*
2889 *	pmap_ts_referenced:
2890 *
2891 *	Return a count of reference bits for a page, clearing those bits.
2892 *	It is not necessary for every reference bit to be cleared, but it
2893 *	is necessary that 0 only be returned when there are truly no
2894 *	reference bits set.
2895 *
2896 *	XXX: The exact number of bits to check and clear is a matter that
2897 *	should be tested and standardized at some point in the future for
2898 *	optimal aging of shared pages.
2899 */
2900int
2901pmap_ts_referenced(vm_page_t m)
2902{
2903	pv_entry_t pv, pvf;
2904	pmap_t pmap;
2905	struct rwlock *lock;
2906	pd_entry_t *l2p, l2;
2907	pt_entry_t *l3;
2908	vm_paddr_t pa;
2909	int cleared, md_gen, not_cleared;
2910	struct spglist free;
2911
2912	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2913	    ("pmap_ts_referenced: page %p is not managed", m));
2914	SLIST_INIT(&free);
2915	cleared = 0;
2916	pa = VM_PAGE_TO_PHYS(m);
2917	lock = PHYS_TO_PV_LIST_LOCK(pa);
2918	rw_rlock(&pvh_global_lock);
2919	rw_wlock(lock);
2920retry:
2921	not_cleared = 0;
2922	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
2923		goto out;
2924	pv = pvf;
2925	do {
2926		if (pvf == NULL)
2927			pvf = pv;
2928		pmap = PV_PMAP(pv);
2929		if (!PMAP_TRYLOCK(pmap)) {
2930			md_gen = m->md.pv_gen;
2931			rw_wunlock(lock);
2932			PMAP_LOCK(pmap);
2933			rw_wlock(lock);
2934			if (md_gen != m->md.pv_gen) {
2935				PMAP_UNLOCK(pmap);
2936				goto retry;
2937			}
2938		}
2939		l2p = pmap_l2(pmap, pv->pv_va);
2940		KASSERT(l2p != NULL, ("pmap_ts_referenced: no l2 table found"));
2941		l2 = pmap_load(l2p);
2942		KASSERT((l2 & ATTR_DESCR_MASK) == L2_TABLE,
2943		    ("pmap_ts_referenced: found an invalid l2 table"));
2944		l3 = pmap_l2_to_l3(l2p, pv->pv_va);
2945		if ((pmap_load(l3) & ATTR_AF) != 0) {
2946			if (safe_to_clear_referenced(pmap, pmap_load(l3))) {
2947				/*
2948				 * TODO: We don't handle the access flag
2949				 * at all. We need to be able to set it in
2950				 * the exception handler.
2951				 */
2952				panic("ARM64TODO: safe_to_clear_referenced\n");
2953			} else if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) {
2954				/*
2955				 * Wired pages cannot be paged out so
2956				 * doing accessed bit emulation for
2957				 * them is wasted effort. We do the
2958				 * hard work for unwired pages only.
2959				 */
2960				pmap_remove_l3(pmap, l3, pv->pv_va, l2,
2961				    &free, &lock);
2962				pmap_invalidate_page(pmap, pv->pv_va);
2963				cleared++;
2964				if (pvf == pv)
2965					pvf = NULL;
2966				pv = NULL;
2967				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
2968				    ("inconsistent pv lock %p %p for page %p",
2969				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
2970			} else
2971				not_cleared++;
2972		}
2973		PMAP_UNLOCK(pmap);
2974		/* Rotate the PV list if it has more than one entry. */
2975		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
2976			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2977			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2978			m->md.pv_gen++;
2979		}
2980	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
2981	    not_cleared < PMAP_TS_REFERENCED_MAX);
2982out:
2983	rw_wunlock(lock);
2984	rw_runlock(&pvh_global_lock);
2985	pmap_free_zero_pages(&free);
2986	return (cleared + not_cleared);
2987}
2988
2989/*
2990 *	Apply the given advice to the specified range of addresses within the
2991 *	given pmap.  Depending on the advice, clear the referenced and/or
2992 *	modified flags in each mapping and set the mapped page's dirty field.
2993 */
2994void
2995pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2996{
2997}
2998
2999/*
3000 *	Clear the modify bits on the specified physical page.
3001 */
3002void
3003pmap_clear_modify(vm_page_t m)
3004{
3005
3006	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3007	    ("pmap_clear_modify: page %p is not managed", m));
3008	VM_OBJECT_ASSERT_WLOCKED(m->object);
3009	KASSERT(!vm_page_xbusied(m),
3010	    ("pmap_clear_modify: page %p is exclusive busied", m));
3011
3012	/*
3013	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
3014	 * If the object containing the page is locked and the page is not
3015	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
3016	 */
3017	if ((m->aflags & PGA_WRITEABLE) == 0)
3018		return;
3019
3020	/* ARM64TODO: We lack support for tracking if a page is modified */
3021}
3022
3023void *
3024pmap_mapbios(vm_paddr_t pa, vm_size_t size)
3025{
3026
3027	return ((void *)PHYS_TO_DMAP(pa));
3028}
3029
3030void
3031pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
3032{
3033}
3034
3035/*
3036 * Sets the memory attribute for the specified page.
3037 */
3038void
3039pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
3040{
3041
3042	m->md.pv_memattr = ma;
3043
3044	/*
3045	 * ARM64TODO: Implement the below (from the amd64 pmap)
3046	 * If "m" is a normal page, update its direct mapping.  This update
3047	 * can be relied upon to perform any cache operations that are
3048	 * required for data coherence.
3049	 */
3050	if ((m->flags & PG_FICTITIOUS) == 0 &&
3051	    PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m)))
3052		panic("ARM64TODO: pmap_page_set_memattr");
3053}
3054
3055/*
3056 * perform the pmap work for mincore
3057 */
3058int
3059pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
3060{
3061	pd_entry_t *l1p, l1;
3062	pd_entry_t *l2p, l2;
3063	pt_entry_t *l3p, l3;
3064	vm_paddr_t pa;
3065	bool managed;
3066	int val;
3067
3068	PMAP_LOCK(pmap);
3069retry:
3070	pa = 0;
3071	val = 0;
3072	managed = false;
3073
3074	l1p = pmap_l1(pmap, addr);
3075	if (l1p == NULL) /* No l1 */
3076		goto done;
3077
3078	l1 = pmap_load(l1p);
3079	if ((l1 & ATTR_DESCR_MASK) == L1_INVAL)
3080		goto done;
3081
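	/*
	 * With 4KB base pages, an L1 block maps a 1GB region and an L2
	 * block maps a 2MB region; both are reported as superpages.
	 */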
3082	if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
3083		pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET);
3084		managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3085		val = MINCORE_SUPER | MINCORE_INCORE;
3086		if (pmap_page_dirty(l1))
3087			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3088		if ((l1 & ATTR_AF) == ATTR_AF)
3089			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3090		goto done;
3091	}
3092
3093	l2p = pmap_l1_to_l2(l1p, addr);
3094	if (l2p == NULL) /* No l2 */
3095		goto done;
3096
3097	l2 = pmap_load(l2p);
3098	if ((l2 & ATTR_DESCR_MASK) == L2_INVAL)
3099		goto done;
3100
3101	if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
3102		pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET);
3103		managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3104		val = MINCORE_SUPER | MINCORE_INCORE;
3105		if (pmap_page_dirty(l2))
3106			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3107		if ((l2 & ATTR_AF) == ATTR_AF)
3108			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3109		goto done;
3110	}
3111
3112	l3p = pmap_l2_to_l3(l2p, addr);
3113	if (l3p == NULL) /* No l3 */
3114		goto done;
3115
3116	l3 = pmap_load(l3p);
3117	if ((l3 & ATTR_DESCR_MASK) == L3_INVAL)
3118		goto done;
3119
3120	if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) {
3121		pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET);
3122		managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3123		val = MINCORE_INCORE;
3124		if (pmap_page_dirty(l3))
3125			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3126		if ((l3 & ATTR_AF) == ATTR_AF)
3127			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3128	}
3129
3130done:
3131	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
3132	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
3133		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
3134		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
3135			goto retry;
3136	} else
3137		PA_UNLOCK_COND(*locked_pa);
3138	PMAP_UNLOCK(pmap);
3139
3140	return (val);
3141}
3142
3143void
3144pmap_activate(struct thread *td)
3145{
3146	pmap_t	pmap;
3147
3148	critical_enter();
3149	pmap = vmspace_pmap(td->td_proc->p_vmspace);
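	/*
	 * Switch TTBR0 to the new pmap's L1 table and invalidate the TLB so
	 * that no stale user translations remain.
	 */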
3150	td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1);
3151	__asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr));
3152	pmap_invalidate_all(pmap);
3153	critical_exit();
3154}
3155
3156void
3157pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
3158{
3159
3160	if (va >= VM_MIN_KERNEL_ADDRESS) {
3161		cpu_icache_sync_range(va, sz);
3162	} else {
3163		u_int len, offset;
3164		vm_paddr_t pa;
3165
3166		/* Find the length of data in this page to flush */
3167		offset = va & PAGE_MASK;
3168		len = imin(PAGE_SIZE - offset, sz);
3169
3170		while (sz != 0) {
3171			/* Extract the physical address & find it in the DMAP */
3172			pa = pmap_extract(pmap, va);
3173			if (pa != 0)
3174				cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
3175
3176			/* Move to the next page */
3177			sz -= len;
3178			va += len;
3179			/* Set the length for the next iteration */
3180			len = imin(PAGE_SIZE, sz);
3181		}
3182	}
3183}
3184
3185/*
3186 *	Increase the starting virtual address of the given mapping if a
3187 *	different alignment might result in more superpage mappings.
3188 */
3189void
3190pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3191    vm_offset_t *addr, vm_size_t size)
3192{
3193}
3194
3195/**
3196 * Get the kernel virtual address of a set of physical pages. If there are
3197 * physical addresses not covered by the DMAP perform a transient mapping
3198 * that will be removed when calling pmap_unmap_io_transient.
3199 *
3200 * \param page        The pages for which the caller wishes to obtain
3201 *                    kernel virtual addresses.
3202 * \param vaddr       On return contains the kernel virtual memory address
3203 *                    of the pages passed in the page parameter.
3204 * \param count       Number of pages passed in.
3205 * \param can_fault   TRUE if the thread using the mapped pages can take
3206 *                    page faults, FALSE otherwise.
3207 *
3208 * \returns TRUE if the caller must call pmap_unmap_io_transient when
3209 *          finished or FALSE otherwise.
3210 *
3211 */
3212boolean_t
3213pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3214    boolean_t can_fault)
3215{
3216	vm_paddr_t paddr;
3217	boolean_t needs_mapping;
3218	int error, i;
3219
3220	/*
3221	 * Allocate any KVA space that we need, this is done in a separate
3222	 * loop to prevent calling vmem_alloc while pinned.
3223	 */
3224	needs_mapping = FALSE;
3225	for (i = 0; i < count; i++) {
3226		paddr = VM_PAGE_TO_PHYS(page[i]);
3227		if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) {
3228			error = vmem_alloc(kernel_arena, PAGE_SIZE,
3229			    M_BESTFIT | M_WAITOK, &vaddr[i]);
3230			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
3231			needs_mapping = TRUE;
3232		} else {
3233			vaddr[i] = PHYS_TO_DMAP(paddr);
3234		}
3235	}
3236
3237	/* Exit early if everything is covered by the DMAP */
3238	if (!needs_mapping)
3239		return (FALSE);
3240
3241	if (!can_fault)
3242		sched_pin();
3243	for (i = 0; i < count; i++) {
3244		paddr = VM_PAGE_TO_PHYS(page[i]);
3245		if (paddr >= DMAP_MAX_PHYSADDR) {
3246			panic(
3247			   "pmap_map_io_transient: TODO: Map out of DMAP data");
3248		}
3249	}
3250
3251	return (needs_mapping);
3252}
3253
3254void
3255pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3256    boolean_t can_fault)
3257{
3258	vm_paddr_t paddr;
3259	int i;
3260
3261	if (!can_fault)
3262		sched_unpin();
3263	for (i = 0; i < count; i++) {
3264		paddr = VM_PAGE_TO_PHYS(page[i]);
3265		if (paddr >= DMAP_MAX_PHYSADDR) {
3266			panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
3267		}
3268	}
3269}
3270