pmap7.c revision 1.61
1/*	$OpenBSD: pmap7.c,v 1.61 2021/03/25 04:12:00 jsg Exp $	*/
2/*	$NetBSD: pmap.c,v 1.147 2004/01/18 13:03:50 scw Exp $	*/
3
4/*
5 * Copyright 2003 Wasabi Systems, Inc.
6 * All rights reserved.
7 *
8 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *      This product includes software developed for the NetBSD Project by
21 *      Wasabi Systems, Inc.
22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
23 *    or promote products derived from this software without specific prior
24 *    written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39/*
40 * Copyright (c) 2002-2003 Wasabi Systems, Inc.
41 * Copyright (c) 2001 Richard Earnshaw
42 * Copyright (c) 2001-2002 Christopher Gilbert
43 * All rights reserved.
44 *
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. The name of the company nor the name of the author may be used to
51 *    endorse or promote products derived from this software without specific
52 *    prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
55 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
56 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
58 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 */
66
67/*-
68 * Copyright (c) 1999 The NetBSD Foundation, Inc.
69 * All rights reserved.
70 *
71 * This code is derived from software contributed to The NetBSD Foundation
72 * by Charles M. Hannum.
73 *
74 * Redistribution and use in source and binary forms, with or without
75 * modification, are permitted provided that the following conditions
76 * are met:
77 * 1. Redistributions of source code must retain the above copyright
78 *    notice, this list of conditions and the following disclaimer.
79 * 2. Redistributions in binary form must reproduce the above copyright
80 *    notice, this list of conditions and the following disclaimer in the
81 *    documentation and/or other materials provided with the distribution.
82 *
83 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
84 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
85 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
86 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
87 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
88 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
89 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
90 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
91 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
92 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
93 * POSSIBILITY OF SUCH DAMAGE.
94 */
95
96/*
97 * Copyright (c) 1994-1998 Mark Brinicombe.
98 * Copyright (c) 1994 Brini.
99 * All rights reserved.
100 *
101 * This code is derived from software written for Brini by Mark Brinicombe
102 *
103 * Redistribution and use in source and binary forms, with or without
104 * modification, are permitted provided that the following conditions
105 * are met:
106 * 1. Redistributions of source code must retain the above copyright
107 *    notice, this list of conditions and the following disclaimer.
108 * 2. Redistributions in binary form must reproduce the above copyright
109 *    notice, this list of conditions and the following disclaimer in the
110 *    documentation and/or other materials provided with the distribution.
111 * 3. All advertising materials mentioning features or use of this software
112 *    must display the following acknowledgement:
113 *	This product includes software developed by Mark Brinicombe.
114 * 4. The name of the author may not be used to endorse or promote products
115 *    derived from this software without specific prior written permission.
116 *
117 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
118 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
119 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
120 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
121 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
122 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
123 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
124 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
125 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
126 *
127 * RiscBSD kernel project
128 *
129 * pmap.c
130 *
131 * Machine dependent vm stuff
132 *
133 * Created      : 20/09/94
134 */
135
136/*
137 * Performance improvements, UVM changes, overhauls and part-rewrites
138 * were contributed by Neil A. Carson <neil@causality.com>.
139 */
140
141/*
142 * Overhauled again to speedup the pmap, use MMU Domains so that L1 tables
143 * can be shared, and re-work the KVM layout, by Steve Woodford of Wasabi
144 * Systems, Inc.
145 *
146 * There are still a few things outstanding at this time:
147 *
148 *   - There are some unresolved issues for MP systems:
149 *
150 *     o The L1 metadata needs a lock, or more specifically, some places
151 *       need to acquire an exclusive lock when modifying L1 translation
152 *       table entries.
153 *
154 *     o When one cpu modifies an L1 entry, and that L1 table is also
155 *       being used by another cpu, then the latter will need to be told
156 *       that a tlb invalidation may be necessary. (But only if the old
157 *       domain number in the L1 entry being over-written is currently
158 *       the active domain on that cpu). I guess there are lots more tlb
159 *       shootdown issues too...
160 *
161 *     o If the vector_page is at 0x00000000 instead of 0xffff0000, then
162 *       MP systems will lose big-time because of the MMU domain hack.
163 *       The only way this can be solved (apart from moving the vector
164 *       page to 0xffff0000) is to reserve the first 1MB of user address
165 *       space for kernel use only. This would require re-linking all
166 *       applications so that the text section starts above this 1MB
167 *       boundary.
168 *
169 *     o Tracking which VM space is resident in the cache/tlb has not yet
170 *       been implemented for MP systems.
171 *
172 *     o Finally, there is a pathological condition where two cpus running
173 *       two separate processes (not procs) which happen to share an L1
174 *       can get into a fight over one or more L1 entries. This will result
175 *       in a significant slow-down if both processes are in tight loops.
176 */
177
178#include <sys/param.h>
179#include <sys/systm.h>
180#include <sys/proc.h>
181#include <sys/malloc.h>
182#include <sys/user.h>
183#include <sys/pool.h>
184
185#include <uvm/uvm.h>
186
187#include <machine/pmap.h>
188#include <machine/pcb.h>
189#include <machine/param.h>
190#include <arm/cpufunc.h>
191
192//#define PMAP_DEBUG
193#ifdef PMAP_DEBUG
194
195/*
196 * for switching to potentially finer grained debugging
197 */
198#define	PDB_FOLLOW	0x0001
199#define	PDB_INIT	0x0002
200#define	PDB_ENTER	0x0004
201#define	PDB_REMOVE	0x0008
202#define	PDB_CREATE	0x0010
203#define	PDB_PTPAGE	0x0020
204#define	PDB_GROWKERN	0x0040
205#define	PDB_BITS	0x0080
206#define	PDB_COLLECT	0x0100
207#define	PDB_PROTECT	0x0200
208#define	PDB_MAP_L1	0x0400
209#define	PDB_BOOTSTRAP	0x1000
210#define	PDB_PARANOIA	0x2000
211#define	PDB_WIRING	0x4000
212#define	PDB_PVDUMP	0x8000
213#define	PDB_KENTER	0x20000
214#define	PDB_KREMOVE	0x40000
215
216#define pmapdebug (cold ? 0 : 0xffffffff)
217#define	NPDEBUG(_lev_,_stat_) \
218	if (pmapdebug & (_lev_)) \
219        	((_stat_))
220
221#else	/* PMAP_DEBUG */
222#define NPDEBUG(_lev_,_stat_) /* Nothing */
223#endif	/* PMAP_DEBUG */
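/*
 * Example usage (cf. the NPDEBUG() calls throughout this file):
 *
 *	NPDEBUG(PDB_ENTER, printf("pmap_enter: va 0x%lx\n", va));
 *
 * When PMAP_DEBUG is not defined, the macro expands to nothing, so the
 * printf() and its arguments are compiled away entirely.
 */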
224
225/*
226 * pmap_kernel() points here
227 */
228struct pmap     kernel_pmap_store;
229
230/*
231 * Pool and cache that pmap structures are allocated from.
232 * We use a cache to avoid clearing the pm_l2[] array (1KB)
233 * in pmap_create().
234 */
235struct pool pmap_pmap_pool;
236
237/*
238 * Pool of PV structures
239 */
240struct pool pmap_pv_pool;
241void *pmap_bootstrap_pv_page_alloc(struct pool *, int, int *);
242void pmap_bootstrap_pv_page_free(struct pool *, void *);
243struct pool_allocator pmap_bootstrap_pv_allocator = {
244	pmap_bootstrap_pv_page_alloc, pmap_bootstrap_pv_page_free
245};
246
247/*
248 * Pool and cache of l2_dtable structures.
249 * We use a cache to avoid clearing the structures when they're
250 * allocated. (196 bytes)
251 */
252struct pool pmap_l2dtable_pool;
253vaddr_t pmap_kernel_l2dtable_kva;
254
255/*
256 * Pool and cache of L2 page descriptors.
257 * We use a cache to avoid clearing the descriptor table
258 * when they're allocated. (1KB)
259 */
260struct pool pmap_l2ptp_pool;
261vaddr_t pmap_kernel_l2ptp_kva;
262paddr_t pmap_kernel_l2ptp_phys;
263
264/*
265 * pmap copy/zero page, wb page, and mem(5) hook point
266 */
267pt_entry_t *csrc_pte, *cdst_pte, *cwb_pte;
268vaddr_t csrcp, cdstp, cwbp;
269char *memhook;
270extern caddr_t msgbufaddr;
271
272/*
273 * Flag to indicate if pmap_init() has done its thing
274 */
275int pmap_initialized;
276
277/*
278 * Metadata for L1 translation tables.
279 */
280struct l1_ttable {
281	/* Entry on the L1 Table list */
282	TAILQ_ENTRY(l1_ttable) l1_link;
283
284	/* Physical address of this L1 page table */
285	paddr_t l1_physaddr;
286
287	/* KVA of this L1 page table */
288	pd_entry_t *l1_kva;
289};
290
291/*
292 * Convert a virtual address into its L1 table index. That is, the
293 * index used to locate the L2 descriptor table pointer in an L1 table.
294 * This is basically used to index l1->l1_kva[].
295 *
296 * Each L2 descriptor table represents 1MB of VA space.
297 */
298#define	L1_IDX(va)		(((vaddr_t)(va)) >> L1_S_SHIFT)
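/*
 * For example, with 1MB sections (L1_S_SHIFT == 20) a virtual address
 * of 0xc0123456 gives L1_IDX(va) == 0xc01; every address within that
 * 1MB region shares the same L1 slot and therefore the same L2 table.
 */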
299
300/*
301 * Set if the PXN bit is supported.
302 */
303pd_entry_t l1_c_pxn;
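/*
 * PXN is the ARMv7 "Privileged eXecute Never" bit.  On CPUs that
 * support it, l1_c_pxn holds the bit OR'ed into the L1 coarse
 * descriptors written by pmap_enter(), preventing the kernel from
 * executing code through those mappings.
 */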
304
305/*
306 * A list of all L1 tables
307 */
308TAILQ_HEAD(, l1_ttable) l1_list;
309
310/*
311 * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots.
312 *
313 * This is normally 16MB worth of L2 page descriptors for any given pmap.
314 * Reference counts are maintained for L2 descriptors so they can be
315 * freed when empty.
316 */
317struct l2_dtable {
318	/* The number of L2 page descriptors allocated to this l2_dtable */
319	u_int l2_occupancy;
320
321	/* List of L2 page descriptors */
322	struct l2_bucket {
323		pt_entry_t *l2b_kva;	/* KVA of L2 Descriptor Table */
324		paddr_t l2b_phys;	/* Physical address of same */
325		u_short l2b_l1idx;	/* This L2 table's L1 index */
326		u_short l2b_occupancy;	/* How many active descriptors */
327	} l2_bucket[L2_BUCKET_SIZE];
328};
329
330/*
331 * Given an L1 table index, calculate the corresponding l2_dtable index
332 * and bucket index within the l2_dtable.
333 */
334#define	L2_IDX(l1idx)		(((l1idx) >> L2_BUCKET_LOG2) & \
335				 (L2_SIZE - 1))
336#define	L2_BUCKET(l1idx)	((l1idx) & (L2_BUCKET_SIZE - 1))
337
338/*
339 * Given a virtual address, this macro returns the
340 * virtual address required to drop into the next L2 bucket.
341 */
342#define	L2_NEXT_BUCKET(va)	(((va) & L1_S_FRAME) + L1_S_SIZE)
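/*
 * Worked example, assuming the 16MB-per-l2_dtable layout described
 * above (L2_BUCKET_SIZE == 16): for va == 0xc0123456, L1_IDX(va) is
 * 0xc01, so its PTE lives in pm->pm_l2[L2_IDX(0xc01)] at bucket
 * L2_BUCKET(0xc01) == 1, and L2_NEXT_BUCKET(va) == 0xc0200000, the
 * start of the next 1MB region.
 */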
343
344/*
345 * L2 allocation.
346 */
347#define	pmap_alloc_l2_dtable()		\
348	    pool_get(&pmap_l2dtable_pool, PR_NOWAIT|PR_ZERO)
349#define	pmap_free_l2_dtable(l2)		\
350	    pool_put(&pmap_l2dtable_pool, (l2))
351
352/*
353 * We try to map the page tables write-through, if possible.  However, not
354 * all CPUs have a write-through cache mode, so on those we have to sync
355 * the cache when we frob page tables.
356 *
357 * We try to evaluate this at compile time, if possible.  However, it's
358 * not always possible to do that, hence this run-time var.
359 */
360int	pmap_needs_pte_sync;
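/*
 * Conceptually, PTE_SYNC(ptep) (defined in the pmap header) is a no-op
 * when the page tables are mapped write-through; otherwise it cleans
 * the cache line holding *ptep and issues a barrier so the hardware
 * table walker sees the update before we depend on it.
 */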
361
362/*
363 * Real definition of pv_entry.
364 */
365struct pv_entry {
366	struct pv_entry *pv_next;       /* next pv_entry */
367	pmap_t		pv_pmap;        /* pmap where mapping lies */
368	vaddr_t		pv_va;          /* virtual address for mapping */
369	u_int		pv_flags;       /* flags */
370};
371
372/*
373 * Macro to determine if a mapping might be resident in the
374 * instruction cache and/or TLB
375 */
376#define	PV_BEEN_EXECD(f)  (((f) & PVF_EXEC) != 0)
377
378/*
379 * Local prototypes
380 */
381void		pmap_alloc_specials(vaddr_t *, int, vaddr_t *,
382		    pt_entry_t **);
383static int	pmap_is_current(pmap_t);
384void		pmap_enter_pv(struct vm_page *, struct pv_entry *,
385		    pmap_t, vaddr_t, u_int);
386static struct pv_entry *pmap_find_pv(struct vm_page *, pmap_t, vaddr_t);
387struct pv_entry *pmap_remove_pv(struct vm_page *, pmap_t, vaddr_t);
388u_int		pmap_modify_pv(struct vm_page *, pmap_t, vaddr_t,
389		    u_int, u_int);
390
391void		pmap_alloc_l1(pmap_t);
392void		pmap_free_l1(pmap_t);
393
394struct l2_bucket *pmap_get_l2_bucket(pmap_t, vaddr_t);
395struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vaddr_t);
396void		pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int);
397
398void		pmap_clearbit(struct vm_page *, u_int);
399void		pmap_clean_page(struct vm_page *);
400void		pmap_page_remove(struct vm_page *);
401
402void		pmap_init_l1(struct l1_ttable *, pd_entry_t *);
403vaddr_t		kernel_pt_lookup(paddr_t);
404
405
406/*
407 * External function prototypes
408 */
409extern void bzero_page(vaddr_t);
410extern void bcopy_page(vaddr_t, vaddr_t);
411
412/*
413 * Misc variables
414 */
415vaddr_t virtual_avail;
416vaddr_t virtual_end;
417vaddr_t pmap_curmaxkvaddr;
418
419extern pv_addr_t systempage;
420
421static __inline int
422pmap_is_current(pmap_t pm)
423{
424	if (pm == pmap_kernel() ||
425	    (curproc && curproc->p_vmspace->vm_map.pmap == pm))
426		return 1;
427
428	return 0;
429}
430
431/*
432 * A bunch of routines to conditionally flush the caches/TLB depending
433 * on whether the specified pmap actually needs to be flushed at any
434 * given time.
435 */
436static __inline void
437pmap_tlb_flushID_SE(pmap_t pm, vaddr_t va)
438{
439	if (pmap_is_current(pm))
440		cpu_tlb_flushID_SE(va);
441}
442
443static __inline void
444pmap_tlb_flushID(pmap_t pm)
445{
446	if (pmap_is_current(pm))
447		cpu_tlb_flushID();
448}
449
450/*
451 * Returns a pointer to the L2 bucket associated with the specified pmap
452 * and VA, or NULL if no L2 bucket exists for the address.
453 */
454struct l2_bucket *
455pmap_get_l2_bucket(pmap_t pm, vaddr_t va)
456{
457	struct l2_dtable *l2;
458	struct l2_bucket *l2b;
459	u_short l1idx;
460
461	l1idx = L1_IDX(va);
462
463	if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL ||
464	    (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL)
465		return (NULL);
466
467	return (l2b);
468}
469
470/*
471 * main pv_entry manipulation functions:
472 *   pmap_enter_pv: enter a mapping onto a vm_page list
473 *   pmap_remove_pv: remove a mapping from a vm_page list
474 *
475 * NOTE: pmap_enter_pv expects to lock the pvh itself
476 *       pmap_remove_pv expects the caller to lock the pvh before calling
477 */
478
479/*
480 * pmap_enter_pv: enter a mapping onto a vm_page list
481 *
482 * => caller should have pmap locked
483 * => we will gain the lock on the vm_page and allocate the new pv_entry
484 * => caller should adjust ptp's wire_count before calling
485 * => caller should not adjust pmap's wire_count
486 */
487void
488pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, pmap_t pm,
489    vaddr_t va, u_int flags)
490{
491
492	NPDEBUG(PDB_PVDUMP,
493	    printf("pmap_enter_pv: pm %p, pg %p, flags 0x%x\n", pm, pg, flags));
494
495	pve->pv_pmap = pm;
496	pve->pv_va = va;
497	pve->pv_flags = flags;
498
499	pve->pv_next = pg->mdpage.pvh_list;	/* add to ... */
500	pg->mdpage.pvh_list = pve;		/* ... locked list */
501	pg->mdpage.pvh_attrs |= flags & (PVF_REF | PVF_MOD);
502
503	if (pve->pv_flags & PVF_WIRED)
504		++pm->pm_stats.wired_count;
505}
506
507/*
508 *
509 * pmap_find_pv: Find a pv entry
510 *
511 * => caller should hold lock on vm_page
512 */
513static __inline struct pv_entry *
514pmap_find_pv(struct vm_page *pg, pmap_t pm, vaddr_t va)
515{
516	struct pv_entry *pv;
517
518	for (pv = pg->mdpage.pvh_list; pv; pv = pv->pv_next) {
519		if (pm == pv->pv_pmap && va == pv->pv_va)
520			break;
521	}
522
523	return (pv);
524}
525
526/*
527 * pmap_remove_pv: try to remove a mapping from a pv_list
528 *
529 * => pmap should be locked
530 * => caller should hold lock on vm_page [so that attrs can be adjusted]
531 * => caller should adjust ptp's wire_count and free PTP if needed
532 * => caller should NOT adjust pmap's wire_count
533 * => we return the removed pve
534 */
535struct pv_entry *
536pmap_remove_pv(struct vm_page *pg, pmap_t pm, vaddr_t va)
537{
538	struct pv_entry *pve, **prevptr;
539
540	NPDEBUG(PDB_PVDUMP,
541	    printf("pmap_remove_pv: pm %p, pg %p, va 0x%08lx\n", pm, pg, va));
542
543	prevptr = &pg->mdpage.pvh_list;		/* previous pv_entry pointer */
544	pve = *prevptr;
545
546	while (pve) {
547		if (pve->pv_pmap == pm && pve->pv_va == va) {	/* match? */
548			NPDEBUG(PDB_PVDUMP,
549			    printf("pmap_remove_pv: pm %p, pg %p, flags 0x%x\n", pm, pg, pve->pv_flags));
550			*prevptr = pve->pv_next;		/* remove it! */
551			if (pve->pv_flags & PVF_WIRED)
552			    --pm->pm_stats.wired_count;
553			break;
554		}
555		prevptr = &pve->pv_next;		/* previous pointer */
556		pve = pve->pv_next;			/* advance */
557	}
558
559	return(pve);				/* return removed pve */
560}
561
562/*
563 *
564 * pmap_modify_pv: Update pv flags
565 *
566 * => caller should hold lock on vm_page [so that attrs can be adjusted]
567 * => caller should NOT adjust pmap's wire_count
568 * => we return the old flags
569 *
570 * Modify a physical-virtual mapping in the pv table
571 */
572u_int
573pmap_modify_pv(struct vm_page *pg, pmap_t pm, vaddr_t va,
574    u_int clr_mask, u_int set_mask)
575{
576	struct pv_entry *npv;
577	u_int flags, oflags;
578
579	if ((npv = pmap_find_pv(pg, pm, va)) == NULL)
580		return (0);
581
582	NPDEBUG(PDB_PVDUMP,
583	    printf("pmap_modify_pv: pm %p, pg %p, clr 0x%x, set 0x%x, flags 0x%x\n", pm, pg, clr_mask, set_mask, npv->pv_flags));
584
585	/*
586	 * There is at least one VA mapping this page.
587	 */
588
589	if (clr_mask & (PVF_REF | PVF_MOD))
590		pg->mdpage.pvh_attrs |= set_mask & (PVF_REF | PVF_MOD);
591
592	oflags = npv->pv_flags;
593	npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask;
594
595	if ((flags ^ oflags) & PVF_WIRED) {
596		if (flags & PVF_WIRED)
597			++pm->pm_stats.wired_count;
598		else
599			--pm->pm_stats.wired_count;
600	}
601
602	return (oflags);
603}
604
605uint nl1;
606/*
607 * Allocate an L1 translation table for the specified pmap.
608 * This is called at pmap creation time.
609 */
610void
611pmap_alloc_l1(pmap_t pm)
612{
613	struct l1_ttable *l1;
614	struct pglist plist;
615	struct vm_page *m;
616	pd_entry_t *pl1pt;
617	vaddr_t va, eva;
618	int error;
619
620#ifdef PMAP_DEBUG
621printf("%s: %d\n", __func__, ++nl1);
622#endif
623	/* XXX use a pool? or move to inside struct pmap? */
624	l1 = malloc(sizeof(*l1), M_VMPMAP, M_WAITOK);
625
626	/* Allocate a L1 page table */
627	for (;;) {
628		va = uvm_km_valloc(kernel_map, L1_TABLE_SIZE);
629		if (va != 0)
630			break;
631		uvm_wait("alloc_l1_va");
632	}
633
634	for (;;) {
635		TAILQ_INIT(&plist);
636		error = uvm_pglistalloc(L1_TABLE_SIZE, 0, (paddr_t)-1,
637		    L1_TABLE_SIZE, 0, &plist, 1, UVM_PLA_WAITOK);
638		if (error == 0)
639			break;
640		uvm_wait("alloc_l1_pg");
641	}
642
643	pl1pt = (pd_entry_t *)va;
644	m = TAILQ_FIRST(&plist);
645	for (eva = va + L1_TABLE_SIZE; va < eva; va += PAGE_SIZE) {
646		paddr_t pa = VM_PAGE_TO_PHYS(m);
647
648		pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
649		m = TAILQ_NEXT(m, pageq);
650	}
651
652	pmap_init_l1(l1, pl1pt);
653
654	pm->pm_l1 = l1;
655}
656
657/*
658 * Free an L1 translation table.
659 * This is called at pmap destruction time.
660 */
661void
662pmap_free_l1(pmap_t pm)
663{
664	struct l1_ttable *l1 = pm->pm_l1;
665	struct pglist mlist;
666	struct vm_page *pg;
667	struct l2_bucket *l2b;
668	pt_entry_t *ptep;
669	vaddr_t va;
670	uint npg;
671
672	pm->pm_l1 = NULL;
673	TAILQ_REMOVE(&l1_list, l1, l1_link);
674
675	/* free backing pages */
676	TAILQ_INIT(&mlist);
677	va = (vaddr_t)l1->l1_kva;
678	for (npg = atop(L1_TABLE_SIZE); npg != 0; npg--) {
679		l2b = pmap_get_l2_bucket(pmap_kernel(), va);
680		ptep = &l2b->l2b_kva[l2pte_index(va)];
681		pg = PHYS_TO_VM_PAGE(l2pte_pa(*ptep));
682		TAILQ_INSERT_TAIL(&mlist, pg, pageq);
683		va += PAGE_SIZE;
684	}
685	pmap_kremove((vaddr_t)l1->l1_kva, L1_TABLE_SIZE);
686	uvm_pglistfree(&mlist);
687
688	/* free backing va */
689	uvm_km_free(kernel_map, (vaddr_t)l1->l1_kva, L1_TABLE_SIZE);
690
691	free(l1, M_VMPMAP, 0);
692}
693
694/*
695 * void pmap_free_l2_ptp(pt_entry_t *)
696 *
697 * Free an L2 descriptor table.
698 */
699static __inline void
700pmap_free_l2_ptp(pt_entry_t *l2)
701{
702	pool_put(&pmap_l2ptp_pool, (void *)l2);
703}
704
705/*
706 * Returns a pointer to the L2 bucket associated with the specified pmap
707 * and VA.
708 *
709 * If no L2 bucket exists, perform the necessary allocations to put an L2
710 * bucket/page table in place.
711 *
712 * Note that if a new L2 bucket/page was allocated, the caller *must*
713 * increment the bucket occupancy counter appropriately *before*
714 * releasing the pmap's lock to ensure no other thread or cpu deallocates
715 * the bucket/page in the meantime.
716 */
717struct l2_bucket *
718pmap_alloc_l2_bucket(pmap_t pm, vaddr_t va)
719{
720	struct l2_dtable *l2;
721	struct l2_bucket *l2b;
722	u_short l1idx;
723
724	l1idx = L1_IDX(va);
725
726	if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
727		/*
728		 * No mapping at this address, as there is
729		 * no entry in the L1 table.
730		 * Need to allocate a new l2_dtable.
731		 */
732		if ((l2 = pmap_alloc_l2_dtable()) == NULL)
733			return (NULL);
734
735		/*
736		 * Link it into the parent pmap
737		 */
738		pm->pm_l2[L2_IDX(l1idx)] = l2;
739	}
740
741	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
742
743	/*
744	 * Fetch pointer to the L2 page table associated with the address.
745	 */
746	if (l2b->l2b_kva == NULL) {
747		pt_entry_t *ptep;
748
749		/*
750		 * No L2 page table has been allocated. Chances are, this
751		 * is because we just allocated the l2_dtable, above.
752		 */
753		ptep = pool_get(&pmap_l2ptp_pool, PR_NOWAIT|PR_ZERO);
754		if (ptep == NULL) {
755			/*
756			 * Oops, no more L2 page tables available at this
757			 * time. We may need to deallocate the l2_dtable
758			 * if we allocated a new one above.
759			 */
760			if (l2->l2_occupancy == 0) {
761				pm->pm_l2[L2_IDX(l1idx)] = NULL;
762				pmap_free_l2_dtable(l2);
763			}
764			return (NULL);
765		}
766		PTE_SYNC_RANGE(ptep, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
767		pmap_extract(pmap_kernel(), (vaddr_t)ptep, &l2b->l2b_phys);
768
769		l2->l2_occupancy++;
770		l2b->l2b_kva = ptep;
771		l2b->l2b_l1idx = l1idx;
772	}
773
774	return (l2b);
775}
776
777/*
778 * One or more mappings in the specified L2 descriptor table have just been
779 * invalidated.
780 *
781 * Garbage collect the metadata and descriptor table itself if necessary.
782 *
783 * The pmap lock must be acquired when this is called (not necessary
784 * for the kernel pmap).
785 */
786void
787pmap_free_l2_bucket(pmap_t pm, struct l2_bucket *l2b, u_int count)
788{
789	struct l2_dtable *l2;
790	pd_entry_t *pl1pd;
791	pt_entry_t *ptep;
792	u_short l1idx;
793
794	KDASSERT(count <= l2b->l2b_occupancy);
795
796	/*
797	 * Update the bucket's reference count according to how many
798	 * PTEs the caller has just invalidated.
799	 */
800	l2b->l2b_occupancy -= count;
801
802	/*
803	 * Note:
804	 *
805	 * Level 2 page tables allocated to the kernel pmap are never freed
806	 * as that would require checking all Level 1 page tables and
807	 * removing any references to the Level 2 page table. See also the
808	 * comment elsewhere about never freeing bootstrap L2 descriptors.
809	 *
810	 * We make do with just invalidating the mapping in the L2 table.
811	 *
812	 * This isn't really a big deal in practice and, in fact, leads
813	 * to a performance win over time as we don't need to continually
814	 * alloc/free.
815	 */
816	if (l2b->l2b_occupancy > 0 || pm == pmap_kernel())
817		return;
818
819	/*
820	 * There are no more valid mappings in this level 2 page table.
821	 * Go ahead and NULL-out the pointer in the bucket, then
822	 * free the page table.
823	 */
824	l1idx = l2b->l2b_l1idx;
825	ptep = l2b->l2b_kva;
826	l2b->l2b_kva = NULL;
827
828	pl1pd = &pm->pm_l1->l1_kva[l1idx];
829
830	/*
831	 * Invalidate the L1 slot.
832	 */
833	*pl1pd = L1_TYPE_INV;
834	PTE_SYNC(pl1pd);
835	pmap_tlb_flushID_SE(pm, l1idx << L1_S_SHIFT);
836
837	/*
838	 * Release the L2 descriptor table back to the pool cache.
839	 */
840	pmap_free_l2_ptp(ptep);
841
842	/*
843	 * Update the reference count in the associated l2_dtable
844	 */
845	l2 = pm->pm_l2[L2_IDX(l1idx)];
846	if (--l2->l2_occupancy > 0)
847		return;
848
849	/*
850	 * There are no more valid mappings in any of the Level 1
851	 * slots managed by this l2_dtable. Go ahead and NULL-out
852	 * the pointer in the parent pmap and free the l2_dtable.
853	 */
854	pm->pm_l2[L2_IDX(l1idx)] = NULL;
855	pmap_free_l2_dtable(l2);
856}
857
858/*
859 * Modify pte bits for all ptes corresponding to the given physical address.
860 * We use `maskbits' rather than `clearbits' because we're always passing
861 * constants and the latter would require an extra inversion at run-time.
862 */
863void
864pmap_clearbit(struct vm_page *pg, u_int maskbits)
865{
866	struct l2_bucket *l2b;
867	struct pv_entry *pv;
868	pt_entry_t *ptep, npte, opte;
869	pmap_t pm;
870	vaddr_t va;
871	u_int oflags;
872
873	NPDEBUG(PDB_BITS,
874	    printf("pmap_clearbit: pg %p (0x%08lx) mask 0x%x\n",
875	    pg, pg->phys_addr, maskbits));
876
877	/*
878	 * Clear saved attributes (modify, reference)
879	 */
880	pg->mdpage.pvh_attrs &= ~(maskbits & (PVF_MOD | PVF_REF));
881
882	if (pg->mdpage.pvh_list == NULL)
883		return;
884
885	/*
886	 * Loop over all current mappings setting/clearing as appropriate
887	 */
888	for (pv = pg->mdpage.pvh_list; pv; pv = pv->pv_next) {
889		va = pv->pv_va;
890		pm = pv->pv_pmap;
891		oflags = pv->pv_flags;
892		pv->pv_flags &= ~maskbits;
893
894		l2b = pmap_get_l2_bucket(pm, va);
895		KDASSERT(l2b != NULL);
896
897		ptep = &l2b->l2b_kva[l2pte_index(va)];
898		npte = opte = *ptep;
899		NPDEBUG(PDB_BITS,
900		    printf(
901		    "pmap_clearbit: pv %p, pm %p, va 0x%08lx, flag 0x%x\n",
902		    pv, pv->pv_pmap, pv->pv_va, oflags));
903
904		if (maskbits & (PVF_WRITE|PVF_MOD)) {
905			/* Disable write access. */
906			npte |= L2_V7_AP(0x4);
907		}
908
909		if (maskbits & PVF_REF) {
910			/*
911			 * Clear the Access Flag such that we will
912			 * take a page fault the next time the mapping
913			 * is referenced.
914			 */
915			npte &= ~L2_V7_AF;
916		}
917
918		if (npte != opte) {
919			*ptep = npte;
920			PTE_SYNC(ptep);
921			/* Flush the TLB entry if a current pmap. */
922			if (opte & L2_V7_AF)
923				pmap_tlb_flushID_SE(pm, pv->pv_va);
924		}
925
926		NPDEBUG(PDB_BITS,
927		    printf("pmap_clearbit: pm %p va 0x%lx opte 0x%08x npte 0x%08x\n",
928		    pm, va, opte, npte));
929	}
930}
931
932/*
933 * pmap_clean_page()
934 *
935 * Invalidate all I$ aliases for a single page.
936 */
937void
938pmap_clean_page(struct vm_page *pg)
939{
940	pmap_t pm;
941	struct pv_entry *pv;
942
943	if (curproc)
944		pm = curproc->p_vmspace->vm_map.pmap;
945	else
946		pm = pmap_kernel();
947
948	for (pv = pg->mdpage.pvh_list; pv; pv = pv->pv_next) {
949		/* inline !pmap_is_current(pv->pv_pmap) */
950		if (pv->pv_pmap != pmap_kernel() && pv->pv_pmap != pm)
951			continue;
952
953		/*
954		 * The page is mapped non-cacheable in
955		 * this map.  No need to flush the cache.
956		 */
957		if (pv->pv_flags & PVF_NC) /* XXX ought to be pg attr */
958			break;
959
960		if (PV_BEEN_EXECD(pv->pv_flags))
961			cpu_icache_sync_range(pv->pv_va, PAGE_SIZE);
962	}
963}
964
965/*
966 * Routine:	pmap_page_remove
967 * Function:
968 *		Removes this physical page from
969 *		all physical maps in which it resides.
970 *		Reflects back modify bits to the pager.
971 */
972void
973pmap_page_remove(struct vm_page *pg)
974{
975	struct l2_bucket *l2b;
976	struct pv_entry *pv, *npv;
977	pmap_t pm, curpm;
978	pt_entry_t *ptep, opte;
979	int flush;
980
981	NPDEBUG(PDB_FOLLOW,
982	    printf("pmap_page_remove: pg %p (0x%08lx)\n", pg, pg->phys_addr));
983
984	pv = pg->mdpage.pvh_list;
985	if (pv == NULL)
986		return;
987
988	flush = 0;
989	if (curproc)
990		curpm = curproc->p_vmspace->vm_map.pmap;
991	else
992		curpm = pmap_kernel();
993
994	while (pv) {
995		pm = pv->pv_pmap;
996
997		l2b = pmap_get_l2_bucket(pm, pv->pv_va);
998		KDASSERT(l2b != NULL);
999
1000		ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
1001		opte = *ptep;
1002		if (opte != L2_TYPE_INV) {
1003			/* inline pmap_is_current(pm) */
1004			if ((opte & L2_V7_AF) &&
1005			    (pm == curpm || pm == pmap_kernel())) {
1006				if (PV_BEEN_EXECD(pv->pv_flags))
1007					cpu_icache_sync_range(pv->pv_va, PAGE_SIZE);
1008				flush = 1;
1009			}
1010
1011			/*
1012			 * Update statistics
1013			 */
1014			--pm->pm_stats.resident_count;
1015
1016			/* Wired bit */
1017			if (pv->pv_flags & PVF_WIRED)
1018				--pm->pm_stats.wired_count;
1019
1020			/*
1021			 * Invalidate the PTEs.
1022			 */
1023			*ptep = L2_TYPE_INV;
1024			PTE_SYNC(ptep);
1025			if (flush)
1026				cpu_tlb_flushID_SE(pv->pv_va);
1027
1028			pmap_free_l2_bucket(pm, l2b, 1);
1029		}
1030
1031		npv = pv->pv_next;
1032		pool_put(&pmap_pv_pool, pv);
1033		pv = npv;
1034	}
1035	pg->mdpage.pvh_list = NULL;
1036}
1037
1038/*
1039 * pmap_t pmap_create(void)
1040 *
1041 *      Create a new pmap structure from scratch.
1042 */
1043pmap_t
1044pmap_create(void)
1045{
1046	pmap_t pm;
1047
1048	pm = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO);
1049
1050	pm->pm_refs = 1;
1051	pm->pm_stats.wired_count = 0;
1052	pmap_alloc_l1(pm);
1053
1054	return (pm);
1055}
1056
1057/*
1058 * void pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot,
1059 *     int flags)
1060 *
1061 *      Insert the given physical page (p) at
1062 *      the specified virtual address (v) in the
1063 *      target physical map with the protection requested.
1064 *
1065 *      NB:  This is the only routine which MAY NOT lazy-evaluate
1066 *      or lose information.  That is, this routine must actually
1067 *      insert this page into the given map NOW.
1068 */
1069int
1070pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1071{
1072	struct l2_bucket *l2b;
1073	struct vm_page *pg, *opg;
1074	struct pv_entry *pve;
1075	pt_entry_t *ptep, npte, opte;
1076	u_int nflags;
1077	u_int oflags;
1078	int mapped = 1;
1079
1080	NPDEBUG(PDB_ENTER, printf("pmap_enter: pm %p va 0x%lx pa 0x%lx prot %x flag %x\n", pm, va, pa, prot, flags));
1081
1082	KDASSERT((flags & PMAP_WIRED) == 0 || (flags & PROT_MASK) != 0);
1083	KDASSERT(((va | pa) & PGOFSET) == 0);
1084
1085	/*
1086	 * Get a pointer to the page.  Later on in this function, we
1087	 * test for a managed page by checking pg != NULL.
1088	 */
1089	pg = pmap_initialized ? PHYS_TO_VM_PAGE(pa) : NULL;
1090
1091	nflags = 0;
1092	if (prot & PROT_WRITE)
1093		nflags |= PVF_WRITE;
1094	if (prot & PROT_EXEC)
1095		nflags |= PVF_EXEC;
1096	if (flags & PMAP_WIRED)
1097		nflags |= PVF_WIRED;
1098
1099	/*
1100	 * Fetch the L2 bucket which maps this page, allocating one if
1101	 * necessary for user pmaps.
1102	 */
1103	if (pm == pmap_kernel())
1104		l2b = pmap_get_l2_bucket(pm, va);
1105	else
1106		l2b = pmap_alloc_l2_bucket(pm, va);
1107	if (l2b == NULL) {
1108		if (flags & PMAP_CANFAIL)
1109			return (ENOMEM);
1110
1111		panic("pmap_enter: failed to allocate L2 bucket");
1112	}
1113	ptep = &l2b->l2b_kva[l2pte_index(va)];
1114	opte = *ptep;
1115	npte = L2_S_PROTO | pa;
1116
1117	if (opte != L2_TYPE_INV) {
1118		/*
1119		 * There is already a mapping at this address.
1120		 * If the physical address is different, lookup the
1121		 * vm_page.
1122		 */
1123		if (l2pte_pa(opte) != pa)
1124			opg = PHYS_TO_VM_PAGE(l2pte_pa(opte));
1125		else
1126			opg = pg;
1127	} else
1128		opg = NULL;
1129
1130	if (pg) {
1131		/*
1132		 * This has to be a managed mapping.
1133		 */
1134		if ((flags & PROT_MASK) ||
1135		    (pg->mdpage.pvh_attrs & PVF_REF)) {
1136			/*
1137			 * - The access type indicates that we don't need
1138			 *   to do referenced emulation.
1139			 * OR
1140			 * - The physical page has already been referenced
1141			 *   so no need to re-do referenced emulation here.
1142			 */
1143			nflags |= PVF_REF;
1144			npte |= L2_V7_AF;
1145
1146			if ((flags & PROT_WRITE) ||
1147			    (pg->mdpage.pvh_attrs & PVF_MOD)) {
1148				/*
1149				 * This is a writable mapping, and the
1150				 * page's mod state indicates it has
1151				 * already been modified. Make it
1152				 * writable from the outset.
1153				 */
1154				nflags |= PVF_MOD;
1155			} else {
1156				prot &= ~PROT_WRITE;
1157			}
1158		} else {
1159			/*
1160			 * Need to do page referenced emulation.
1161			 */
1162			prot &= ~PROT_WRITE;
1163			mapped = 0;
1164		}
1165
1166		npte |= pte_l2_s_cache_mode;
1167
1168		if (pg == opg) {
1169			/*
1170			 * We're changing the attrs of an existing mapping.
1171			 */
1172			oflags = pmap_modify_pv(pg, pm, va,
1173			    PVF_WRITE | PVF_EXEC | PVF_WIRED |
1174			    PVF_MOD | PVF_REF, nflags);
1175		} else {
1176			/*
1177			 * New mapping, or changing the backing page
1178			 * of an existing mapping.
1179			 */
1180			if (opg) {
1181				/*
1182				 * Replacing an existing mapping with a new one.
1183				 * It is part of our managed memory so we
1184				 * must remove it from the PV list
1185				 */
1186				pve = pmap_remove_pv(opg, pm, va);
1187			} else
1188			if ((pve = pool_get(&pmap_pv_pool, PR_NOWAIT)) == NULL){
1189				if ((flags & PMAP_CANFAIL) == 0)
1190					panic("pmap_enter: no pv entries");
1191
1192				if (pm != pmap_kernel())
1193					pmap_free_l2_bucket(pm, l2b, 0);
1194
1195				NPDEBUG(PDB_ENTER,
1196				    printf("pmap_enter: ENOMEM\n"));
1197				return (ENOMEM);
1198			}
1199
1200			pmap_enter_pv(pg, pve, pm, va, nflags);
1201		}
1202	} else {
1203		/*
1204		 * We're mapping an unmanaged page.
1205		 * These are always readable, and possibly writable, from
1206		 * the get go as we don't need to track ref/mod status.
1207		 */
1208		npte |= L2_V7_AF;
1209
1210		if (opg) {
1211			/*
1212			 * Looks like there's an existing 'managed' mapping
1213			 * at this address.
1214			 */
1215			pve = pmap_remove_pv(opg, pm, va);
1216			pool_put(&pmap_pv_pool, pve);
1217		}
1218	}
1219
1220	/*
1221	 * Make sure userland mappings get the right permissions
1222	 */
1223	npte |= L2_S_PROT(pm == pmap_kernel() ?  PTE_KERNEL : PTE_USER, prot);
1224
1225	/*
1226	 * Keep the stats up to date
1227	 */
1228	if (opte == L2_TYPE_INV) {
1229		l2b->l2b_occupancy++;
1230		pm->pm_stats.resident_count++;
1231	}
1232
1233	NPDEBUG(PDB_ENTER,
1234	    printf("pmap_enter: opte 0x%08x npte 0x%08x\n", opte, npte));
1235
1236	/*
1237	 * If this is just a wiring change, the two PTEs will be
1238	 * identical, so there's no need to update the page table.
1239	 */
1240	if (npte != opte) {
1241		*ptep = npte;
1242		/*
1243		 * We only need to frob the cache/tlb if this pmap
1244		 * is current
1245		 */
1246		PTE_SYNC(ptep);
1247		if (npte & L2_V7_AF) {
1248			/*
1249			 * This mapping is likely to be accessed as
1250			 * soon as we return to userland. Fix up the
1251			 * L1 entry to avoid taking another page fault.
1252			 */
1253			pd_entry_t *pl1pd, l1pd;
1254
1255			pl1pd = &pm->pm_l1->l1_kva[L1_IDX(va)];
1256			l1pd = L1_C_PROTO | l2b->l2b_phys | l1_c_pxn;
1257			if (*pl1pd != l1pd) {
1258				*pl1pd = l1pd;
1259				PTE_SYNC(pl1pd);
1260			}
1261		}
1262
1263		if (opte & L2_V7_AF)
1264			pmap_tlb_flushID_SE(pm, va);
1265	}
1266
1267	/*
1268	 * Make sure executable pages do not have stale data in I$,
1269	 * which is VIPT.
1270	 */
1271	if (mapped && (prot & PROT_EXEC) != 0 && pmap_is_current(pm))
1272		cpu_icache_sync_range(va, PAGE_SIZE);
1273
1274	return (0);
1275}
1276
1277/*
1278 * pmap_remove()
1279 *
1280 * pmap_remove is responsible for nuking a number of mappings for a range
1281 * of virtual address space in the current pmap.
1282 */
1283
1284void
1285pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
1286{
1287	struct l2_bucket *l2b;
1288	vaddr_t next_bucket;
1289	pt_entry_t *ptep;
1290	u_int mappings;
1291
1292	NPDEBUG(PDB_REMOVE, printf("pmap_remove: pmap=%p sva=%08lx eva=%08lx\n",
1293	    pm, sva, eva));
1294
1295	while (sva < eva) {
1296		/*
1297		 * Do one L2 bucket's worth at a time.
1298		 */
1299		next_bucket = L2_NEXT_BUCKET(sva);
1300		if (next_bucket > eva)
1301			next_bucket = eva;
1302
1303		l2b = pmap_get_l2_bucket(pm, sva);
1304		if (l2b == NULL) {
1305			sva = next_bucket;
1306			continue;
1307		}
1308
1309		ptep = &l2b->l2b_kva[l2pte_index(sva)];
1310		mappings = 0;
1311
1312		while (sva < next_bucket) {
1313			struct vm_page *pg;
1314			pt_entry_t pte;
1315			paddr_t pa;
1316
1317			pte = *ptep;
1318
1319			if (pte == L2_TYPE_INV) {
1320				/*
1321				 * Nothing here, move along
1322				 */
1323				sva += PAGE_SIZE;
1324				ptep++;
1325				continue;
1326			}
1327
1328			pm->pm_stats.resident_count--;
1329			pa = l2pte_pa(pte);
1330
1331			/*
1332			 * Update flags. In a number of circumstances,
1333			 * we could cluster a lot of these and do a
1334			 * number of sequential pages in one go.
1335			 */
1336			pg = PHYS_TO_VM_PAGE(pa);
1337			if (pg != NULL) {
1338				struct pv_entry *pve;
1339				pve = pmap_remove_pv(pg, pm, sva);
1340				if (pve != NULL)
1341					pool_put(&pmap_pv_pool, pve);
1342			}
1343
1344			/*
1345			 * If the cache is physically indexed, we need
1346			 * to flush any changes to the page before it
1347			 * gets invalidated.
1348			 */
1349			if (pg != NULL)
1350				pmap_clean_page(pg);
1351
1352			*ptep = L2_TYPE_INV;
1353			PTE_SYNC(ptep);
1354			if (pte & L2_V7_AF)
1355				pmap_tlb_flushID_SE(pm, sva);
1356
1357			sva += PAGE_SIZE;
1358			ptep++;
1359			mappings++;
1360		}
1361
1362		/*
1363		 * Deal with any left overs
1364		 */
1365		if (!pmap_is_current(pm))
1366			cpu_idcache_wbinv_all();
1367
1368		pmap_free_l2_bucket(pm, l2b, mappings);
1369	}
1370}
1371
1372/*
1373 * pmap_kenter_pa: enter an unmanaged, wired kernel mapping
1374 *
1375 * We assume there is already sufficient KVM space available
1376 * to do this, as we can't allocate L2 descriptor tables/metadata
1377 * from here.
1378 */
1379void
1380pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1381{
1382	struct l2_bucket *l2b;
1383	pt_entry_t *ptep, opte, npte;
1384	pt_entry_t cache_mode = pte_l2_s_cache_mode;
1385
1386	NPDEBUG(PDB_KENTER,
1387	    printf("pmap_kenter_pa: va 0x%08lx, pa 0x%08lx, prot 0x%x\n",
1388	    va, pa, prot));
1389
1390	l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1391	KDASSERT(l2b != NULL);
1392
1393	ptep = &l2b->l2b_kva[l2pte_index(va)];
1394	opte = *ptep;
1395
1396	if (opte == L2_TYPE_INV)
1397		l2b->l2b_occupancy++;
1398
1399	if (pa & PMAP_DEVICE)
1400		cache_mode = L2_B | L2_V7_S_XN;
1401	else if (pa & PMAP_NOCACHE)
1402		cache_mode = L2_V7_S_TEX(1);
1403
1404	npte = L2_S_PROTO | (pa & PMAP_PA_MASK) | L2_V7_AF |
1405	    L2_S_PROT(PTE_KERNEL, prot) | cache_mode;
1406	*ptep = npte;
1407	PTE_SYNC(ptep);
1408	if (opte & L2_V7_AF)
1409		cpu_tlb_flushD_SE(va);
1410
1411	if (pa & PMAP_NOCACHE) {
1412		cpu_dcache_wbinv_range(va, PAGE_SIZE);
1413		cpu_sdcache_wbinv_range(va, (pa & PMAP_PA_MASK), PAGE_SIZE);
1414	}
1415}
1416
1417void
1418pmap_kenter_cache(vaddr_t va, paddr_t pa, vm_prot_t prot, int cacheable)
1419{
1420	if (cacheable == 0)
1421		pa |= PMAP_NOCACHE;
1422	pmap_kenter_pa(va, pa, prot);
1423}
1424
1425void
1426pmap_kremove(vaddr_t va, vsize_t len)
1427{
1428	struct l2_bucket *l2b;
1429	pt_entry_t *ptep, *sptep, opte;
1430	vaddr_t next_bucket, eva;
1431	u_int mappings;
1432
1433	NPDEBUG(PDB_KREMOVE, printf("pmap_kremove: va 0x%08lx, len 0x%08lx\n",
1434	    va, len));
1435
1436	eva = va + len;
1437
1438	while (va < eva) {
1439		next_bucket = L2_NEXT_BUCKET(va);
1440		if (next_bucket > eva)
1441			next_bucket = eva;
1442
1443		l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1444		KDASSERT(l2b != NULL);
1445
1446		sptep = ptep = &l2b->l2b_kva[l2pte_index(va)];
1447		mappings = 0;
1448
1449		while (va < next_bucket) {
1450			opte = *ptep;
1451			if (opte != L2_TYPE_INV) {
1452				*ptep = L2_TYPE_INV;
1453				PTE_SYNC(ptep);
1454				mappings++;
1455			}
1456			if (opte & L2_V7_AF)
1457				cpu_tlb_flushD_SE(va);
1458			va += PAGE_SIZE;
1459			ptep++;
1460		}
1461		KDASSERT(mappings <= l2b->l2b_occupancy);
1462		l2b->l2b_occupancy -= mappings;
1463	}
1464}
1465
1466int
1467pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap)
1468{
1469	struct l2_dtable *l2;
1470	pd_entry_t *pl1pd, l1pd;
1471	pt_entry_t *ptep, pte;
1472	paddr_t pa;
1473	u_int l1idx;
1474
1475
1476	l1idx = L1_IDX(va);
1477	pl1pd = &pm->pm_l1->l1_kva[l1idx];
1478	l1pd = *pl1pd;
1479
1480	if (l1pte_section_p(l1pd)) {
1481		/*
1482		 * These should only happen for pmap_kernel()
1483		 */
1484		KDASSERT(pm == pmap_kernel());
1485		pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
1486	} else {
1487		/*
1488		 * Note that we can't rely on the validity of the L1
1489		 * descriptor as an indication that a mapping exists.
1490		 * We have to look it up in the L2 dtable.
1491		 */
1492		l2 = pm->pm_l2[L2_IDX(l1idx)];
1493
1494		if (l2 == NULL ||
1495		    (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
1496			return 0;
1497		}
1498
1499		ptep = &ptep[l2pte_index(va)];
1500		pte = *ptep;
1501
1502		if (pte == L2_TYPE_INV)
1503			return 0;
1504
1505		switch (pte & L2_TYPE_MASK) {
1506		case L2_TYPE_L:
1507			pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
1508			break;
1509		/*
1510		 * Can't check for L2_TYPE_S on V7 because of the XN
1511		 * bit being part of L2_TYPE_MASK for S mappings.
1512		 */
1513		default:
1514			pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
1515			break;
1516		}
1517	}
1518
1519	if (pap != NULL)
1520		*pap = pa;
1521
1522	return 1;
1523}
1524
1525void
1526pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1527{
1528	struct l2_bucket *l2b;
1529	pt_entry_t *ptep, opte, npte;
1530	vaddr_t next_bucket;
1531	int flush;
1532
1533	NPDEBUG(PDB_PROTECT,
1534	    printf("pmap_protect: pm %p sva 0x%lx eva 0x%lx prot 0x%x",
1535	    pm, sva, eva, prot));
1536
1537	if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1538		return;
1539
1540	if (prot == PROT_NONE) {
1541		pmap_remove(pm, sva, eva);
1542		return;
1543	}
1544
1545	/* XXX is that threshold of 4 the best choice for v7? */
1546	if (pmap_is_current(pm))
1547		flush = ((eva - sva) > (PAGE_SIZE * 4)) ? -1 : 0;
1548	else
1549		flush = -1;
1550
1551	while (sva < eva) {
1552		next_bucket = L2_NEXT_BUCKET(sva);
1553		if (next_bucket > eva)
1554			next_bucket = eva;
1555
1556		l2b = pmap_get_l2_bucket(pm, sva);
1557		if (l2b == NULL) {
1558			sva = next_bucket;
1559			continue;
1560		}
1561
1562		ptep = &l2b->l2b_kva[l2pte_index(sva)];
1563
1564		while (sva < next_bucket) {
1565			npte = opte = *ptep;
1566			if (opte != L2_TYPE_INV) {
1567				struct vm_page *pg;
1568
1569				if ((prot & PROT_WRITE) == 0)
1570					npte |= L2_V7_AP(0x4);
1571				if ((prot & PROT_EXEC) == 0)
1572					npte |= L2_V7_S_XN;
1573				*ptep = npte;
1574				PTE_SYNC(ptep);
1575
1576				pg = PHYS_TO_VM_PAGE(l2pte_pa(opte));
1577				if (pg != NULL && (prot & PROT_WRITE) == 0)
1578					pmap_modify_pv(pg, pm, sva,
1579					    PVF_WRITE, 0);
1580
1581				if (flush >= 0) {
1582					flush++;
1583					if (opte & L2_V7_AF)
1584						cpu_tlb_flushID_SE(sva);
1585				}
1586			}
1587
1588			sva += PAGE_SIZE;
1589			ptep++;
1590		}
1591	}
1592
1593	if (flush < 0)
1594		pmap_tlb_flushID(pm);
1595
1596	NPDEBUG(PDB_PROTECT, printf("\n"));
1597}
1598
1599void
1600pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1601{
1602
1603	NPDEBUG(PDB_PROTECT,
1604	    printf("pmap_page_protect: pg %p (0x%08lx), prot 0x%x\n",
1605	    pg, pg->phys_addr, prot));
1606
1607	switch(prot) {
1608	case PROT_READ | PROT_WRITE | PROT_EXEC:
1609	case PROT_READ | PROT_WRITE:
1610		return;
1611
1612	case PROT_READ:
1613	case PROT_READ | PROT_EXEC:
1614		pmap_clearbit(pg, PVF_WRITE);
1615		break;
1616
1617	default:
1618		pmap_page_remove(pg);
1619		break;
1620	}
1621}
1622
1623/*
1624 * pmap_clear_modify:
1625 *
1626 *	Clear the "modified" attribute for a page.
1627 */
1628int
1629pmap_clear_modify(struct vm_page *pg)
1630{
1631	int rv;
1632
1633	if (pg->mdpage.pvh_attrs & PVF_MOD) {
1634		rv = 1;
1635		pmap_clearbit(pg, PVF_MOD);
1636	} else
1637		rv = 0;
1638
1639	return (rv);
1640}
1641
1642/*
1643 * pmap_clear_reference:
1644 *
1645 *	Clear the "referenced" attribute for a page.
1646 */
1647int
1648pmap_clear_reference(struct vm_page *pg)
1649{
1650	int rv;
1651
1652	if (pg->mdpage.pvh_attrs & PVF_REF) {
1653		rv = 1;
1654		pmap_clearbit(pg, PVF_REF);
1655	} else
1656		rv = 0;
1657
1658	return (rv);
1659}
1660
1661/*
1662 * pmap_is_modified:
1663 *
1664 *	Test if a page has the "modified" attribute.
1665 */
1666/* See <arm/pmap.h> */
1667
1668/*
1669 * pmap_is_referenced:
1670 *
1671 *	Test if a page has the "referenced" attribute.
1672 */
1673/* See <arm/pmap.h> */
1674
1675/*
1676 * dab_access() handles the following data aborts:
1677 *
1678 *  FAULT_ACCESS_2 - Access flag fault -- Level 2
1679 *
1680 * Set the Access Flag and mark the page as referenced.
1681 */
1682int
1683dab_access(trapframe_t *tf, u_int fsr, u_int far, struct proc *p)
1684{
1685	struct pmap *pm = p->p_vmspace->vm_map.pmap;
1686	vaddr_t va = trunc_page(far);
1687	struct l2_dtable *l2;
1688	struct l2_bucket *l2b;
1689	pt_entry_t *ptep, pte;
1690	struct pv_entry *pv;
1691	struct vm_page *pg;
1692	paddr_t pa;
1693	u_int l1idx;
1694
1695	if (!TRAP_USERMODE(tf) && far >= VM_MIN_KERNEL_ADDRESS)
1696		pm = pmap_kernel();
1697
1698	l1idx = L1_IDX(va);
1699
1700	/*
1701	 * If there is no l2_dtable for this address, then the process
1702	 * has no business accessing it.
1703	 */
1704	l2 = pm->pm_l2[L2_IDX(l1idx)];
1705	KASSERT(l2 != NULL);
1706
1707	/*
1708	 * Likewise if there is no L2 descriptor table
1709	 */
1710	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
1711	KASSERT(l2b->l2b_kva != NULL);
1712
1713	/*
1714	 * Check the PTE itself.
1715	 */
1716	ptep = &l2b->l2b_kva[l2pte_index(va)];
1717	pte = *ptep;
1718	KASSERT(pte != L2_TYPE_INV);
1719
1720	pa = l2pte_pa(pte);
1721
1722	/*
1723	 * Perform page referenced emulation.
1724	 */
1725	KASSERT((pte & L2_V7_AF) == 0);
1726
1727	/* Extract the physical address of the page */
1728	pg = PHYS_TO_VM_PAGE(pa);
1729	KASSERT(pg != NULL);
1730
1731	/* Get the current flags for this page. */
1732	pv = pmap_find_pv(pg, pm, va);
1733	KASSERT(pv != NULL);
1734
1735	pg->mdpage.pvh_attrs |= PVF_REF;
1736	pv->pv_flags |= PVF_REF;
1737	pte |= L2_V7_AF;
1738
1739	*ptep = pte;
1740	PTE_SYNC(ptep);
1741	return 0;
1742}
1743
1744/*
1745 * pmap_collect: free resources held by a pmap
1746 *
1747 * => optional function.
1748 * => called when a process is swapped out to free memory.
1749 */
1750void
1751pmap_collect(pmap_t pm)
1752{
1753	/*
1754	 * Nothing to do.
1755	 * We don't even need to free-up the process' L1.
1756	 */
1757}
1758
1759/*
1760 * Routine:	pmap_proc_iflush
1761 *
1762 * Function:
1763 *	Synchronize caches corresponding to [addr, addr+len) in p.
1764 *
1765 */
1766void
1767pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
1768{
1769	/* We only need to do anything if it is the current process. */
1770	if (pr == curproc->p_p)
1771		cpu_icache_sync_range(va, len);
1772}
1773
1774/*
1775 * Routine:	pmap_unwire
1776 * Function:	Clear the wired attribute for a map/virtual-address pair.
1777 *
1778 * In/out conditions:
1779 *		The mapping must already exist in the pmap.
1780 */
1781void
1782pmap_unwire(pmap_t pm, vaddr_t va)
1783{
1784	struct l2_bucket *l2b;
1785	pt_entry_t *ptep, pte;
1786	struct vm_page *pg;
1787	paddr_t pa;
1788
1789	NPDEBUG(PDB_WIRING, printf("pmap_unwire: pm %p, va 0x%08lx\n", pm, va));
1790
1791	l2b = pmap_get_l2_bucket(pm, va);
1792	KDASSERT(l2b != NULL);
1793
1794	ptep = &l2b->l2b_kva[l2pte_index(va)];
1795	pte = *ptep;
1796
1797	/* Extract the physical address of the page */
1798	pa = l2pte_pa(pte);
1799
1800	if ((pg = PHYS_TO_VM_PAGE(pa)) != NULL) {
1801		/* Update the wired bit in the pv entry for this page. */
1802		(void) pmap_modify_pv(pg, pm, va, PVF_WIRED, 0);
1803	}
1804}
1805
1806void
1807pmap_activate(struct proc *p)
1808{
1809	pmap_t pm;
1810	struct pcb *pcb;
1811
1812	pm = p->p_vmspace->vm_map.pmap;
1813	pcb = &p->p_addr->u_pcb;
1814
1815	pmap_set_pcb_pagedir(pm, pcb);
1816
1817	if (p == curproc) {
1818		u_int cur_ttb;
1819
1820		__asm volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb));
1821
1822		cur_ttb &= ~(L1_TABLE_SIZE - 1);
1823
1824		if (cur_ttb == (u_int)pcb->pcb_pagedir) {
1825			/*
1826			 * No need to switch address spaces.
1827			 */
1828			return;
1829		}
1830
1831		__asm volatile("cpsid if");
1832		cpu_setttb(pcb->pcb_pagedir);
1833		__asm volatile("cpsie if");
1834	}
1835}
1836
1837void
1838pmap_update(pmap_t pm)
1839{
1840	/*
1841	 * make sure TLB/cache operations have completed.
1842	 */
1843}
1844
1845/*
1846 * Retire the given physical map from service.
1847 * Should only be called if the map contains no valid mappings.
1848 */
1849void
1850pmap_destroy(pmap_t pm)
1851{
1852	u_int count;
1853
1854	/*
1855	 * Drop reference count
1856	 */
1857	count = --pm->pm_refs;
1858	if (count > 0)
1859		return;
1860
1861	/*
1862	 * reference count is zero, free pmap resources and then free pmap.
1863	 */
1864
1865	pmap_free_l1(pm);
1866
1867	/* return the pmap to the pool */
1868	pool_put(&pmap_pmap_pool, pm);
1869}
1870
1871
1872/*
1873 * void pmap_reference(pmap_t pm)
1874 *
1875 * Add a reference to the specified pmap.
1876 */
1877void
1878pmap_reference(pmap_t pm)
1879{
1880	if (pm == NULL)
1881		return;
1882
1883	pm->pm_refs++;
1884}
1885
1886/*
1887 * pmap_zero_page()
1888 *
1889 * Zero a given physical page by mapping it at a page hook point.
1890 * In doing the zero page op, the page we zero is mapped cacheable: on
1891 * StrongARM, accesses to non-cached pages are non-burst, making writing
1892 * _any_ bulk data very slow.
1893 */
1894void
1895pmap_zero_page_generic(struct vm_page *pg)
1896{
1897	paddr_t phys = VM_PAGE_TO_PHYS(pg);
1898#ifdef DEBUG
1899	if (pg->mdpage.pvh_list != NULL)
1900		panic("pmap_zero_page: page has mappings");
1901#endif
1902
1903	/*
1904	 * Hook in the page, zero it, and purge the cache for that
1905	 * zeroed page. Invalidate the TLB as needed.
1906	 */
1907	*cdst_pte = L2_S_PROTO | phys | L2_V7_AF |
1908	    L2_S_PROT(PTE_KERNEL, PROT_WRITE) | pte_l2_s_cache_mode;
1909	PTE_SYNC(cdst_pte);
1910	cpu_tlb_flushD_SE(cdstp);
1911	bzero_page(cdstp);
1912}
1913
1914/*
1915 * pmap_copy_page()
1916 *
1917 * Copy one physical page into another, by mapping the pages into
1918 * hook points. The same comment regarding cacheability as in
1919 * pmap_zero_page also applies here.
1920 */
1921void
1922pmap_copy_page_generic(struct vm_page *src_pg, struct vm_page *dst_pg)
1923{
1924	paddr_t src = VM_PAGE_TO_PHYS(src_pg);
1925	paddr_t dst = VM_PAGE_TO_PHYS(dst_pg);
1926#ifdef DEBUG
1927	if (dst_pg->mdpage.pvh_list != NULL)
1928		panic("pmap_copy_page: dst page has mappings");
1929#endif
1930
1931	/*
1932	 * Map the pages into the page hook points, copy them, and purge
1933	 * the cache for the appropriate page. Invalidate the TLB
1934	 * as required.
1935	 */
1936	*csrc_pte = L2_S_PROTO | src | L2_V7_AF |
1937	    L2_S_PROT(PTE_KERNEL, PROT_READ) | pte_l2_s_cache_mode;
1938	PTE_SYNC(csrc_pte);
1939	*cdst_pte = L2_S_PROTO | dst | L2_V7_AF |
1940	    L2_S_PROT(PTE_KERNEL, PROT_WRITE) | pte_l2_s_cache_mode;
1941	PTE_SYNC(cdst_pte);
1942	cpu_tlb_flushD_SE(csrcp);
1943	cpu_tlb_flushD_SE(cdstp);
1944	bcopy_page(csrcp, cdstp);
1945}
1946
1947/*
1948 * void pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1949 *
1950 * Return the start and end addresses of the kernel's virtual space.
1951 * These values are setup in pmap_bootstrap and are updated as pages
1952 * are allocated.
1953 */
1954void
1955pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1956{
1957	*start = virtual_avail;
1958	*end = virtual_end;
1959}
1960
1961/*
1962 * Helper function for pmap_grow_l2_bucket()
1963 */
1964static __inline int
1965pmap_grow_map(vaddr_t va, pt_entry_t cache_mode, paddr_t *pap)
1966{
1967	struct l2_bucket *l2b;
1968	pt_entry_t *ptep;
1969	paddr_t pa;
1970
1971	if (uvm.page_init_done == 0) {
1972		if (uvm_page_physget(&pa) == 0)
1973			return (1);
1974	} else {
1975		struct vm_page *pg;
1976		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE);
1977		if (pg == NULL)
1978			return (1);
1979		pa = VM_PAGE_TO_PHYS(pg);
1980	}
1981
1982	if (pap)
1983		*pap = pa;
1984
1985	l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1986	KDASSERT(l2b != NULL);
1987
1988	ptep = &l2b->l2b_kva[l2pte_index(va)];
1989	*ptep = L2_S_PROTO | pa | L2_V7_AF | cache_mode |
1990	    L2_S_PROT(PTE_KERNEL, PROT_READ | PROT_WRITE);
1991	PTE_SYNC(ptep);
1992	cpu_tlb_flushD_SE(va);
1993
1994	memset((void *)va, 0, PAGE_SIZE);
1995	return (0);
1996}
1997
1998/*
1999 * This is the same as pmap_alloc_l2_bucket(), except that it is only
2000 * used by pmap_growkernel().
2001 */
2002static __inline struct l2_bucket *
2003pmap_grow_l2_bucket(pmap_t pm, vaddr_t va)
2004{
2005	struct l2_dtable *l2;
2006	struct l2_bucket *l2b;
2007	u_short l1idx;
2008	vaddr_t nva;
2009
2010	l1idx = L1_IDX(va);
2011
2012	if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
2013		/*
2014		 * No mapping at this address, as there is
2015		 * no entry in the L1 table.
2016		 * Need to allocate a new l2_dtable.
2017		 */
2018		nva = pmap_kernel_l2dtable_kva;
2019		if ((nva & PGOFSET) == 0) {
2020			/*
2021			 * Need to allocate a backing page
2022			 */
2023			if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2024				return (NULL);
2025		}
2026
2027		l2 = (struct l2_dtable *)nva;
2028		nva += sizeof(struct l2_dtable);
2029
2030		if ((nva & PGOFSET) < (pmap_kernel_l2dtable_kva & PGOFSET)) {
2031			/*
2032			 * The new l2_dtable straddles a page boundary.
2033			 * Map in another page to cover it.
2034			 */
2035			if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2036				return (NULL);
2037		}
2038
2039		pmap_kernel_l2dtable_kva = nva;
2040
2041		/*
2042		 * Link it into the parent pmap
2043		 */
2044		pm->pm_l2[L2_IDX(l1idx)] = l2;
2045	}
2046
2047	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2048
2049	/*
2050	 * Fetch pointer to the L2 page table associated with the address.
2051	 */
2052	if (l2b->l2b_kva == NULL) {
2053		pt_entry_t *ptep;
2054
2055		/*
2056		 * No L2 page table has been allocated. Chances are, this
2057		 * is because we just allocated the l2_dtable, above.
2058		 */
2059		nva = pmap_kernel_l2ptp_kva;
2060		ptep = (pt_entry_t *)nva;
2061		if ((nva & PGOFSET) == 0) {
2062			/*
2063			 * Need to allocate a backing page
2064			 */
2065			if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt,
2066			    &pmap_kernel_l2ptp_phys))
2067				return (NULL);
2068			PTE_SYNC_RANGE(ptep, PAGE_SIZE / sizeof(pt_entry_t));
2069		}
2070
2071		l2->l2_occupancy++;
2072		l2b->l2b_kva = ptep;
2073		l2b->l2b_l1idx = l1idx;
2074		l2b->l2b_phys = pmap_kernel_l2ptp_phys;
2075
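		/* Advance the cursors past the L2 table just handed out. */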
2076		pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL;
2077		pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL;
2078	}
2079
2080	return (l2b);
2081}
2082
2083vaddr_t
2084pmap_growkernel(vaddr_t maxkvaddr)
2085{
2086	pmap_t kpm = pmap_kernel();
2087	struct l1_ttable *l1;
2088	struct l2_bucket *l2b;
2089	pd_entry_t *pl1pd;
2090	int s;
2091
2092	if (maxkvaddr <= pmap_curmaxkvaddr)
2093		goto out;		/* we are OK */
2094
2095	NPDEBUG(PDB_GROWKERN,
2096	    printf("pmap_growkernel: growing kernel from 0x%lx to 0x%lx\n",
2097	    pmap_curmaxkvaddr, maxkvaddr));
2098
2099	KDASSERT(maxkvaddr <= virtual_end);
2100
2101	/*
2102	 * whoops!   we need to add kernel PTPs
2103	 */
2104
2105	s = splhigh();	/* to be safe */
2106
2107	/* Map 1MB at a time */
2108	for (; pmap_curmaxkvaddr < maxkvaddr; pmap_curmaxkvaddr += L1_S_SIZE) {
2109
2110		l2b = pmap_grow_l2_bucket(kpm, pmap_curmaxkvaddr);
2111		KDASSERT(l2b != NULL);
2112
2113		/* Distribute new L1 entry to all other L1s */
2114		TAILQ_FOREACH(l1, &l1_list, l1_link) {
2115			pl1pd = &l1->l1_kva[L1_IDX(pmap_curmaxkvaddr)];
2116			*pl1pd = L1_C_PROTO | l2b->l2b_phys;
2117			PTE_SYNC(pl1pd);
2118		}
2119	}
2120
2121	/*
2122	 * flush out the cache; this is expensive, but pmap_growkernel()
2123	 * is called rarely enough that it does not matter
2124	 */
2125	cpu_dcache_wbinv_all();
2126	cpu_sdcache_wbinv_all();
2127	cpu_tlb_flushD();
2128
2129	splx(s);
2130
2131out:
2132	return (pmap_curmaxkvaddr);
2133}
2134
2135/************************ Utility routines ****************************/
2136
2137/*
2138 * vector_page_setprot:
2139 *
2140 *	Manipulate the protection of the vector page.
2141 */
2142void
2143vector_page_setprot(int prot)
2144{
2145	struct l2_bucket *l2b;
2146	pt_entry_t *ptep;
2147
2148	l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page);
2149	KDASSERT(l2b != NULL);
2150
2151	ptep = &l2b->l2b_kva[l2pte_index(vector_page)];
2152
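	/*
	 * Replace only the protection bits of the existing mapping,
	 * then flush the stale TLB entry.
	 */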
2153	*ptep = (*ptep & ~L2_S_PROT_MASK) | L2_S_PROT(PTE_KERNEL, prot);
2154	PTE_SYNC(ptep);
2155	cpu_tlb_flushD_SE(vector_page);
2156}
2157
2158/*
2159 * This is used to stuff certain critical values into the PCB where they
2160 * can be accessed quickly from cpu_switch() et al.
2161 */
2162void
2163pmap_set_pcb_pagedir(pmap_t pm, struct pcb *pcb)
2164{
2165	KDASSERT(pm->pm_l1);
2166	pcb->pcb_pagedir = pm->pm_l1->l1_physaddr;
2167}
2168
2169/*
2170 * Fetch pointers to the PDE/PTE for the given pmap/VA pair.
2171 * Returns 1 if the mapping exists, else 0.
2172 *
2173 * NOTE: This function is only used by a couple of arm-specific modules.
2174 * It is not safe to take any pmap locks here, since we could be right
2175 * in the middle of debugging the pmap anyway...
2176 *
2177 * It is possible for this routine to return 0 even though a valid
2178 * mapping does exist. This is because we don't lock, so the metadata
2179 * state may be inconsistent.
2180 *
2181 * NOTE: We can return a NULL *ptp in the case where the L1 pde is
2182 * a "section" mapping.
2183 */
2184int
2185pmap_get_pde_pte(pmap_t pm, vaddr_t va, pd_entry_t **pdp, pt_entry_t **ptp)
2186{
2187	struct l2_dtable *l2;
2188	pd_entry_t *pl1pd, l1pd;
2189	pt_entry_t *ptep;
2190	u_short l1idx;
2191
2192	if (pm->pm_l1 == NULL)
2193		return 0;
2194
2195	l1idx = L1_IDX(va);
2196	*pdp = pl1pd = &pm->pm_l1->l1_kva[l1idx];
2197	l1pd = *pl1pd;
2198
2199	if (l1pte_section_p(l1pd)) {
2200		*ptp = NULL;
2201		return 1;
2202	}
2203
2204	l2 = pm->pm_l2[L2_IDX(l1idx)];
2205	if (l2 == NULL ||
2206	    (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
2207		return 0;
2208	}
2209
2210	*ptp = &ptep[l2pte_index(va)];
2211	return 1;
2212}
2213
2214/************************ Bootstrapping routines ****************************/
2215
2216void
2217pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt)
2218{
2219	l1->l1_kva = l1pt;
2220
2221	/*
2222	 * Copy the kernel's L1 entries to each new L1.
2223	 */
2224	if (pmap_initialized)
2225		memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE);
2226
2227	if (pmap_extract(pmap_kernel(), (vaddr_t)l1pt, &l1->l1_physaddr) == 0)
2228		panic("pmap_init_l1: can't get PA of L1 at %p", l1pt);
2229
2230	TAILQ_INSERT_TAIL(&l1_list, l1, l1_link);
2231}
2232
2233/*
2234 * pmap_bootstrap() is called from the board-specific initarm() routine
2235 * once the kernel L1/L2 descriptors tables have been set up.
2236 *
2237 * This is a somewhat convoluted process since pmap bootstrap is, effectively,
2238 * spread over a number of disparate files/functions.
2239 *
2240 * We are passed the following parameters
2241 *  - kernel_l1pt
2242 *    This is a pointer to the base of the kernel's L1 translation table.
2243 *  - vstart
2244 *    1MB-aligned start of managed kernel virtual memory.
2245 *  - vend
2246 *    1MB-aligned end of managed kernel virtual memory.
2247 *
2248 * We use the first parameter to build the metadata (struct l1_ttable and
2249 * struct l2_dtable) necessary to track kernel mappings.
2250 */
2251#define	PMAP_STATIC_L2_SIZE 16
2252void
2253pmap_bootstrap(pd_entry_t *kernel_l1pt, vaddr_t vstart, vaddr_t vend)
2254{
2255	static struct l1_ttable static_l1;
2256	static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE];
2257	struct l1_ttable *l1 = &static_l1;
2258	struct l2_dtable *l2;
2259	struct l2_bucket *l2b;
2260	pmap_t pm = pmap_kernel();
2261	pd_entry_t pde;
2262	pt_entry_t *ptep;
2263	paddr_t pa;
2264	vsize_t size;
2265	int l1idx, l2idx, l2next = 0;
2266
2267	/*
2268	 * Initialise the kernel pmap object
2269	 */
2270	pm->pm_l1 = l1;
2271	pm->pm_refs = 1;
2272
2273	/*
2274	 * Scan the L1 translation table created by initarm() and create
2275	 * the required metadata for all valid mappings found in it.
2276	 */
2277	for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) {
2278		pde = kernel_l1pt[l1idx];
2279
2280		/*
2281		 * We're only interested in Coarse mappings.
2282		 * pmap_extract() can deal with section mappings without
2283		 * recourse to checking L2 metadata.
2284		 */
2285		if ((pde & L1_TYPE_MASK) != L1_TYPE_C)
2286			continue;
2287
2288		/*
2289		 * Lookup the KVA of this L2 descriptor table
2290		 */
2291		pa = (paddr_t)(pde & L1_C_ADDR_MASK);
2292		ptep = (pt_entry_t *)kernel_pt_lookup(pa);
2293		if (ptep == NULL) {
2294			panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx",
2295			    (u_int)l1idx << L1_S_SHIFT, pa);
2296		}
2297
2298		/*
2299		 * Fetch the associated L2 metadata structure.
2300		 * Allocate a new one if necessary.
2301		 */
2302		if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
2303			if (l2next == PMAP_STATIC_L2_SIZE)
2304				panic("pmap_bootstrap: out of static L2s");
2305			pm->pm_l2[L2_IDX(l1idx)] = l2 = &static_l2[l2next++];
2306		}
2307
2308		/*
2309		 * One more L1 slot tracked...
2310		 */
2311		l2->l2_occupancy++;
2312
2313		/*
2314		 * Fill in the details of the L2 descriptor in the
2315		 * appropriate bucket.
2316		 */
2317		l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2318		l2b->l2b_kva = ptep;
2319		l2b->l2b_phys = pa;
2320		l2b->l2b_l1idx = l1idx;
2321
2322		/*
2323		 * Establish an initial occupancy count for this descriptor
2324		 */
2325		for (l2idx = 0;
2326		    l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
2327		    l2idx++) {
2328			if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV)
2329				l2b->l2b_occupancy++;
2330		}
2331	}
2332
2333	cpu_idcache_wbinv_all();
2334	cpu_sdcache_wbinv_all();
2335	cpu_tlb_flushID();
2336
2337	/*
2338	 * now we allocate the "special" VAs which are used for tmp mappings
2339	 * by the pmap (and other modules).  we allocate the VAs by advancing
2340	 * virtual_avail (note that there are no pages mapped at these VAs).
2341	 *
2342	 * Managed KVM space starts from wherever initarm() tells us.
2343	 */
2344	virtual_avail = vstart;
2345	virtual_end = vend;
2346
2347	pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte);
2348	pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte);
2349	pmap_alloc_specials(&virtual_avail, 1, &cwbp, &cwb_pte);
2350	pmap_alloc_specials(&virtual_avail, 1, (void *)&memhook, NULL);
2351	pmap_alloc_specials(&virtual_avail, round_page(MSGBUFSIZE) / PAGE_SIZE,
2352	    (void *)&msgbufaddr, NULL);
2353
2354	/*
2355	 * Allocate a range of kernel virtual address space to be used
2356	 * for L2 descriptor tables and metadata allocation in
2357	 * pmap_growkernel().
2358	 */
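	/* One L2 table maps L1_S_SIZE (1MB) of KVA; count the sections left to cover. */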
2359	size = ((virtual_end - pmap_curmaxkvaddr) + L1_S_OFFSET) / L1_S_SIZE;
2360	pmap_alloc_specials(&virtual_avail,
2361	    round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE,
2362	    &pmap_kernel_l2ptp_kva, NULL);
2363
2364	size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE;
2365	pmap_alloc_specials(&virtual_avail,
2366	    round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE,
2367	    &pmap_kernel_l2dtable_kva, NULL);
2368
2369	/*
2370	 * We can now initialise the first L1's metadata.
2371	 */
2372	TAILQ_INIT(&l1_list);
2373	pmap_init_l1(l1, kernel_l1pt);
2374
2375	/*
2376	 * Initialize the pmap pool.
2377	 */
2378	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
2379	    "pmappl", &pool_allocator_single);
2380	pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, IPL_VM, 0,
2381	    "pvepl", &pmap_bootstrap_pv_allocator);
2382	pool_init(&pmap_l2dtable_pool, sizeof(struct l2_dtable), 0, IPL_VM, 0,
2383	    "l2dtblpl", NULL);
2384	pool_init(&pmap_l2ptp_pool, L2_TABLE_SIZE_REAL, L2_TABLE_SIZE_REAL,
2385	    IPL_VM, 0, "l2ptppl", &pool_allocator_single);
2386
2387	cpu_dcache_wbinv_all();
2388	cpu_sdcache_wbinv_all();
2389}
2390
2391void
2392pmap_alloc_specials(vaddr_t *availp, int pages, vaddr_t *vap, pt_entry_t **ptep)
2393{
2394	vaddr_t va = *availp;
2395	struct l2_bucket *l2b;
2396
2397	if (ptep) {
2398		l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2399		if (l2b == NULL)
2400			panic("pmap_alloc_specials: no l2b for 0x%lx", va);
2401
2402		*ptep = &l2b->l2b_kva[l2pte_index(va)];
2404	}
2405
2406	*vap = va;
2407	*availp = va + (PAGE_SIZE * pages);
2408}
2409
2410void
2411pmap_init(void)
2412{
2413	pool_setlowat(&pmap_pv_pool, (PAGE_SIZE / sizeof(struct pv_entry)) * 2);
2414
2415	pmap_initialized = 1;
2416}
2417
2418static vaddr_t last_bootstrap_page = 0;
2419static void *free_bootstrap_pages = NULL;
2420
2421void *
2422pmap_bootstrap_pv_page_alloc(struct pool *pp, int flags, int *slowdown)
2423{
2424	extern void *pool_page_alloc(struct pool *, int, int *);
2425	vaddr_t new_page;
2426	void *rv;
2427
2428	if (pmap_initialized)
2429		return (pool_page_alloc(pp, flags, slowdown));
2430	*slowdown = 0;
2431
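	/*
	 * Reuse a previously freed bootstrap page if one is available;
	 * the free list is threaded through the first word of each page.
	 */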
2432	if (free_bootstrap_pages) {
2433		rv = free_bootstrap_pages;
2434		free_bootstrap_pages = *((void **)rv);
2435		return (rv);
2436	}
2437
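	/* Otherwise allocate a fresh page of kernel memory. */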
2438	new_page = uvm_km_kmemalloc(kernel_map, NULL, PAGE_SIZE,
2439	    (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT);
2440
2441	last_bootstrap_page = new_page;
2442	return ((void *)new_page);
2443}
2444
2445void
2446pmap_bootstrap_pv_page_free(struct pool *pp, void *v)
2447{
2448	extern void pool_page_free(struct pool *, void *);
2449
2450	if (pmap_initialized) {
2451		pool_page_free(pp, v);
2452		return;
2453	}
2454
2455	if ((vaddr_t)v <= last_bootstrap_page) {
2456		*((void **)v) = free_bootstrap_pages;
2457		free_bootstrap_pages = v;
2458		return;
2459	}
2460}
2461
2462/*
2463 * pmap_postinit()
2464 *
2465 * This routine is called after the vm and kmem subsystems have been
2466 * initialised. This allows the pmap code to perform any initialisation
2467 * that can only be done once the memory allocation is in place.
2468 */
2469void
2470pmap_postinit(void)
2471{
2472	pool_setlowat(&pmap_l2ptp_pool,
2473	    (PAGE_SIZE / L2_TABLE_SIZE_REAL) * 4);
2474	pool_setlowat(&pmap_l2dtable_pool,
2475	    (PAGE_SIZE / sizeof(struct l2_dtable)) * 2);
2476}
2477
2478/*
2479 * Note that the following routines are used by board-specific initialisation
2480 * code to configure the initial kernel page tables.
2481 *
2482 * If ARM32_NEW_VM_LAYOUT is *not* defined, they operate on the assumption that
2483 * L2 page-table pages are 4KB in size and use 4 L1 slots. This mimics the
2484 * behaviour of the old pmap, and provides an easy migration path for
2485 * initial bring-up of the new pmap on existing ports. Fortunately,
2486 * pmap_bootstrap() compensates for this hackery. This is only a stop-gap and
2487 * will be deprecated.
2488 *
2489 * If ARM32_NEW_VM_LAYOUT *is* defined, these functions deal with 1KB L2 page
2490 * tables.
2491 */
2492
2493/*
2494 * This list exists for the benefit of pmap_map_chunk().  It keeps track
2495 * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can
2496 * find them as necessary.
2497 *
2498 * Note that the data on this list MUST remain valid after initarm() returns,
2499 * as pmap_bootstrap() uses it to construct L2 table metadata.
2500 */
2501SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list);
2502
2503vaddr_t
2504kernel_pt_lookup(paddr_t pa)
2505{
2506	pv_addr_t *pv;
2507
2508	SLIST_FOREACH(pv, &kernel_pt_list, pv_list) {
2509#ifndef ARM32_NEW_VM_LAYOUT
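		/*
		 * Old layout: L2 tables are tracked per 4KB page, so
		 * match on the page frame and add back the offset.
		 */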
2510		if (pv->pv_pa == (pa & ~PGOFSET))
2511			return (pv->pv_va | (pa & PGOFSET));
2512#else
2513		if (pv->pv_pa == pa)
2514			return (pv->pv_va);
2515#endif
2516	}
2517	return (0);
2518}
2519
2520/*
2521 * pmap_map_section:
2522 *
2523 *	Create a single section mapping.
2524 */
2525void
2526pmap_map_section(vaddr_t l1pt, vaddr_t va, paddr_t pa, int prot, int cache)
2527{
2528	pd_entry_t *pde = (pd_entry_t *) l1pt;
2529	pd_entry_t fl;
2530
2531	switch (cache) {
2532	case PTE_NOCACHE:
2533	default:
2534		fl = 0;
2535		break;
2536
2537	case PTE_CACHE:
2538		fl = pte_l1_s_cache_mode;
2539		break;
2540
2541	case PTE_PAGETABLE:
2542		fl = pte_l1_s_cache_mode_pt;
2543		break;
2544	}
2545
2546	pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | L1_S_V7_AF |
2547	    L1_S_PROT(PTE_KERNEL, prot) | fl;
2548	PTE_SYNC(&pde[va >> L1_S_SHIFT]);
2549}
2550
2551/*
2552 * pmap_map_entry:
2553 *
2554 *	Create a single page mapping.
2555 */
2556void
2557pmap_map_entry(vaddr_t l1pt, vaddr_t va, paddr_t pa, int prot, int cache)
2558{
2559	pd_entry_t *pde = (pd_entry_t *) l1pt;
2560	pt_entry_t fl;
2561	pt_entry_t *pte;
2562
2563	switch (cache) {
2564	case PTE_NOCACHE:
2565	default:
2566		fl = 0;
2567		break;
2568
2569	case PTE_CACHE:
2570		fl = pte_l2_s_cache_mode;
2571		break;
2572
2573	case PTE_PAGETABLE:
2574		fl = pte_l2_s_cache_mode_pt;
2575		break;
2576	}
2577
2578	if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
2579		panic("pmap_map_entry: no L2 table for VA 0x%08lx", va);
2580
2581#ifndef ARM32_NEW_VM_LAYOUT
2582	pte = (pt_entry_t *)
2583	    kernel_pt_lookup(pde[va >> L1_S_SHIFT] & L2_S_FRAME);
2584#else
2585	pte = (pt_entry_t *) kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK);
2586#endif
2587	if (pte == NULL)
2588		panic("pmap_map_entry: can't find L2 table for VA 0x%08lx", va);
2589
2590#ifndef ARM32_NEW_VM_LAYOUT
2591	pte[(va >> PGSHIFT) & 0x3ff] = L2_S_PROTO | pa | L2_V7_AF |
2592	    L2_S_PROT(PTE_KERNEL, prot) | fl;
2593	PTE_SYNC(&pte[(va >> PGSHIFT) & 0x3ff]);
2594#else
2595	pte[l2pte_index(va)] = L2_S_PROTO | pa | L2_V7_AF |
2596	    L2_S_PROT(PTE_KERNEL, prot) | fl;
2597	PTE_SYNC(&pte[l2pte_index(va)]);
2598#endif
2599}
2600
2601/*
2602 * pmap_link_l2pt:
2603 *
2604 *	Link the L2 page table specified by "l2pv" into the L1
2605 *	page table at the slot for "va".
2606 */
2607void
2608pmap_link_l2pt(vaddr_t l1pt, vaddr_t va, pv_addr_t *l2pv)
2609{
2610	pd_entry_t *pde = (pd_entry_t *) l1pt;
2611	u_int slot = va >> L1_S_SHIFT;
2612
2613	pde[slot + 0] = L1_C_PROTO | (l2pv->pv_pa + 0x000);
2614#ifdef ARM32_NEW_VM_LAYOUT
2615	PTE_SYNC(&pde[slot]);
2616#else
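	/*
	 * Old layout: the 4KB L2 page holds four 1KB tables, each
	 * mapping 1MB, so it occupies four consecutive L1 slots.
	 */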
2617	pde[slot + 1] = L1_C_PROTO | (l2pv->pv_pa + 0x400);
2618	pde[slot + 2] = L1_C_PROTO | (l2pv->pv_pa + 0x800);
2619	pde[slot + 3] = L1_C_PROTO | (l2pv->pv_pa + 0xc00);
2620	PTE_SYNC_RANGE(&pde[slot + 0], 4);
2621#endif
2622
2623	SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list);
2624}
2625
2626/*
2627 * pmap_map_chunk:
2628 *
2629 *	Map a chunk of memory using the most efficient mappings
2630 *	possible (section, large page, small page) into the
2631 *	provided L1 and L2 tables at the specified virtual address.
2632 */
2633vsize_t
2634pmap_map_chunk(vaddr_t l1pt, vaddr_t va, paddr_t pa, vsize_t size,
2635    int prot, int cache)
2636{
2637	pd_entry_t *pde = (pd_entry_t *) l1pt;
2638	pt_entry_t *pte, f1, f2s, f2l;
2639	vsize_t resid;
2640	int i;
2641
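	/* Round the size up to a whole number of pages. */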
2642	resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
2643
2644	if (l1pt == 0)
2645		panic("pmap_map_chunk: no L1 table provided");
2646
2647#ifdef VERBOSE_INIT_ARM
2648	printf("pmap_map_chunk: pa=0x%lx va=0x%lx size=0x%lx resid=0x%lx "
2649	    "prot=0x%x cache=%d\n", pa, va, size, resid, prot, cache);
2650#endif
2651
2652	switch (cache) {
2653	case PTE_NOCACHE:
2654	default:
2655		f1 = 0;
2656		f2l = 0;
2657		f2s = 0;
2658		break;
2659
2660	case PTE_CACHE:
2661		f1 = pte_l1_s_cache_mode;
2662		f2l = pte_l2_l_cache_mode;
2663		f2s = pte_l2_s_cache_mode;
2664		break;
2665
2666	case PTE_PAGETABLE:
2667		f1 = pte_l1_s_cache_mode_pt;
2668		f2l = pte_l2_l_cache_mode_pt;
2669		f2s = pte_l2_s_cache_mode_pt;
2670		break;
2671	}
2672
2673	size = resid;
2674
2675	while (resid > 0) {
2676		/* See if we can use a section mapping. */
2677		if (L1_S_MAPPABLE_P(va, pa, resid)) {
2678#ifdef VERBOSE_INIT_ARM
2679			printf("S");
2680#endif
2681			pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa |
2682			    L1_S_V7_AF | L1_S_PROT(PTE_KERNEL, prot) | f1;
2683			PTE_SYNC(&pde[va >> L1_S_SHIFT]);
2684			va += L1_S_SIZE;
2685			pa += L1_S_SIZE;
2686			resid -= L1_S_SIZE;
2687			continue;
2688		}
2689
2690		/*
2691		 * Ok, we're going to use an L2 table.  Make sure
2692		 * one is actually in the corresponding L1 slot
2693		 * for the current VA.
2694		 */
2695		if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
2696			panic("pmap_map_chunk: no L2 table for VA 0x%08lx", va);
2697
2698#ifndef ARM32_NEW_VM_LAYOUT
2699		pte = (pt_entry_t *)
2700		    kernel_pt_lookup(pde[va >> L1_S_SHIFT] & L2_S_FRAME);
2701#else
2702		pte = (pt_entry_t *) kernel_pt_lookup(
2703		    pde[L1_IDX(va)] & L1_C_ADDR_MASK);
2704#endif
2705		if (pte == NULL)
2706			panic("pmap_map_chunk: can't find L2 table for VA "
2707			    "0x%08lx", va);
2708
2709		/* See if we can use a L2 large page mapping. */
2710		if (L2_L_MAPPABLE_P(va, pa, resid)) {
2711#ifdef VERBOSE_INIT_ARM
2712			printf("L");
2713#endif
2714			for (i = 0; i < 16; i++) {
2715#ifndef ARM32_NEW_VM_LAYOUT
2716				pte[((va >> PGSHIFT) & 0x3f0) + i] =
2717				    L2_L_PROTO | pa | L2_V7_AF |
2718				    L2_L_PROT(PTE_KERNEL, prot) | f2l;
2719				PTE_SYNC(&pte[((va >> PGSHIFT) & 0x3f0) + i]);
2720#else
2721				pte[l2pte_index(va) + i] =
2722				    L2_L_PROTO | pa | L2_V7_AF |
2723				    L2_L_PROT(PTE_KERNEL, prot) | f2l;
2724				PTE_SYNC(&pte[l2pte_index(va) + i]);
2725#endif
2726			}
2727			va += L2_L_SIZE;
2728			pa += L2_L_SIZE;
2729			resid -= L2_L_SIZE;
2730			continue;
2731		}
2732
2733		/* Use a small page mapping. */
2734#ifdef VERBOSE_INIT_ARM
2735		printf("P");
2736#endif
2737#ifndef ARM32_NEW_VM_LAYOUT
2738		pte[(va >> PGSHIFT) & 0x3ff] = L2_S_PROTO | pa | L2_V7_AF |
2739		    L2_S_PROT(PTE_KERNEL, prot) | f2s;
2740		PTE_SYNC(&pte[(va >> PGSHIFT) & 0x3ff]);
2741#else
2742		pte[l2pte_index(va)] = L2_S_PROTO | pa | L2_V7_AF |
2743		    L2_S_PROT(PTE_KERNEL, prot) | f2s;
2744		PTE_SYNC(&pte[l2pte_index(va)]);
2745#endif
2746		va += PAGE_SIZE;
2747		pa += PAGE_SIZE;
2748		resid -= PAGE_SIZE;
2749	}
2750#ifdef VERBOSE_INIT_ARM
2751	printf("\n");
2752#endif
2753	return (size);
2754}
2755
2756/********************** PTE initialization routines **************************/
2757
2758/*
2759 * These routines are called when the CPU type is identified to set up
2760 * the PTE prototypes, cache modes, etc.
2761 *
2762 * The variables are always here, just in case LKMs need to reference
2763 * them (though, they shouldn't).
2764 */
2765
2766pt_entry_t	pte_l1_s_cache_mode;
2767pt_entry_t	pte_l1_s_cache_mode_pt;
2768pt_entry_t	pte_l1_s_cache_mask;
2769
2770pt_entry_t	pte_l2_l_cache_mode;
2771pt_entry_t	pte_l2_l_cache_mode_pt;
2772pt_entry_t	pte_l2_l_cache_mask;
2773
2774pt_entry_t	pte_l2_s_cache_mode;
2775pt_entry_t	pte_l2_s_cache_mode_pt;
2776pt_entry_t	pte_l2_s_cache_mask;
2777
2778pt_entry_t	pte_l1_s_coherent;
2779pt_entry_t	pte_l2_l_coherent;
2780pt_entry_t	pte_l2_s_coherent;
2781
2782pt_entry_t	pte_l1_s_prot_ur;
2783pt_entry_t	pte_l1_s_prot_uw;
2784pt_entry_t	pte_l1_s_prot_kr;
2785pt_entry_t	pte_l1_s_prot_kw;
2786pt_entry_t	pte_l1_s_prot_mask;
2787
2788pt_entry_t	pte_l2_l_prot_ur;
2789pt_entry_t	pte_l2_l_prot_uw;
2790pt_entry_t	pte_l2_l_prot_kr;
2791pt_entry_t	pte_l2_l_prot_kw;
2792pt_entry_t	pte_l2_l_prot_mask;
2793
2794pt_entry_t	pte_l2_s_prot_ur;
2795pt_entry_t	pte_l2_s_prot_uw;
2796pt_entry_t	pte_l2_s_prot_kr;
2797pt_entry_t	pte_l2_s_prot_kw;
2798pt_entry_t	pte_l2_s_prot_mask;
2799
2800pt_entry_t	pte_l1_s_proto;
2801pt_entry_t	pte_l1_c_proto;
2802pt_entry_t	pte_l2_s_proto;
2803
2804void		(*pmap_copy_page_func)(struct vm_page *, struct vm_page *);
2805void		(*pmap_zero_page_func)(struct vm_page *);
2806
2807void
2808pmap_pte_init_armv7(void)
2809{
2810	uint32_t id_mmfr0, id_mmfr3;
2811
2812	/*
2813	 * XXX We want to use proper TEX settings eventually.
2814	 */
2815
2816	/* write-allocate should be tested */
2817	pte_l1_s_cache_mode = L1_S_C|L1_S_B;
2818	pte_l2_l_cache_mode = L2_C|L2_B;
2819	pte_l2_s_cache_mode = L2_C|L2_B;
2820
2821	pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C;
2822	pte_l2_l_cache_mode_pt = L2_B|L2_C;
2823	pte_l2_s_cache_mode_pt = L2_B|L2_C;
2824	pmap_needs_pte_sync = 1;
2825
2826	pte_l1_s_cache_mask = L1_S_CACHE_MASK_v7;
2827	pte_l2_l_cache_mask = L2_L_CACHE_MASK_v7;
2828	pte_l2_s_cache_mask = L2_S_CACHE_MASK_v7;
2829
2830	pte_l1_s_coherent = L1_S_COHERENT_v7;
2831	pte_l2_l_coherent = L2_L_COHERENT_v7;
2832	pte_l2_s_coherent = L2_S_COHERENT_v7;
2833
2834	pte_l1_s_prot_ur = L1_S_PROT_UR_v7;
2835	pte_l1_s_prot_uw = L1_S_PROT_UW_v7;
2836	pte_l1_s_prot_kr = L1_S_PROT_KR_v7;
2837	pte_l1_s_prot_kw = L1_S_PROT_KW_v7;
2838	pte_l1_s_prot_mask = L1_S_PROT_MASK_v7;
2839
2840	pte_l2_l_prot_ur = L2_L_PROT_UR_v7;
2841	pte_l2_l_prot_uw = L2_L_PROT_UW_v7;
2842	pte_l2_l_prot_kr = L2_L_PROT_KR_v7;
2843	pte_l2_l_prot_kw = L2_L_PROT_KW_v7;
2844	pte_l2_l_prot_mask = L2_L_PROT_MASK_v7;
2845
2846	pte_l2_s_prot_ur = L2_S_PROT_UR_v7;
2847	pte_l2_s_prot_uw = L2_S_PROT_UW_v7;
2848	pte_l2_s_prot_kr = L2_S_PROT_KR_v7;
2849	pte_l2_s_prot_kw = L2_S_PROT_KW_v7;
2850	pte_l2_s_prot_mask = L2_S_PROT_MASK_v7;
2851
2852	pte_l1_s_proto = L1_S_PROTO_v7;
2853	pte_l1_c_proto = L1_C_PROTO_v7;
2854	pte_l2_s_proto = L2_S_PROTO_v7;
2855
2856	pmap_copy_page_func = pmap_copy_page_generic;
2857	pmap_zero_page_func = pmap_zero_page_generic;
2858
2859	/* Check if the PXN bit is supported. */
2860	__asm volatile("mrc p15, 0, %0, c0, c1, 4" : "=r"(id_mmfr0));
2861	if ((id_mmfr0 & ID_MMFR0_VMSA_MASK) >= VMSA_V7_PXN)
2862		l1_c_pxn = L1_C_V7_PXN;
2863
2864	/* Check for coherent walk. */
2865	__asm volatile("mrc p15, 0, %0, c0, c1, 7" : "=r"(id_mmfr3));
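	/*
	 * A coherent-walk value of 1 (ID_MMFR3 bits [23:20]) means the
	 * table walker snoops the data caches, so PTEs need not be
	 * cleaned before use.
	 */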
2866	if ((id_mmfr3 & 0x00f00000) == 0x00100000)
2867		pmap_needs_pte_sync = 0;
2868}
2869
2870uint32_t pmap_alias_dist;
2871uint32_t pmap_alias_bits;
2872
2873vaddr_t
2874pmap_prefer(vaddr_t foff, vaddr_t va)
2875{
2876	long d, m;
2877
2878	m = pmap_alias_dist;
2879	if (m == 0)             /* m=0 => no cache aliasing */
2880		return va;
2881
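	/*
	 * Offset va so that it has the same alias colour (address
	 * modulo the alias distance) as foff.
	 */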
2882	d = foff - va;
2883	d &= (m - 1);
2884	return va + d;
2885}
2886