/*	$OpenBSD: pmap7.c,v 1.66 2023/01/01 19:49:17 miod Exp $	*/
/*	$NetBSD: pmap.c,v 1.147 2004/01/18 13:03:50 scw Exp $	*/
3
4/*
5 * Copyright 2003 Wasabi Systems, Inc.
6 * All rights reserved.
7 *
8 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *      This product includes software developed for the NetBSD Project by
21 *      Wasabi Systems, Inc.
22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
23 *    or promote products derived from this software without specific prior
24 *    written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39/*
40 * Copyright (c) 2002-2003 Wasabi Systems, Inc.
41 * Copyright (c) 2001 Richard Earnshaw
42 * Copyright (c) 2001-2002 Christopher Gilbert
43 * All rights reserved.
44 *
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. The name of the company nor the name of the author may be used to
51 *    endorse or promote products derived from this software without specific
52 *    prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
55 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
56 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
58 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 */
66
67/*-
68 * Copyright (c) 1999 The NetBSD Foundation, Inc.
69 * All rights reserved.
70 *
71 * This code is derived from software contributed to The NetBSD Foundation
72 * by Charles M. Hannum.
73 *
74 * Redistribution and use in source and binary forms, with or without
75 * modification, are permitted provided that the following conditions
76 * are met:
77 * 1. Redistributions of source code must retain the above copyright
78 *    notice, this list of conditions and the following disclaimer.
79 * 2. Redistributions in binary form must reproduce the above copyright
80 *    notice, this list of conditions and the following disclaimer in the
81 *    documentation and/or other materials provided with the distribution.
82 *
83 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
84 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
85 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
86 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
87 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
88 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
89 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
90 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
91 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
92 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
93 * POSSIBILITY OF SUCH DAMAGE.
94 */
95
96/*
97 * Copyright (c) 1994-1998 Mark Brinicombe.
98 * Copyright (c) 1994 Brini.
99 * All rights reserved.
100 *
101 * This code is derived from software written for Brini by Mark Brinicombe
102 *
103 * Redistribution and use in source and binary forms, with or without
104 * modification, are permitted provided that the following conditions
105 * are met:
106 * 1. Redistributions of source code must retain the above copyright
107 *    notice, this list of conditions and the following disclaimer.
108 * 2. Redistributions in binary form must reproduce the above copyright
109 *    notice, this list of conditions and the following disclaimer in the
110 *    documentation and/or other materials provided with the distribution.
111 * 3. All advertising materials mentioning features or use of this software
112 *    must display the following acknowledgement:
113 *	This product includes software developed by Mark Brinicombe.
114 * 4. The name of the author may not be used to endorse or promote products
115 *    derived from this software without specific prior written permission.
116 *
117 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
118 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
119 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
120 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
121 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
122 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
123 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
124 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
125 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
126 *
127 * RiscBSD kernel project
128 *
 * pmap.c
 *
 * Machine dependent vm stuff
 *
 * Created      : 20/09/94
134 */
135
136/*
137 * Performance improvements, UVM changes, overhauls and part-rewrites
138 * were contributed by Neil A. Carson <neil@causality.com>.
139 */
140
/*
 * Overhauled again to speed up the pmap, use MMU Domains so that L1 tables
 * can be shared, and re-work the KVM layout, by Steve Woodford of Wasabi
 * Systems, Inc.
145 *
146 * There are still a few things outstanding at this time:
147 *
148 *   - There are some unresolved issues for MP systems:
149 *
150 *     o The L1 metadata needs a lock, or more specifically, some places
151 *       need to acquire an exclusive lock when modifying L1 translation
152 *       table entries.
153 *
154 *     o When one cpu modifies an L1 entry, and that L1 table is also
155 *       being used by another cpu, then the latter will need to be told
156 *       that a tlb invalidation may be necessary. (But only if the old
157 *       domain number in the L1 entry being over-written is currently
158 *       the active domain on that cpu). I guess there are lots more tlb
159 *       shootdown issues too...
160 *
161 *     o If the vector_page is at 0x00000000 instead of 0xffff0000, then
162 *       MP systems will lose big-time because of the MMU domain hack.
163 *       The only way this can be solved (apart from moving the vector
164 *       page to 0xffff0000) is to reserve the first 1MB of user address
165 *       space for kernel use only. This would require re-linking all
166 *       applications so that the text section starts above this 1MB
167 *       boundary.
168 *
169 *     o Tracking which VM space is resident in the cache/tlb has not yet
170 *       been implemented for MP systems.
171 *
172 *     o Finally, there is a pathological condition where two cpus running
173 *       two separate processes (not procs) which happen to share an L1
174 *       can get into a fight over one or more L1 entries. This will result
175 *       in a significant slow-down if both processes are in tight loops.
176 */
177
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/user.h>
#include <sys/pool.h>

#include <uvm/uvm.h>

#include <machine/pmap.h>
#include <machine/pcb.h>
#include <machine/param.h>
#include <arm/cpufunc.h>
191
192//#define PMAP_DEBUG
193#ifdef PMAP_DEBUG
194
195/*
196 * for switching to potentially finer grained debugging
197 */
198#define	PDB_FOLLOW	0x0001
199#define	PDB_INIT	0x0002
200#define	PDB_ENTER	0x0004
201#define	PDB_REMOVE	0x0008
202#define	PDB_CREATE	0x0010
203#define	PDB_PTPAGE	0x0020
204#define	PDB_GROWKERN	0x0040
205#define	PDB_BITS	0x0080
206#define	PDB_COLLECT	0x0100
207#define	PDB_PROTECT	0x0200
208#define	PDB_MAP_L1	0x0400
209#define	PDB_BOOTSTRAP	0x1000
210#define	PDB_PARANOIA	0x2000
211#define	PDB_WIRING	0x4000
212#define	PDB_PVDUMP	0x8000
213#define	PDB_KENTER	0x20000
214#define	PDB_KREMOVE	0x40000
215
216#define pmapdebug (cold ? 0 : 0xffffffff)
217#define	NPDEBUG(_lev_,_stat_) \
218	if (pmapdebug & (_lev_)) \
219        	((_stat_))
220
221#else	/* PMAP_DEBUG */
222#define NPDEBUG(_lev_,_stat_) /* Nothing */
223#endif	/* PMAP_DEBUG */
224
225/*
226 * pmap_kernel() points here
227 */
228struct pmap     kernel_pmap_store;
229
230/*
231 * Pool and cache that pmap structures are allocated from.
232 * We use a cache to avoid clearing the pm_l2[] array (1KB)
233 * in pmap_create().
234 */
235struct pool pmap_pmap_pool;
236
237/*
238 * Pool of PV structures
239 */
240struct pool pmap_pv_pool;
241void *pmap_bootstrap_pv_page_alloc(struct pool *, int, int *);
242void pmap_bootstrap_pv_page_free(struct pool *, void *);
243struct pool_allocator pmap_bootstrap_pv_allocator = {
244	pmap_bootstrap_pv_page_alloc, pmap_bootstrap_pv_page_free
245};
246
247/*
248 * Pool and cache of l2_dtable structures.
249 * We use a cache to avoid clearing the structures when they're
250 * allocated. (196 bytes)
251 */
252struct pool pmap_l2dtable_pool;
253vaddr_t pmap_kernel_l2dtable_kva;
254
255/*
256 * Pool and cache of L2 page descriptors.
257 * We use a cache to avoid clearing the descriptor table
258 * when they're allocated. (1KB)
259 */
260struct pool pmap_l2ptp_pool;
261vaddr_t pmap_kernel_l2ptp_kva;
262paddr_t pmap_kernel_l2ptp_phys;
263
/*
 * pmap copy/zero page, wb page, and mem(4) hook point
 */
267pt_entry_t *csrc_pte, *cdst_pte, *cwb_pte;
268vaddr_t csrcp, cdstp, cwbp;
269char *memhook;
270extern caddr_t msgbufaddr;
271
272/*
273 * Flag to indicate if pmap_init() has done its thing
274 */
275int pmap_initialized;
276
277/*
278 * Metadata for L1 translation tables.
279 */
struct l1_ttable {
	/* Entry on the L1 Table list */
	TAILQ_ENTRY(l1_ttable) l1_link;

	/* Physical address of this L1 page table */
	paddr_t l1_physaddr;

	/* KVA of this L1 page table */
	pd_entry_t *l1_kva;
};
290
/*
 * Convert a virtual address into its L1 table index. That is, the
 * index used to locate the L2 descriptor table pointer in an L1 table.
 * This is basically used to index l1->l1_kva[].
 *
 * Each L2 descriptor table represents 1MB of VA space.
 */
#define	L1_IDX(va)		(((vaddr_t)(va)) >> L1_S_SHIFT)
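/*
 * Worked example (assuming the usual ARM value of 20 for L1_S_SHIFT, i.e.
 * one L1 slot per 1MB of VA): for va == 0xc0345678, L1_IDX(va) == 0xc03,
 * so the corresponding L2 descriptor pointer lives in l1->l1_kva[0xc03].
 */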
299
300/*
301 * Set if the PXN bit is supported.
302 */
303pd_entry_t l1_c_pxn;
304
305/*
306 * A list of all L1 tables
307 */
308TAILQ_HEAD(, l1_ttable) l1_list;
309
/*
 * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots.
 *
 * This is normally 16MB worth of L2 page descriptors for any given pmap.
 * Reference counts are maintained for L2 descriptors so they can be
 * freed when empty.
 */
struct l2_dtable {
	/* The number of L2 page descriptors allocated to this l2_dtable */
	u_int l2_occupancy;

	/* List of L2 page descriptors */
	struct l2_bucket {
		pt_entry_t *l2b_kva;	/* KVA of L2 Descriptor Table */
		paddr_t l2b_phys;	/* Physical address of same */
		u_short l2b_l1idx;	/* This L2 table's L1 index */
		u_short l2b_occupancy;	/* How many active descriptors */
	} l2_bucket[L2_BUCKET_SIZE];
};
329
/*
 * Given an L1 table index, calculate the corresponding l2_dtable index
 * and bucket index within the l2_dtable.
 */
#define	L2_IDX(l1idx)		(((l1idx) >> L2_BUCKET_LOG2) & \
				 (L2_SIZE - 1))
#define	L2_BUCKET(l1idx)	((l1idx) & (L2_BUCKET_SIZE - 1))

/*
 * Given a virtual address, this macro returns the
 * virtual address required to drop into the next L2 bucket.
 */
#define	L2_NEXT_BUCKET(va)	(((va) & L1_S_FRAME) + L1_S_SIZE)
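/*
 * Illustrative sketch of how the macros above cooperate, assuming the
 * typical L2_BUCKET_SIZE of 16 (one l2_dtable per 16MB of VA): for
 * va == 0xc0345678 we get L1_IDX(va) == 0xc03, so L2_IDX(0xc03) selects
 * pm->pm_l2[0xc0], L2_BUCKET(0xc03) selects l2_bucket[3] within it, and
 * L2_NEXT_BUCKET(va) == 0xc0400000, the start of the following 1MB bucket.
 */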
343
344/*
345 * L2 allocation.
346 */
347#define	pmap_alloc_l2_dtable()		\
348	    pool_get(&pmap_l2dtable_pool, PR_NOWAIT|PR_ZERO)
349#define	pmap_free_l2_dtable(l2)		\
350	    pool_put(&pmap_l2dtable_pool, (l2))
351
352/*
353 * We try to map the page tables write-through, if possible.  However, not
354 * all CPUs have a write-through cache mode, so on those we have to sync
355 * the cache when we frob page tables.
356 *
357 * We try to evaluate this at compile time, if possible.  However, it's
358 * not always possible to do that, hence this run-time var.
359 */
360int	pmap_needs_pte_sync;
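/*
 * This variable is consumed by the PTE_SYNC()/PTE_SYNC_RANGE() macros used
 * throughout this file: when it is non-zero, they write modified PTEs back
 * out of the data cache so the hardware table walker sees them.  (Rough
 * description only; the actual macro definitions live in the pmap header.)
 */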
361
362/*
363 * Real definition of pv_entry.
364 */
365struct pv_entry {
366	struct pv_entry *pv_next;       /* next pv_entry */
367	pmap_t		pv_pmap;        /* pmap where mapping lies */
368	vaddr_t		pv_va;          /* virtual address for mapping */
369	u_int		pv_flags;       /* flags */
370};
371
372/*
373 * Macro to determine if a mapping might be resident in the
374 * instruction cache and/or TLB
375 */
376#define	PV_BEEN_EXECD(f)  (((f) & PVF_EXEC) != 0)
377
378/*
379 * Local prototypes
380 */
381void		pmap_alloc_specials(vaddr_t *, int, vaddr_t *,
382		    pt_entry_t **);
383static int	pmap_is_current(pmap_t);
384void		pmap_enter_pv(struct vm_page *, struct pv_entry *,
385		    pmap_t, vaddr_t, u_int);
386static struct pv_entry *pmap_find_pv(struct vm_page *, pmap_t, vaddr_t);
387struct pv_entry *pmap_remove_pv(struct vm_page *, pmap_t, vaddr_t);
388u_int		pmap_modify_pv(struct vm_page *, pmap_t, vaddr_t,
389		    u_int, u_int);
390
391void		pmap_alloc_l1(pmap_t);
392void		pmap_free_l1(pmap_t);
393
394struct l2_bucket *pmap_get_l2_bucket(pmap_t, vaddr_t);
395struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vaddr_t);
396void		pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int);
397
398void		pmap_clearbit(struct vm_page *, u_int);
399void		pmap_clean_page(struct vm_page *);
400void		pmap_page_remove(struct vm_page *);
401
402void		pmap_init_l1(struct l1_ttable *, pd_entry_t *);
403vaddr_t		kernel_pt_lookup(paddr_t);
404
405
406/*
407 * External function prototypes
408 */
409extern void bzero_page(vaddr_t);
410extern void bcopy_page(vaddr_t, vaddr_t);
411
412/*
413 * Misc variables
414 */
415vaddr_t virtual_avail;
416vaddr_t virtual_end;
417vaddr_t pmap_curmaxkvaddr;
418
419extern pv_addr_t systempage;
420
421static __inline int
422pmap_is_current(pmap_t pm)
423{
424	if (pm == pmap_kernel() ||
425	    (curproc && curproc->p_vmspace->vm_map.pmap == pm))
426		return 1;
427
428	return 0;
429}
430
431/*
432 * A bunch of routines to conditionally flush the caches/TLB depending
433 * on whether the specified pmap actually needs to be flushed at any
434 * given time.
435 */
436static __inline void
437pmap_tlb_flushID_SE(pmap_t pm, vaddr_t va)
438{
439	if (pmap_is_current(pm))
440		cpu_tlb_flushID_SE(va);
441}
442
443static __inline void
444pmap_tlb_flushID(pmap_t pm)
445{
446	if (pmap_is_current(pm))
447		cpu_tlb_flushID();
448}
449
450/*
451 * Returns a pointer to the L2 bucket associated with the specified pmap
452 * and VA, or NULL if no L2 bucket exists for the address.
453 */
454struct l2_bucket *
455pmap_get_l2_bucket(pmap_t pm, vaddr_t va)
456{
457	struct l2_dtable *l2;
458	struct l2_bucket *l2b;
459	u_short l1idx;
460
461	l1idx = L1_IDX(va);
462
463	if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL ||
464	    (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL)
465		return (NULL);
466
467	return (l2b);
468}
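/*
 * In other words, a full VA-to-PTE walk through a pmap's metadata is,
 * roughly:
 *
 *	l2b = pmap_get_l2_bucket(pm, va);
 *	pte = l2b->l2b_kva[l2pte_index(va)];
 *
 * i.e. pm->pm_l2[L2_IDX(l1idx)]->l2_bucket[L2_BUCKET(l1idx)] with
 * l1idx == L1_IDX(va).  This is only an illustrative sketch; real callers
 * must check for a NULL return as above before dereferencing.
 */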
469
/*
 * main pv_entry manipulation functions:
 *   pmap_enter_pv: enter a mapping onto a vm_page list
 *   pmap_remove_pv: remove a mapping from a vm_page list
 *
 * NOTE: pmap_enter_pv expects to lock the pvh itself
 *       pmap_remove_pv expects the caller to lock the pvh before calling
 */
478
/*
 * pmap_enter_pv: enter a mapping onto a vm_page list
 *
 * => caller should have pmap locked
 * => we will gain the lock on the vm_page and allocate the new pv_entry
 * => caller should adjust ptp's wire_count before calling
 * => caller should not adjust pmap's wire_count
 */
487void
488pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, pmap_t pm,
489    vaddr_t va, u_int flags)
490{
491
492	NPDEBUG(PDB_PVDUMP,
493	    printf("pmap_enter_pv: pm %p, pg %p, flags 0x%x\n", pm, pg, flags));
494
495	pve->pv_pmap = pm;
496	pve->pv_va = va;
497	pve->pv_flags = flags;
498
499	pve->pv_next = pg->mdpage.pvh_list;	/* add to ... */
500	pg->mdpage.pvh_list = pve;		/* ... locked list */
501	pg->mdpage.pvh_attrs |= flags & (PVF_REF | PVF_MOD);
502
503	if (pve->pv_flags & PVF_WIRED)
504		++pm->pm_stats.wired_count;
505}
506
/*
 * pmap_find_pv: Find a pv entry
 *
 * => caller should hold lock on vm_page
 */
513static __inline struct pv_entry *
514pmap_find_pv(struct vm_page *pg, pmap_t pm, vaddr_t va)
515{
516	struct pv_entry *pv;
517
518	for (pv = pg->mdpage.pvh_list; pv; pv = pv->pv_next) {
519		if (pm == pv->pv_pmap && va == pv->pv_va)
520			break;
521	}
522
523	return (pv);
524}
525
526/*
527 * pmap_remove_pv: try to remove a mapping from a pv_list
528 *
529 * => pmap should be locked
530 * => caller should hold lock on vm_page [so that attrs can be adjusted]
531 * => caller should adjust ptp's wire_count and free PTP if needed
532 * => caller should NOT adjust pmap's wire_count
533 * => we return the removed pve
534 */
535struct pv_entry *
536pmap_remove_pv(struct vm_page *pg, pmap_t pm, vaddr_t va)
537{
538	struct pv_entry *pve, **prevptr;
539
540	NPDEBUG(PDB_PVDUMP,
541	    printf("pmap_remove_pv: pm %p, pg %p, va 0x%08lx\n", pm, pg, va));
542
543	prevptr = &pg->mdpage.pvh_list;		/* previous pv_entry pointer */
544	pve = *prevptr;
545
546	while (pve) {
547		if (pve->pv_pmap == pm && pve->pv_va == va) {	/* match? */
548			NPDEBUG(PDB_PVDUMP,
549			    printf("pmap_remove_pv: pm %p, pg %p, flags 0x%x\n", pm, pg, pve->pv_flags));
550			*prevptr = pve->pv_next;		/* remove it! */
551			if (pve->pv_flags & PVF_WIRED)
552			    --pm->pm_stats.wired_count;
553			break;
554		}
555		prevptr = &pve->pv_next;		/* previous pointer */
556		pve = pve->pv_next;			/* advance */
557	}
558
559	return(pve);				/* return removed pve */
560}
561
/*
 * pmap_modify_pv: Update pv flags
 *
 * => caller should hold lock on vm_page [so that attrs can be adjusted]
 * => caller should NOT adjust pmap's wire_count
 * => we return the old flags
 *
 * Modify a physical-virtual mapping in the pv table
 */
572u_int
573pmap_modify_pv(struct vm_page *pg, pmap_t pm, vaddr_t va,
574    u_int clr_mask, u_int set_mask)
575{
576	struct pv_entry *npv;
577	u_int flags, oflags;
578
579	if ((npv = pmap_find_pv(pg, pm, va)) == NULL)
580		return (0);
581
582	NPDEBUG(PDB_PVDUMP,
583	    printf("pmap_modify_pv: pm %p, pg %p, clr 0x%x, set 0x%x, flags 0x%x\n", pm, pg, clr_mask, set_mask, npv->pv_flags));
584
585	/*
586	 * There is at least one VA mapping this page.
587	 */
588
589	if (clr_mask & (PVF_REF | PVF_MOD))
590		pg->mdpage.pvh_attrs |= set_mask & (PVF_REF | PVF_MOD);
591
592	oflags = npv->pv_flags;
593	npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask;
594
595	if ((flags ^ oflags) & PVF_WIRED) {
596		if (flags & PVF_WIRED)
597			++pm->pm_stats.wired_count;
598		else
599			--pm->pm_stats.wired_count;
600	}
601
602	return (oflags);
603}
604
/* Debug aid: number of L1 translation tables allocated (see pmap_alloc_l1()). */
uint nl1;
606/*
607 * Allocate an L1 translation table for the specified pmap.
608 * This is called at pmap creation time.
609 */
610void
611pmap_alloc_l1(pmap_t pm)
612{
613	struct l1_ttable *l1;
614	struct pglist plist;
615	struct vm_page *m;
616	pd_entry_t *pl1pt;
617	vaddr_t va, eva;
618	int error;
619
620#ifdef PMAP_DEBUG
621printf("%s: %d\n", __func__, ++nl1);
622#endif
623	/* XXX use a pool? or move to inside struct pmap? */
624	l1 = malloc(sizeof(*l1), M_VMPMAP, M_WAITOK);
625
626	/* Allocate a L1 page table */
627	for (;;) {
628		va = (vaddr_t)km_alloc(L1_TABLE_SIZE, &kv_any, &kp_none,
629		    &kd_nowait);
630		if (va != 0)
631			break;
632		uvm_wait("alloc_l1_va");
633	}
634
635	for (;;) {
636		TAILQ_INIT(&plist);
637		error = uvm_pglistalloc(L1_TABLE_SIZE, 0, (paddr_t)-1,
638		    L1_TABLE_SIZE, 0, &plist, 1, UVM_PLA_WAITOK);
639		if (error == 0)
640			break;
641		uvm_wait("alloc_l1_pg");
642	}
643
644	pl1pt = (pd_entry_t *)va;
645	m = TAILQ_FIRST(&plist);
646	for (eva = va + L1_TABLE_SIZE; va < eva; va += PAGE_SIZE) {
647		paddr_t pa = VM_PAGE_TO_PHYS(m);
648
649		pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
650		m = TAILQ_NEXT(m, pageq);
651	}
652
653	pmap_init_l1(l1, pl1pt);
654
655	pm->pm_l1 = l1;
656}
657
658/*
659 * Free an L1 translation table.
660 * This is called at pmap destruction time.
661 */
662void
663pmap_free_l1(pmap_t pm)
664{
665	struct l1_ttable *l1 = pm->pm_l1;
666	struct pglist mlist;
667	struct vm_page *pg;
668	struct l2_bucket *l2b;
669	pt_entry_t *ptep;
670	vaddr_t va;
671	uint npg;
672
673	pm->pm_l1 = NULL;
674	TAILQ_REMOVE(&l1_list, l1, l1_link);
675
676	/* free backing pages */
677	TAILQ_INIT(&mlist);
678	va = (vaddr_t)l1->l1_kva;
679	for (npg = atop(L1_TABLE_SIZE); npg != 0; npg--) {
680		l2b = pmap_get_l2_bucket(pmap_kernel(), va);
681		ptep = &l2b->l2b_kva[l2pte_index(va)];
682		pg = PHYS_TO_VM_PAGE(l2pte_pa(*ptep));
683		TAILQ_INSERT_TAIL(&mlist, pg, pageq);
684		va += PAGE_SIZE;
685	}
686	pmap_kremove((vaddr_t)l1->l1_kva, L1_TABLE_SIZE);
687	uvm_pglistfree(&mlist);
688
689	/* free backing va */
690	km_free(l1->l1_kva, L1_TABLE_SIZE, &kv_any, &kp_none);
691
692	free(l1, M_VMPMAP, 0);
693}
694
695/*
696 * void pmap_free_l2_ptp(pt_entry_t *)
697 *
698 * Free an L2 descriptor table.
699 */
700static __inline void
701pmap_free_l2_ptp(pt_entry_t *l2)
702{
703	pool_put(&pmap_l2ptp_pool, (void *)l2);
704}
705
706/*
707 * Returns a pointer to the L2 bucket associated with the specified pmap
708 * and VA.
709 *
710 * If no L2 bucket exists, perform the necessary allocations to put an L2
711 * bucket/page table in place.
712 *
713 * Note that if a new L2 bucket/page was allocated, the caller *must*
714 * increment the bucket occupancy counter appropriately *before*
715 * releasing the pmap's lock to ensure no other thread or cpu deallocates
716 * the bucket/page in the meantime.
717 */
718struct l2_bucket *
719pmap_alloc_l2_bucket(pmap_t pm, vaddr_t va)
720{
721	struct l2_dtable *l2;
722	struct l2_bucket *l2b;
723	u_short l1idx;
724
725	l1idx = L1_IDX(va);
726
727	if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
728		/*
729		 * No mapping at this address, as there is
730		 * no entry in the L1 table.
731		 * Need to allocate a new l2_dtable.
732		 */
733		if ((l2 = pmap_alloc_l2_dtable()) == NULL)
734			return (NULL);
735
736		/*
737		 * Link it into the parent pmap
738		 */
739		pm->pm_l2[L2_IDX(l1idx)] = l2;
740	}
741
742	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
743
744	/*
745	 * Fetch pointer to the L2 page table associated with the address.
746	 */
747	if (l2b->l2b_kva == NULL) {
748		pt_entry_t *ptep;
749
750		/*
751		 * No L2 page table has been allocated. Chances are, this
752		 * is because we just allocated the l2_dtable, above.
753		 */
754		ptep = pool_get(&pmap_l2ptp_pool, PR_NOWAIT|PR_ZERO);
755		if (ptep == NULL) {
756			/*
757			 * Oops, no more L2 page tables available at this
758			 * time. We may need to deallocate the l2_dtable
759			 * if we allocated a new one above.
760			 */
761			if (l2->l2_occupancy == 0) {
762				pm->pm_l2[L2_IDX(l1idx)] = NULL;
763				pmap_free_l2_dtable(l2);
764			}
765			return (NULL);
766		}
767		PTE_SYNC_RANGE(ptep, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
768		pmap_extract(pmap_kernel(), (vaddr_t)ptep, &l2b->l2b_phys);
769
770		l2->l2_occupancy++;
771		l2b->l2b_kva = ptep;
772		l2b->l2b_l1idx = l1idx;
773	}
774
775	return (l2b);
776}
777
778/*
779 * One or more mappings in the specified L2 descriptor table have just been
780 * invalidated.
781 *
782 * Garbage collect the metadata and descriptor table itself if necessary.
783 *
784 * The pmap lock must be acquired when this is called (not necessary
785 * for the kernel pmap).
786 */
787void
788pmap_free_l2_bucket(pmap_t pm, struct l2_bucket *l2b, u_int count)
789{
790	struct l2_dtable *l2;
791	pd_entry_t *pl1pd;
792	pt_entry_t *ptep;
793	u_short l1idx;
794
795	KDASSERT(count <= l2b->l2b_occupancy);
796
797	/*
798	 * Update the bucket's reference count according to how many
799	 * PTEs the caller has just invalidated.
800	 */
801	l2b->l2b_occupancy -= count;
802
803	/*
804	 * Note:
805	 *
806	 * Level 2 page tables allocated to the kernel pmap are never freed
807	 * as that would require checking all Level 1 page tables and
808	 * removing any references to the Level 2 page table. See also the
809	 * comment elsewhere about never freeing bootstrap L2 descriptors.
810	 *
811	 * We make do with just invalidating the mapping in the L2 table.
812	 *
813	 * This isn't really a big deal in practice and, in fact, leads
814	 * to a performance win over time as we don't need to continually
815	 * alloc/free.
816	 */
817	if (l2b->l2b_occupancy > 0 || pm == pmap_kernel())
818		return;
819
820	/*
821	 * There are no more valid mappings in this level 2 page table.
822	 * Go ahead and NULL-out the pointer in the bucket, then
823	 * free the page table.
824	 */
825	l1idx = l2b->l2b_l1idx;
826	ptep = l2b->l2b_kva;
827	l2b->l2b_kva = NULL;
828
829	pl1pd = &pm->pm_l1->l1_kva[l1idx];
830
831	/*
832	 * Invalidate the L1 slot.
833	 */
834	*pl1pd = L1_TYPE_INV;
835	PTE_SYNC(pl1pd);
836	pmap_tlb_flushID_SE(pm, l1idx << L1_S_SHIFT);
837
838	/*
839	 * Release the L2 descriptor table back to the pool cache.
840	 */
841	pmap_free_l2_ptp(ptep);
842
843	/*
844	 * Update the reference count in the associated l2_dtable
845	 */
846	l2 = pm->pm_l2[L2_IDX(l1idx)];
847	if (--l2->l2_occupancy > 0)
848		return;
849
850	/*
851	 * There are no more valid mappings in any of the Level 1
852	 * slots managed by this l2_dtable. Go ahead and NULL-out
853	 * the pointer in the parent pmap and free the l2_dtable.
854	 */
855	pm->pm_l2[L2_IDX(l1idx)] = NULL;
856	pmap_free_l2_dtable(l2);
857}
858
859/*
860 * Modify pte bits for all ptes corresponding to the given physical address.
861 * We use `maskbits' rather than `clearbits' because we're always passing
862 * constants and the latter would require an extra inversion at run-time.
863 */
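/*
 * For example, pmap_clearbit(pg, PVF_WRITE) write-protects every existing
 * mapping of pg, while pmap_clearbit(pg, PVF_REF) clears the Access Flag in
 * each mapping so that the next reference faults into dab_access().
 */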
864void
865pmap_clearbit(struct vm_page *pg, u_int maskbits)
866{
867	struct l2_bucket *l2b;
868	struct pv_entry *pv;
869	pt_entry_t *ptep, npte, opte;
870	pmap_t pm;
871	vaddr_t va;
872	u_int oflags;
873
874	NPDEBUG(PDB_BITS,
875	    printf("pmap_clearbit: pg %p (0x%08lx) mask 0x%x\n",
876	    pg, pg->phys_addr, maskbits));
877
878	/*
879	 * Clear saved attributes (modify, reference)
880	 */
881	pg->mdpage.pvh_attrs &= ~(maskbits & (PVF_MOD | PVF_REF));
882
883	if (pg->mdpage.pvh_list == NULL)
884		return;
885
886	/*
887	 * Loop over all current mappings setting/clearing as appropriate
888	 */
889	for (pv = pg->mdpage.pvh_list; pv; pv = pv->pv_next) {
890		va = pv->pv_va;
891		pm = pv->pv_pmap;
892		oflags = pv->pv_flags;
893		pv->pv_flags &= ~maskbits;
894
895		l2b = pmap_get_l2_bucket(pm, va);
896		KDASSERT(l2b != NULL);
897
898		ptep = &l2b->l2b_kva[l2pte_index(va)];
899		npte = opte = *ptep;
900		NPDEBUG(PDB_BITS,
901		    printf(
902		    "pmap_clearbit: pv %p, pm %p, va 0x%08lx, flag 0x%x\n",
903		    pv, pv->pv_pmap, pv->pv_va, oflags));
904
905		if (maskbits & (PVF_WRITE|PVF_MOD)) {
906			/* Disable write access. */
907			npte |= L2_V7_AP(0x4);
908		}
909
910		if (maskbits & PVF_REF) {
911			/*
912			 * Clear the Access Flag such that we will
913			 * take a page fault the next time the mapping
914			 * is referenced.
915			 */
916			npte &= ~L2_V7_AF;
917		}
918
919		if (npte != opte) {
920			*ptep = npte;
921			PTE_SYNC(ptep);
922			/* Flush the TLB entry if a current pmap. */
923			if (opte & L2_V7_AF)
924				pmap_tlb_flushID_SE(pm, pv->pv_va);
925		}
926
927		NPDEBUG(PDB_BITS,
928		    printf("pmap_clearbit: pm %p va 0x%lx opte 0x%08x npte 0x%08x\n",
929		    pm, va, opte, npte));
930	}
931}
932
933/*
934 * pmap_clean_page()
935 *
936 * Invalidate all I$ aliases for a single page.
937 */
938void
939pmap_clean_page(struct vm_page *pg)
940{
941	pmap_t pm;
942	struct pv_entry *pv;
943
944	if (curproc)
945		pm = curproc->p_vmspace->vm_map.pmap;
946	else
947		pm = pmap_kernel();
948
949	for (pv = pg->mdpage.pvh_list; pv; pv = pv->pv_next) {
950		/* inline !pmap_is_current(pv->pv_pmap) */
951		if (pv->pv_pmap != pmap_kernel() && pv->pv_pmap != pm)
952			continue;
953
954		if (PV_BEEN_EXECD(pv->pv_flags))
955			cpu_icache_sync_range(pv->pv_va, PAGE_SIZE);
956	}
957}
958
959/*
960 * Routine:	pmap_page_remove
961 * Function:
962 *		Removes this physical page from
963 *		all physical maps in which it resides.
964 *		Reflects back modify bits to the pager.
965 */
966void
967pmap_page_remove(struct vm_page *pg)
968{
969	struct l2_bucket *l2b;
970	struct pv_entry *pv, *npv;
971	pmap_t pm, curpm;
972	pt_entry_t *ptep, opte;
973	int flush;
974
975	NPDEBUG(PDB_FOLLOW,
976	    printf("pmap_page_remove: pg %p (0x%08lx)\n", pg, pg->phys_addr));
977
978	pv = pg->mdpage.pvh_list;
979	if (pv == NULL)
980		return;
981
982	flush = 0;
983	if (curproc)
984		curpm = curproc->p_vmspace->vm_map.pmap;
985	else
986		curpm = pmap_kernel();
987
988	while (pv) {
989		pm = pv->pv_pmap;
990
991		l2b = pmap_get_l2_bucket(pm, pv->pv_va);
992		KDASSERT(l2b != NULL);
993
994		ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
995		opte = *ptep;
996		if (opte != L2_TYPE_INV) {
997			/* inline pmap_is_current(pm) */
998			if ((opte & L2_V7_AF) &&
999			    (pm == curpm || pm == pmap_kernel())) {
1000				if (PV_BEEN_EXECD(pv->pv_flags))
1001					cpu_icache_sync_range(pv->pv_va, PAGE_SIZE);
1002				flush = 1;
1003			}
1004
1005			/*
1006			 * Update statistics
1007			 */
1008			--pm->pm_stats.resident_count;
1009
1010			/* Wired bit */
1011			if (pv->pv_flags & PVF_WIRED)
1012				--pm->pm_stats.wired_count;
1013
1014			/*
1015			 * Invalidate the PTEs.
1016			 */
1017			*ptep = L2_TYPE_INV;
1018			PTE_SYNC(ptep);
1019			if (flush)
1020				cpu_tlb_flushID_SE(pv->pv_va);
1021
1022			pmap_free_l2_bucket(pm, l2b, 1);
1023		}
1024
1025		npv = pv->pv_next;
1026		pool_put(&pmap_pv_pool, pv);
1027		pv = npv;
1028	}
1029	pg->mdpage.pvh_list = NULL;
1030}
1031
1032/*
1033 * pmap_t pmap_create(void)
1034 *
1035 *      Create a new pmap structure from scratch.
1036 */
1037pmap_t
1038pmap_create(void)
1039{
1040	pmap_t pm;
1041
1042	pm = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO);
1043
1044	pm->pm_refs = 1;
1045	pm->pm_stats.wired_count = 0;
1046	pmap_alloc_l1(pm);
1047
1048	return (pm);
1049}
1050
1051/*
1052 * void pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot,
1053 *     int flags)
1054 *
1055 *      Insert the given physical page (p) at
1056 *      the specified virtual address (v) in the
1057 *      target physical map with the protection requested.
1058 *
1059 *      NB:  This is the only routine which MAY NOT lazy-evaluate
1060 *      or lose information.  That is, this routine must actually
1061 *      insert this page into the given map NOW.
1062 */
1063int
1064pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1065{
1066	struct l2_bucket *l2b;
1067	struct vm_page *pg, *opg;
1068	struct pv_entry *pve;
1069	pt_entry_t *ptep, npte, opte;
1070	u_int nflags;
1071	u_int oflags;
1072	int mapped = 1;
1073
1074	NPDEBUG(PDB_ENTER, printf("pmap_enter: pm %p va 0x%lx pa 0x%lx prot %x flag %x\n", pm, va, pa, prot, flags));
1075
1076	KDASSERT((flags & PMAP_WIRED) == 0 || (flags & PROT_MASK) != 0);
1077	KDASSERT(((va | pa) & PGOFSET) == 0);
1078
1079	/*
1080	 * Get a pointer to the page.  Later on in this function, we
1081	 * test for a managed page by checking pg != NULL.
1082	 */
1083	pg = pmap_initialized ? PHYS_TO_VM_PAGE(pa) : NULL;
1084
1085	nflags = 0;
1086	if (prot & PROT_WRITE)
1087		nflags |= PVF_WRITE;
1088	if (prot & PROT_EXEC)
1089		nflags |= PVF_EXEC;
1090	if (flags & PMAP_WIRED)
1091		nflags |= PVF_WIRED;
1092
1093	/*
1094	 * Fetch the L2 bucket which maps this page, allocating one if
1095	 * necessary for user pmaps.
1096	 */
1097	if (pm == pmap_kernel())
1098		l2b = pmap_get_l2_bucket(pm, va);
1099	else
1100		l2b = pmap_alloc_l2_bucket(pm, va);
1101	if (l2b == NULL) {
1102		if (flags & PMAP_CANFAIL)
1103			return (ENOMEM);
1104
1105		panic("pmap_enter: failed to allocate L2 bucket");
1106	}
1107	ptep = &l2b->l2b_kva[l2pte_index(va)];
1108	opte = *ptep;
1109	npte = L2_S_PROTO | pa;
1110
1111	if (opte != L2_TYPE_INV) {
1112		/*
1113		 * There is already a mapping at this address.
1114		 * If the physical address is different, lookup the
1115		 * vm_page.
1116		 */
1117		if (l2pte_pa(opte) != pa)
1118			opg = PHYS_TO_VM_PAGE(l2pte_pa(opte));
1119		else
1120			opg = pg;
1121	} else
1122		opg = NULL;
1123
1124	if (pg) {
1125		/*
1126		 * This has to be a managed mapping.
1127		 */
1128		if ((flags & PROT_MASK) ||
1129		    (pg->mdpage.pvh_attrs & PVF_REF)) {
1130			/*
1131			 * - The access type indicates that we don't need
1132			 *   to do referenced emulation.
1133			 * OR
1134			 * - The physical page has already been referenced
1135			 *   so no need to re-do referenced emulation here.
1136			 */
1137			nflags |= PVF_REF;
1138			npte |= L2_V7_AF;
1139
1140			if ((flags & PROT_WRITE) ||
1141			    (pg->mdpage.pvh_attrs & PVF_MOD)) {
1142				/*
1143				 * This is a writable mapping, and the
1144				 * page's mod state indicates it has
1145				 * already been modified. Make it
1146				 * writable from the outset.
1147				 */
1148				nflags |= PVF_MOD;
1149			} else {
1150				prot &= ~PROT_WRITE;
1151			}
1152		} else {
1153			/*
1154			 * Need to do page referenced emulation.
1155			 */
1156			prot &= ~PROT_WRITE;
1157			mapped = 0;
1158		}
1159
1160		npte |= pte_l2_s_cache_mode;
1161
1162		if (pg == opg) {
1163			/*
1164			 * We're changing the attrs of an existing mapping.
1165			 */
1166			oflags = pmap_modify_pv(pg, pm, va,
1167			    PVF_WRITE | PVF_EXEC | PVF_WIRED |
1168			    PVF_MOD | PVF_REF, nflags);
1169		} else {
1170			/*
1171			 * New mapping, or changing the backing page
1172			 * of an existing mapping.
1173			 */
1174			if (opg) {
1175				/*
1176				 * Replacing an existing mapping with a new one.
1177				 * It is part of our managed memory so we
1178				 * must remove it from the PV list
1179				 */
1180				pve = pmap_remove_pv(opg, pm, va);
1181			} else
1182			if ((pve = pool_get(&pmap_pv_pool, PR_NOWAIT)) == NULL){
1183				if ((flags & PMAP_CANFAIL) == 0)
1184					panic("pmap_enter: no pv entries");
1185
1186				if (pm != pmap_kernel())
1187					pmap_free_l2_bucket(pm, l2b, 0);
1188
1189				NPDEBUG(PDB_ENTER,
1190				    printf("pmap_enter: ENOMEM\n"));
1191				return (ENOMEM);
1192			}
1193
1194			pmap_enter_pv(pg, pve, pm, va, nflags);
1195		}
1196	} else {
1197		/*
1198		 * We're mapping an unmanaged page.
1199		 * These are always readable, and possibly writable, from
1200		 * the get go as we don't need to track ref/mod status.
1201		 */
1202		npte |= L2_V7_AF;
1203
1204		if (opg) {
1205			/*
1206			 * Looks like there's an existing 'managed' mapping
1207			 * at this address.
1208			 */
1209			pve = pmap_remove_pv(opg, pm, va);
1210			pool_put(&pmap_pv_pool, pve);
1211		}
1212	}
1213
1214	/*
1215	 * Make sure userland mappings get the right permissions
1216	 */
1217	npte |= L2_S_PROT(pm == pmap_kernel() ?  PTE_KERNEL : PTE_USER, prot);
1218
1219	/*
1220	 * Keep the stats up to date
1221	 */
1222	if (opte == L2_TYPE_INV) {
1223		l2b->l2b_occupancy++;
1224		pm->pm_stats.resident_count++;
1225	}
1226
1227	NPDEBUG(PDB_ENTER,
1228	    printf("pmap_enter: opte 0x%08x npte 0x%08x\n", opte, npte));
1229
1230	/*
1231	 * If this is just a wiring change, the two PTEs will be
1232	 * identical, so there's no need to update the page table.
1233	 */
1234	if (npte != opte) {
1235		*ptep = npte;
1236		/*
1237		 * We only need to frob the cache/tlb if this pmap
1238		 * is current
1239		 */
1240		PTE_SYNC(ptep);
1241		if (npte & L2_V7_AF) {
1242			/*
1243			 * This mapping is likely to be accessed as
1244			 * soon as we return to userland. Fix up the
1245			 * L1 entry to avoid taking another page fault.
1246			 */
1247			pd_entry_t *pl1pd, l1pd;
1248
1249			pl1pd = &pm->pm_l1->l1_kva[L1_IDX(va)];
1250			l1pd = L1_C_PROTO | l2b->l2b_phys | l1_c_pxn;
1251			if (*pl1pd != l1pd) {
1252				*pl1pd = l1pd;
1253				PTE_SYNC(pl1pd);
1254			}
1255		}
1256
1257		if (opte & L2_V7_AF)
1258			pmap_tlb_flushID_SE(pm, va);
1259	}
1260
1261	/*
1262	 * Make sure executable pages do not have stale data in I$,
1263	 * which is VIPT.
1264	 */
1265	if (mapped && (prot & PROT_EXEC) != 0 && pmap_is_current(pm))
1266		cpu_icache_sync_range(va, PAGE_SIZE);
1267
1268	return (0);
1269}
1270
1271/*
1272 * pmap_remove()
1273 *
1274 * pmap_remove is responsible for nuking a number of mappings for a range
1275 * of virtual address space in the current pmap.
1276 */
1277
1278void
1279pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
1280{
1281	struct l2_bucket *l2b;
1282	vaddr_t next_bucket;
1283	pt_entry_t *ptep;
1284	u_int mappings;
1285
1286	NPDEBUG(PDB_REMOVE, printf("pmap_remove: pmap=%p sva=%08lx eva=%08lx\n",
1287	    pm, sva, eva));
1288
1289	while (sva < eva) {
1290		/*
1291		 * Do one L2 bucket's worth at a time.
1292		 */
1293		next_bucket = L2_NEXT_BUCKET(sva);
1294		if (next_bucket > eva)
1295			next_bucket = eva;
1296
1297		l2b = pmap_get_l2_bucket(pm, sva);
1298		if (l2b == NULL) {
1299			sva = next_bucket;
1300			continue;
1301		}
1302
1303		ptep = &l2b->l2b_kva[l2pte_index(sva)];
1304		mappings = 0;
1305
1306		while (sva < next_bucket) {
1307			struct vm_page *pg;
1308			pt_entry_t pte;
1309			paddr_t pa;
1310
1311			pte = *ptep;
1312
1313			if (pte == L2_TYPE_INV) {
1314				/*
1315				 * Nothing here, move along
1316				 */
1317				sva += PAGE_SIZE;
1318				ptep++;
1319				continue;
1320			}
1321
1322			pm->pm_stats.resident_count--;
1323			pa = l2pte_pa(pte);
1324
1325			/*
1326			 * Update flags. In a number of circumstances,
1327			 * we could cluster a lot of these and do a
1328			 * number of sequential pages in one go.
1329			 */
1330			pg = PHYS_TO_VM_PAGE(pa);
1331			if (pg != NULL) {
1332				struct pv_entry *pve;
1333				pve = pmap_remove_pv(pg, pm, sva);
1334				if (pve != NULL)
1335					pool_put(&pmap_pv_pool, pve);
1336			}
1337
1338			/*
1339			 * If the cache is physically indexed, we need
1340			 * to flush any changes to the page before it
1341			 * gets invalidated.
1342			 */
1343			if (pg != NULL)
1344				pmap_clean_page(pg);
1345
1346			*ptep = L2_TYPE_INV;
1347			PTE_SYNC(ptep);
1348			if (pte & L2_V7_AF)
1349				pmap_tlb_flushID_SE(pm, sva);
1350
1351			sva += PAGE_SIZE;
1352			ptep++;
1353			mappings++;
1354		}
1355
1356		/*
1357		 * Deal with any left overs
1358		 */
1359		if (!pmap_is_current(pm))
1360			cpu_idcache_wbinv_all();
1361
1362		pmap_free_l2_bucket(pm, l2b, mappings);
1363	}
1364}
1365
1366/*
1367 * pmap_kenter_pa: enter an unmanaged, wired kernel mapping
1368 *
1369 * We assume there is already sufficient KVM space available
1370 * to do this, as we can't allocate L2 descriptor tables/metadata
1371 * from here.
1372 */
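/*
 * Callers may OR PMAP_DEVICE or PMAP_NOCACHE into `pa' (see the handling
 * below) to request device-type or non-cacheable attributes, e.g.
 * (illustrative only):
 *
 *	pmap_kenter_pa(va, pa | PMAP_DEVICE, PROT_READ | PROT_WRITE);
 */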
1373void
1374pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1375{
1376	struct l2_bucket *l2b;
1377	pt_entry_t *ptep, opte, npte;
1378	pt_entry_t cache_mode = pte_l2_s_cache_mode;
1379
1380	NPDEBUG(PDB_KENTER,
1381	    printf("pmap_kenter_pa: va 0x%08lx, pa 0x%08lx, prot 0x%x\n",
1382	    va, pa, prot));
1383
1384	l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1385	KDASSERT(l2b != NULL);
1386
1387	ptep = &l2b->l2b_kva[l2pte_index(va)];
1388	opte = *ptep;
1389
1390	if (opte == L2_TYPE_INV)
1391		l2b->l2b_occupancy++;
1392
1393	if (pa & PMAP_DEVICE)
1394		cache_mode = L2_B | L2_V7_S_XN;
1395	else if (pa & PMAP_NOCACHE)
1396		cache_mode = L2_V7_S_TEX(1);
1397
1398	npte = L2_S_PROTO | (pa & PMAP_PA_MASK) | L2_V7_AF |
1399	    L2_S_PROT(PTE_KERNEL, prot) | cache_mode;
1400	*ptep = npte;
1401	PTE_SYNC(ptep);
1402	if (opte & L2_V7_AF)
1403		cpu_tlb_flushD_SE(va);
1404
1405	if (pa & PMAP_NOCACHE) {
1406		cpu_dcache_wbinv_range(va, PAGE_SIZE);
1407		cpu_sdcache_wbinv_range(va, (pa & PMAP_PA_MASK), PAGE_SIZE);
1408	}
1409}
1410
1411void
1412pmap_kenter_cache(vaddr_t va, paddr_t pa, vm_prot_t prot, int cacheable)
1413{
1414	if (cacheable == 0)
1415		pa |= PMAP_NOCACHE;
1416	pmap_kenter_pa(va, pa, prot);
1417}
1418
1419void
1420pmap_kremove(vaddr_t va, vsize_t len)
1421{
1422	struct l2_bucket *l2b;
1423	pt_entry_t *ptep, *sptep, opte;
1424	vaddr_t next_bucket, eva;
1425	u_int mappings;
1426
1427	NPDEBUG(PDB_KREMOVE, printf("pmap_kremove: va 0x%08lx, len 0x%08lx\n",
1428	    va, len));
1429
1430	eva = va + len;
1431
1432	while (va < eva) {
1433		next_bucket = L2_NEXT_BUCKET(va);
1434		if (next_bucket > eva)
1435			next_bucket = eva;
1436
1437		l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1438		KDASSERT(l2b != NULL);
1439
1440		sptep = ptep = &l2b->l2b_kva[l2pte_index(va)];
1441		mappings = 0;
1442
1443		while (va < next_bucket) {
1444			opte = *ptep;
1445			if (opte != L2_TYPE_INV) {
1446				*ptep = L2_TYPE_INV;
1447				PTE_SYNC(ptep);
1448				mappings++;
1449			}
1450			if (opte & L2_V7_AF)
1451				cpu_tlb_flushD_SE(va);
1452			va += PAGE_SIZE;
1453			ptep++;
1454		}
1455		KDASSERT(mappings <= l2b->l2b_occupancy);
1456		l2b->l2b_occupancy -= mappings;
1457	}
1458}
1459
1460int
1461pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap)
1462{
1463	struct l2_dtable *l2;
1464	pd_entry_t *pl1pd, l1pd;
1465	pt_entry_t *ptep, pte;
1466	paddr_t pa;
1467	u_int l1idx;
1470	l1idx = L1_IDX(va);
1471	pl1pd = &pm->pm_l1->l1_kva[l1idx];
1472	l1pd = *pl1pd;
1473
1474	if (l1pte_section_p(l1pd)) {
1475		/*
1476		 * These should only happen for pmap_kernel()
1477		 */
1478		KDASSERT(pm == pmap_kernel());
1479		pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
1480	} else {
1481		/*
1482		 * Note that we can't rely on the validity of the L1
1483		 * descriptor as an indication that a mapping exists.
1484		 * We have to look it up in the L2 dtable.
1485		 */
1486		l2 = pm->pm_l2[L2_IDX(l1idx)];
1487
1488		if (l2 == NULL ||
1489		    (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
1490			return 0;
1491		}
1492
1493		ptep = &ptep[l2pte_index(va)];
1494		pte = *ptep;
1495
1496		if (pte == L2_TYPE_INV)
1497			return 0;
1498
1499		switch (pte & L2_TYPE_MASK) {
1500		case L2_TYPE_L:
1501			pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
1502			break;
1503		/*
1504		 * Can't check for L2_TYPE_S on V7 because of the XN
1505		 * bit being part of L2_TYPE_MASK for S mappings.
1506		 */
1507		default:
1508			pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
1509			break;
1510		}
1511	}
1512
1513	if (pap != NULL)
1514		*pap = pa;
1515
1516	return 1;
1517}
1518
1519void
1520pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1521{
1522	struct l2_bucket *l2b;
1523	pt_entry_t *ptep, opte, npte;
1524	vaddr_t next_bucket;
1525	int flush;
1526
1527	NPDEBUG(PDB_PROTECT,
1528	    printf("pmap_protect: pm %p sva 0x%lx eva 0x%lx prot 0x%x",
1529	    pm, sva, eva, prot));
1530
1531	if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1532		return;
1533
1534	if (prot == PROT_NONE) {
1535		pmap_remove(pm, sva, eva);
1536		return;
1537	}
1538
1539	/* XXX is that threshold of 4 the best choice for v7? */
1540	if (pmap_is_current(pm))
1541		flush = ((eva - sva) > (PAGE_SIZE * 4)) ? -1 : 0;
1542	else
1543		flush = -1;
1544
1545	while (sva < eva) {
1546		next_bucket = L2_NEXT_BUCKET(sva);
1547		if (next_bucket > eva)
1548			next_bucket = eva;
1549
1550		l2b = pmap_get_l2_bucket(pm, sva);
1551		if (l2b == NULL) {
1552			sva = next_bucket;
1553			continue;
1554		}
1555
1556		ptep = &l2b->l2b_kva[l2pte_index(sva)];
1557
1558		while (sva < next_bucket) {
1559			npte = opte = *ptep;
1560			if (opte != L2_TYPE_INV) {
1561				struct vm_page *pg;
1562
1563				if ((prot & PROT_WRITE) == 0)
1564					npte |= L2_V7_AP(0x4);
1565				if ((prot & PROT_EXEC) == 0)
1566					npte |= L2_V7_S_XN;
1567				*ptep = npte;
1568				PTE_SYNC(ptep);
1569
1570				pg = PHYS_TO_VM_PAGE(l2pte_pa(opte));
1571				if (pg != NULL && (prot & PROT_WRITE) == 0)
1572					pmap_modify_pv(pg, pm, sva,
1573					    PVF_WRITE, 0);
1574
1575				if (flush >= 0) {
1576					flush++;
1577					if (opte & L2_V7_AF)
1578						cpu_tlb_flushID_SE(sva);
1579				}
1580			}
1581
1582			sva += PAGE_SIZE;
1583			ptep++;
1584		}
1585	}
1586
1587	if (flush < 0)
1588		pmap_tlb_flushID(pm);
1589
1590	NPDEBUG(PDB_PROTECT, printf("\n"));
1591}
1592
1593void
1594pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1595{
1596
1597	NPDEBUG(PDB_PROTECT,
1598	    printf("pmap_page_protect: pg %p (0x%08lx), prot 0x%x\n",
1599	    pg, pg->phys_addr, prot));
1600
1601	switch(prot) {
1602	case PROT_READ | PROT_WRITE | PROT_EXEC:
1603	case PROT_READ | PROT_WRITE:
1604		return;
1605
1606	case PROT_READ:
1607	case PROT_READ | PROT_EXEC:
1608		pmap_clearbit(pg, PVF_WRITE);
1609		break;
1610
1611	default:
1612		pmap_page_remove(pg);
1613		break;
1614	}
1615}
1616
1617/*
1618 * pmap_clear_modify:
1619 *
1620 *	Clear the "modified" attribute for a page.
1621 */
1622int
1623pmap_clear_modify(struct vm_page *pg)
1624{
1625	int rv;
1626
1627	if (pg->mdpage.pvh_attrs & PVF_MOD) {
1628		rv = 1;
1629		pmap_clearbit(pg, PVF_MOD);
1630	} else
1631		rv = 0;
1632
1633	return (rv);
1634}
1635
1636/*
1637 * pmap_clear_reference:
1638 *
1639 *	Clear the "referenced" attribute for a page.
1640 */
1641int
1642pmap_clear_reference(struct vm_page *pg)
1643{
1644	int rv;
1645
1646	if (pg->mdpage.pvh_attrs & PVF_REF) {
1647		rv = 1;
1648		pmap_clearbit(pg, PVF_REF);
1649	} else
1650		rv = 0;
1651
1652	return (rv);
1653}
1654
1655/*
1656 * pmap_is_modified:
1657 *
1658 *	Test if a page has the "modified" attribute.
1659 */
1660/* See <arm/pmap.h> */
1661
1662/*
1663 * pmap_is_referenced:
1664 *
1665 *	Test if a page has the "referenced" attribute.
1666 */
1667/* See <arm/pmap.h> */
1668
1669/*
1670 * dab_access() handles the following data aborts:
1671 *
1672 *  FAULT_ACCESS_2 - Access flag fault -- Level 2
1673 *
1674 * Set the Access Flag and mark the page as referenced.
1675 */
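/*
 * This is the other half of the referenced emulation set up in pmap_enter():
 * a mapping of a page with no PVF_REF attribute is entered with the Access
 * Flag clear, the first access faults here, and we set L2_V7_AF together
 * with PVF_REF.  A later pmap_clearbit(pg, PVF_REF) clears the flag again,
 * re-arming the fault for the next access.
 */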
1676int
1677dab_access(trapframe_t *tf, u_int fsr, u_int far, struct proc *p)
1678{
1679	struct pmap *pm = p->p_vmspace->vm_map.pmap;
1680	vaddr_t va = trunc_page(far);
1681	struct l2_dtable *l2;
1682	struct l2_bucket *l2b;
1683	pt_entry_t *ptep, pte;
1684	struct pv_entry *pv;
1685	struct vm_page *pg;
1686	paddr_t pa;
1687	u_int l1idx;
1688
1689	if (!TRAP_USERMODE(tf) && far >= VM_MIN_KERNEL_ADDRESS)
1690		pm = pmap_kernel();
1691
1692	l1idx = L1_IDX(va);
1693
1694	/*
1695	 * If there is no l2_dtable for this address, then the process
1696	 * has no business accessing it.
1697	 */
1698	l2 = pm->pm_l2[L2_IDX(l1idx)];
1699	KASSERT(l2 != NULL);
1700
1701	/*
1702	 * Likewise if there is no L2 descriptor table
1703	 */
1704	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
1705	KASSERT(l2b->l2b_kva != NULL);
1706
1707	/*
1708	 * Check the PTE itself.
1709	 */
1710	ptep = &l2b->l2b_kva[l2pte_index(va)];
1711	pte = *ptep;
1712	KASSERT(pte != L2_TYPE_INV);
1713
1714	pa = l2pte_pa(pte);
1715
1716	/*
1717	 * Perform page referenced emulation.
1718	 */
1719	KASSERT((pte & L2_V7_AF) == 0);
1720
1721	/* Extract the physical address of the page */
1722	pg = PHYS_TO_VM_PAGE(pa);
1723	KASSERT(pg != NULL);
1724
1725	/* Get the current flags for this page. */
1726	pv = pmap_find_pv(pg, pm, va);
1727	KASSERT(pv != NULL);
1728
1729	pg->mdpage.pvh_attrs |= PVF_REF;
1730	pv->pv_flags |= PVF_REF;
1731	pte |= L2_V7_AF;
1732
1733	*ptep = pte;
1734	PTE_SYNC(ptep);
1735	return 0;
1736}
1737
/*
 * Routine:	pmap_proc_iflush
 *
 * Function:
 *	Synchronize caches corresponding to [va, va+len) in the given process.
 */
1745void
1746pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
1747{
1748	/* We only need to do anything if it is the current process. */
1749	if (pr == curproc->p_p)
1750		cpu_icache_sync_range(va, len);
1751}
1752
1753/*
1754 * Routine:	pmap_unwire
1755 * Function:	Clear the wired attribute for a map/virtual-address pair.
1756 *
1757 * In/out conditions:
1758 *		The mapping must already exist in the pmap.
1759 */
1760void
1761pmap_unwire(pmap_t pm, vaddr_t va)
1762{
1763	struct l2_bucket *l2b;
1764	pt_entry_t *ptep, pte;
1765	struct vm_page *pg;
1766	paddr_t pa;
1767
1768	NPDEBUG(PDB_WIRING, printf("pmap_unwire: pm %p, va 0x%08lx\n", pm, va));
1769
1770	l2b = pmap_get_l2_bucket(pm, va);
1771	KDASSERT(l2b != NULL);
1772
1773	ptep = &l2b->l2b_kva[l2pte_index(va)];
1774	pte = *ptep;
1775
1776	/* Extract the physical address of the page */
1777	pa = l2pte_pa(pte);
1778
1779	if ((pg = PHYS_TO_VM_PAGE(pa)) != NULL) {
1780		/* Update the wired bit in the pv entry for this page. */
1781		(void) pmap_modify_pv(pg, pm, va, PVF_WIRED, 0);
1782	}
1783}
1784
1785void
1786pmap_activate(struct proc *p)
1787{
1788	pmap_t pm;
1789	struct pcb *pcb;
1790
1791	pm = p->p_vmspace->vm_map.pmap;
1792	pcb = &p->p_addr->u_pcb;
1793
1794	pmap_set_pcb_pagedir(pm, pcb);
1795
1796	if (p == curproc) {
1797		u_int cur_ttb;
1798
1799		__asm volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb));
1800
1801		cur_ttb &= ~(L1_TABLE_SIZE - 1);
1802
1803		if (cur_ttb == (u_int)pcb->pcb_pagedir) {
1804			/*
1805			 * No need to switch address spaces.
1806			 */
1807			return;
1808		}
1809
1810		__asm volatile("cpsid if");
1811		cpu_setttb(pcb->pcb_pagedir);
1812		__asm volatile("cpsie if");
1813	}
1814}
1815
1816void
1817pmap_update(pmap_t pm)
1818{
1819	/*
1820	 * make sure TLB/cache operations have completed.
1821	 */
1822}
1823
1824/*
1825 * Retire the given physical map from service.
1826 * Should only be called if the map contains no valid mappings.
1827 */
1828void
1829pmap_destroy(pmap_t pm)
1830{
1831	u_int count;
1832
1833	/*
1834	 * Drop reference count
1835	 */
1836	count = --pm->pm_refs;
1837	if (count > 0)
1838		return;
1839
1840	/*
1841	 * reference count is zero, free pmap resources and then free pmap.
1842	 */
1843
1844	pmap_free_l1(pm);
1845
1846	/* return the pmap to the pool */
1847	pool_put(&pmap_pmap_pool, pm);
1848}
1849
1850
1851/*
1852 * void pmap_reference(pmap_t pm)
1853 *
1854 * Add a reference to the specified pmap.
1855 */
1856void
1857pmap_reference(pmap_t pm)
1858{
1859	if (pm == NULL)
1860		return;
1861
1862	pm->pm_refs++;
1863}
1864
1865/*
1866 * pmap_zero_page()
1867 *
1868 * Zero a given physical page by mapping it at a page hook point.
1869 * In doing the zero page op, the page we zero is mapped cacheable, as with
1870 * StrongARM accesses to non-cached pages are non-burst making writing
1871 * _any_ bulk data very slow.
1872 */
1873void
1874pmap_zero_page_generic(struct vm_page *pg)
1875{
1876	paddr_t phys = VM_PAGE_TO_PHYS(pg);
1877#ifdef DEBUG
1878	if (pg->mdpage.pvh_list != NULL)
1879		panic("pmap_zero_page: page has mappings");
1880#endif
1881
1882	/*
1883	 * Hook in the page, zero it, and purge the cache for that
1884	 * zeroed page. Invalidate the TLB as needed.
1885	 */
1886	*cdst_pte = L2_S_PROTO | phys | L2_V7_AF |
1887	    L2_S_PROT(PTE_KERNEL, PROT_WRITE) | pte_l2_s_cache_mode;
1888	PTE_SYNC(cdst_pte);
1889	cpu_tlb_flushD_SE(cdstp);
1890	bzero_page(cdstp);
1891}
1892
1893/*
1894 * pmap_copy_page()
1895 *
1896 * Copy one physical page into another, by mapping the pages into
1897 * hook points. The same comment regarding cachability as in
1898 * pmap_zero_page also applies here.
1899 */
1900void
1901pmap_copy_page_generic(struct vm_page *src_pg, struct vm_page *dst_pg)
1902{
1903	paddr_t src = VM_PAGE_TO_PHYS(src_pg);
1904	paddr_t dst = VM_PAGE_TO_PHYS(dst_pg);
1905#ifdef DEBUG
1906	if (dst_pg->mdpage.pvh_list != NULL)
1907		panic("pmap_copy_page: dst page has mappings");
1908#endif
1909
1910	/*
1911	 * Map the pages into the page hook points, copy them, and purge
1912	 * the cache for the appropriate page. Invalidate the TLB
1913	 * as required.
1914	 */
1915	*csrc_pte = L2_S_PROTO | src | L2_V7_AF |
1916	    L2_S_PROT(PTE_KERNEL, PROT_READ) | pte_l2_s_cache_mode;
1917	PTE_SYNC(csrc_pte);
1918	*cdst_pte = L2_S_PROTO | dst | L2_V7_AF |
1919	    L2_S_PROT(PTE_KERNEL, PROT_WRITE) | pte_l2_s_cache_mode;
1920	PTE_SYNC(cdst_pte);
1921	cpu_tlb_flushD_SE(csrcp);
1922	cpu_tlb_flushD_SE(cdstp);
1923	bcopy_page(csrcp, cdstp);
1924}
1925
1926/*
1927 * void pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1928 *
1929 * Return the start and end addresses of the kernel's virtual space.
1930 * These values are setup in pmap_bootstrap and are updated as pages
1931 * are allocated.
1932 */
1933void
1934pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1935{
1936	*start = virtual_avail;
1937	*end = virtual_end;
1938}
1939
1940/*
1941 * Helper function for pmap_grow_l2_bucket()
1942 */
1943static __inline int
1944pmap_grow_map(vaddr_t va, pt_entry_t cache_mode, paddr_t *pap)
1945{
1946	struct l2_bucket *l2b;
1947	pt_entry_t *ptep;
1948	paddr_t pa;
1949
1950	KASSERT((va & PAGE_MASK) == 0);
1951
1952	if (uvm.page_init_done == 0) {
1953		if (uvm_page_physget(&pa) == 0)
1954			return (1);
1955	} else {
1956		struct vm_page *pg;
1957		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE);
1958		if (pg == NULL)
1959			return (1);
1960		pa = VM_PAGE_TO_PHYS(pg);
1961	}
1962
1963	if (pap)
1964		*pap = pa;
1965
1966	l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1967	KDASSERT(l2b != NULL);
1968
1969	ptep = &l2b->l2b_kva[l2pte_index(va)];
1970	*ptep = L2_S_PROTO | pa | L2_V7_AF | cache_mode |
1971	    L2_S_PROT(PTE_KERNEL, PROT_READ | PROT_WRITE);
1972	PTE_SYNC(ptep);
1973	cpu_tlb_flushD_SE(va);
1974
1975	memset((void *)va, 0, PAGE_SIZE);
1976	return (0);
1977}
1978
1979/*
1980 * This is the same as pmap_alloc_l2_bucket(), except that it is only
1981 * used by pmap_growkernel().
1982 */
1983static __inline struct l2_bucket *
1984pmap_grow_l2_bucket(pmap_t pm, vaddr_t va)
1985{
1986	struct l2_dtable *l2;
1987	struct l2_bucket *l2b;
1988	u_short l1idx;
1989	vaddr_t nva;
1990
1991	l1idx = L1_IDX(va);
1992
1993	if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
1994		/*
1995		 * No mapping at this address, as there is
1996		 * no entry in the L1 table.
1997		 * Need to allocate a new l2_dtable.
1998		 */
1999		nva = pmap_kernel_l2dtable_kva;
2000		if ((nva & PGOFSET) == 0) {
2001			/*
2002			 * Need to allocate a backing page
2003			 */
2004			if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2005				return (NULL);
2006		}
2007
2008		l2 = (struct l2_dtable *)nva;
2009		nva += sizeof(struct l2_dtable);
2010
2011		if ((nva & PGOFSET) < (pmap_kernel_l2dtable_kva & PGOFSET)) {
2012			/*
2013			 * The new l2_dtable straddles a page boundary.
2014			 * Map in another page to cover it.
2015			 */
2016			if (pmap_grow_map(trunc_page(nva),
2017			    pte_l2_s_cache_mode, NULL))
2018				return (NULL);
2019		}
2020
2021		pmap_kernel_l2dtable_kva = nva;
2022
2023		/*
2024		 * Link it into the parent pmap
2025		 */
2026		pm->pm_l2[L2_IDX(l1idx)] = l2;
2027	}
2028
2029	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2030
2031	/*
2032	 * Fetch pointer to the L2 page table associated with the address.
2033	 */
2034	if (l2b->l2b_kva == NULL) {
2035		pt_entry_t *ptep;
2036
2037		/*
2038		 * No L2 page table has been allocated. Chances are, this
2039		 * is because we just allocated the l2_dtable, above.
2040		 */
2041		nva = pmap_kernel_l2ptp_kva;
2042		ptep = (pt_entry_t *)nva;
2043		if ((nva & PGOFSET) == 0) {
2044			/*
2045			 * Need to allocate a backing page
2046			 */
2047			if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt,
2048			    &pmap_kernel_l2ptp_phys))
2049				return (NULL);
2050			PTE_SYNC_RANGE(ptep, PAGE_SIZE / sizeof(pt_entry_t));
2051		}
2052
2053		l2->l2_occupancy++;
2054		l2b->l2b_kva = ptep;
2055		l2b->l2b_l1idx = l1idx;
2056		l2b->l2b_phys = pmap_kernel_l2ptp_phys;
2057
2058		pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL;
2059		pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL;
2060	}
2061
2062	return (l2b);
2063}
2064
2065vaddr_t
2066pmap_growkernel(vaddr_t maxkvaddr)
2067{
2068	pmap_t kpm = pmap_kernel();
2069	struct l1_ttable *l1;
2070	struct l2_bucket *l2b;
2071	pd_entry_t *pl1pd;
2072	int s;
2073
2074	if (maxkvaddr <= pmap_curmaxkvaddr)
2075		goto out;		/* we are OK */
2076
2077	NPDEBUG(PDB_GROWKERN,
2078	    printf("pmap_growkernel: growing kernel from 0x%lx to 0x%lx\n",
2079	    pmap_curmaxkvaddr, maxkvaddr));
2080
2081	KDASSERT(maxkvaddr <= virtual_end);
2082
2083	/*
2084	 * whoops!  we need to add kernel PTPs to cover the new range
2085	 */
2086
2087	s = splhigh();	/* to be safe */
2088
2089	/* Map 1MB at a time */
2090	for (; pmap_curmaxkvaddr < maxkvaddr; pmap_curmaxkvaddr += L1_S_SIZE) {
2091
2092		l2b = pmap_grow_l2_bucket(kpm, pmap_curmaxkvaddr);
2093		KDASSERT(l2b != NULL);
2094
2095		/* Distribute new L1 entry to all other L1s */
2096		TAILQ_FOREACH(l1, &l1_list, l1_link) {
2097			pl1pd = &l1->l1_kva[L1_IDX(pmap_curmaxkvaddr)];
2098			*pl1pd = L1_C_PROTO | l2b->l2b_phys;
2099			PTE_SYNC(pl1pd);
2100		}
2101	}
2102
2103	/*
2104	 * Flush out the caches; this is expensive, but pmap_growkernel()
2105	 * is called rarely enough that it does not matter
2106	 */
2107	cpu_dcache_wbinv_all();
2108	cpu_sdcache_wbinv_all();
2109	cpu_tlb_flushD();
2110
2111	splx(s);
2112
2113out:
2114	return (pmap_curmaxkvaddr);
2115}
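
/*
 * Illustrative sketch, not part of this file: the MI VM code grows the
 * kernel map on demand, roughly along these lines ("wanted_va" is a
 * hypothetical name):
 *
 *	if (wanted_va > pmap_curmaxkvaddr)
 *		(void)pmap_growkernel(wanted_va);
 *
 * The return value is the updated pmap_curmaxkvaddr, which is advanced in
 * L1_S_SIZE (1MB) steps until it covers wanted_va.
 */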
2116
2117/************************ Utility routines ****************************/
2118
2119/*
2120 * vector_page_setprot:
2121 *
2122 *	Manipulate the protection of the vector page.
2123 */
2124void
2125vector_page_setprot(int prot)
2126{
2127	struct l2_bucket *l2b;
2128	pt_entry_t *ptep;
2129
2130	l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page);
2131	KDASSERT(l2b != NULL);
2132
2133	ptep = &l2b->l2b_kva[l2pte_index(vector_page)];
2134
2135	*ptep = (*ptep & ~L2_S_PROT_MASK) | L2_S_PROT(PTE_KERNEL, prot);
2136	PTE_SYNC(ptep);
2137	cpu_tlb_flushD_SE(vector_page);
2138}
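
/*
 * Illustrative sketch, not part of this file: MD start-up code that has
 * just written the exception vectors into the vector page could revoke
 * write access again with, for example:
 *
 *	vector_page_setprot(PROT_READ | PROT_EXEC);
 *
 * The protection value used by the real callers may differ; this only
 * demonstrates the interface.
 */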
2139
2140/*
2141 * This is used to stuff certain critical values into the PCB where they
2142 * can be accessed quickly from cpu_switch() et al.
2143 */
2144void
2145pmap_set_pcb_pagedir(pmap_t pm, struct pcb *pcb)
2146{
2147	KDASSERT(pm->pm_l1);
2148	pcb->pcb_pagedir = pm->pm_l1->l1_physaddr;
2149}
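
/*
 * Illustrative sketch, not part of this file: this is typically called
 * when a pmap is activated for a process, e.g. something like
 *
 *	pmap_set_pcb_pagedir(p->p_vmspace->vm_map.pmap, &p->p_addr->u_pcb);
 *
 * (the structure fields shown are the traditional BSD ones and may differ
 * here), so that cpu_switch() can load the translation table base from
 * pcb_pagedir without walking any pmap metadata.
 */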
2150
2151/*
2152 * Fetch pointers to the PDE/PTE for the given pmap/VA pair.
2153 * Returns 1 if the mapping exists, else 0.
2154 *
2155 * NOTE: This function is only used by a couple of arm-specific modules.
2156 * It is not safe to take any pmap locks here, since we could be right
2157 * in the middle of debugging the pmap anyway...
2158 *
2159 * It is possible for this routine to return 0 even though a valid
2160 * mapping does exist. This is because we don't lock, so the metadata
2161 * state may be inconsistent.
2162 *
2163 * NOTE: We can return a NULL *ptp in the case where the L1 pde is
2164 * a "section" mapping.
2165 */
2166int
2167pmap_get_pde_pte(pmap_t pm, vaddr_t va, pd_entry_t **pdp, pt_entry_t **ptp)
2168{
2169	struct l2_dtable *l2;
2170	pd_entry_t *pl1pd, l1pd;
2171	pt_entry_t *ptep;
2172	u_short l1idx;
2173
2174	if (pm->pm_l1 == NULL)
2175		return 0;
2176
2177	l1idx = L1_IDX(va);
2178	*pdp = pl1pd = &pm->pm_l1->l1_kva[l1idx];
2179	l1pd = *pl1pd;
2180
2181	if (l1pte_section_p(l1pd)) {
2182		*ptp = NULL;
2183		return 1;
2184	}
2185
2186	l2 = pm->pm_l2[L2_IDX(l1idx)];
2187	if (l2 == NULL ||
2188	    (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
2189		return 0;
2190	}
2191
2192	*ptp = &ptep[l2pte_index(va)];
2193	return 1;
2194}
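
/*
 * Illustrative sketch, not part of this file: a debugger-style consumer
 * might use pmap_get_pde_pte() as follows (names hypothetical):
 *
 *	pd_entry_t *pdep;
 *	pt_entry_t *ptep;
 *
 *	if (pmap_get_pde_pte(pm, va, &pdep, &ptep) == 0)
 *		return;		(no mapping metadata was found)
 *	if (ptep == NULL)
 *		printf("section pde %08x\n", *pdep);
 *	else
 *		printf("pte %08x\n", *ptep);
 *
 * Since no locks are taken, treat the result as advisory only.
 */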
2195
2196/************************ Bootstrapping routines ****************************/
2197
2198void
2199pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt)
2200{
2201	l1->l1_kva = l1pt;
2202
2203	/*
2204	 * Copy the kernel's L1 entries to each new L1.
2205	 */
2206	if (pmap_initialized)
2207		memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE);
2208
2209	if (pmap_extract(pmap_kernel(), (vaddr_t)l1pt, &l1->l1_physaddr) == 0)
2210		panic("pmap_init_l1: can't get PA of L1 at %p", l1pt);
2211
2212	TAILQ_INSERT_TAIL(&l1_list, l1, l1_link);
2213}
2214
2215/*
2216 * pmap_bootstrap() is called from the board-specific initarm() routine
2217 * once the kernel L1/L2 descriptors tables have been set up.
2218 *
2219 * This is a somewhat convoluted process since pmap bootstrap is, effectively,
2220 * spread over a number of disparate files/functions.
2221 *
2222 * We are passed the following parameters:
2223 *  - kernel_l1pt
2224 *    This is a pointer to the base of the kernel's L1 translation table.
2225 *  - vstart
2226 *    1MB-aligned start of managed kernel virtual memory.
2227 *  - vend
2228 *    1MB-aligned end of managed kernel virtual memory.
2229 *
2230 * We use the first parameter to build the metadata (struct l1_ttable and
2231 * struct l2_dtable) necessary to track kernel mappings.
2232 */
2233#define	PMAP_STATIC_L2_SIZE 16
2234void
2235pmap_bootstrap(pd_entry_t *kernel_l1pt, vaddr_t vstart, vaddr_t vend)
2236{
2237	static struct l1_ttable static_l1;
2238	static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE];
2239	struct l1_ttable *l1 = &static_l1;
2240	struct l2_dtable *l2;
2241	struct l2_bucket *l2b;
2242	pmap_t pm = pmap_kernel();
2243	pd_entry_t pde;
2244	pt_entry_t *ptep;
2245	paddr_t pa;
2246	vsize_t size;
2247	int l1idx, l2idx, l2next = 0;
2248
2249	/*
2250	 * Initialise the kernel pmap object
2251	 */
2252	pm->pm_l1 = l1;
2253	pm->pm_refs = 1;
2254
2255	/*
2256	 * Scan the L1 translation table created by initarm() and create
2257	 * the required metadata for all valid mappings found in it.
2258	 */
2259	for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) {
2260		pde = kernel_l1pt[l1idx];
2261
2262		/*
2263		 * We're only interested in Coarse mappings.
2264		 * pmap_extract() can deal with section mappings without
2265		 * recourse to checking L2 metadata.
2266		 */
2267		if ((pde & L1_TYPE_MASK) != L1_TYPE_C)
2268			continue;
2269
2270		/*
2271		 * Lookup the KVA of this L2 descriptor table
2272		 */
2273		pa = (paddr_t)(pde & L1_C_ADDR_MASK);
2274		ptep = (pt_entry_t *)kernel_pt_lookup(pa);
2275		if (ptep == NULL) {
2276			panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx",
2277			    (u_int)l1idx << L1_S_SHIFT, pa);
2278		}
2279
2280		/*
2281		 * Fetch the associated L2 metadata structure.
2282		 * Allocate a new one if necessary.
2283		 */
2284		if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
2285			if (l2next == PMAP_STATIC_L2_SIZE)
2286				panic("pmap_bootstrap: out of static L2s");
2287			pm->pm_l2[L2_IDX(l1idx)] = l2 = &static_l2[l2next++];
2288		}
2289
2290		/*
2291		 * One more L1 slot tracked...
2292		 */
2293		l2->l2_occupancy++;
2294
2295		/*
2296		 * Fill in the details of the L2 descriptor in the
2297		 * appropriate bucket.
2298		 */
2299		l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2300		l2b->l2b_kva = ptep;
2301		l2b->l2b_phys = pa;
2302		l2b->l2b_l1idx = l1idx;
2303
2304		/*
2305		 * Establish an initial occupancy count for this descriptor
2306		 */
2307		for (l2idx = 0;
2308		    l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
2309		    l2idx++) {
2310			if (ptep[l2idx] != L2_TYPE_INV)
2311				l2b->l2b_occupancy++;
2312		}
2313	}
2314
2315	cpu_idcache_wbinv_all();
2316	cpu_sdcache_wbinv_all();
2317	cpu_tlb_flushID();
2318
2319	/*
2320	 * Now we allocate the "special" VAs which are used for temporary
2321	 * mappings by the pmap (and other modules).  We allocate the VAs
2322	 * by advancing virtual_avail (no pages are mapped at these VAs).
2323	 *
2324	 * Managed KVM space starts from wherever initarm() tells us.
2325	 */
2326	virtual_avail = vstart;
2327	virtual_end = vend;
2328
2329	pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte);
2330	pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte);
2331	pmap_alloc_specials(&virtual_avail, 1, &cwbp, &cwb_pte);
2332	pmap_alloc_specials(&virtual_avail, 1, (void *)&memhook, NULL);
2333	pmap_alloc_specials(&virtual_avail, round_page(MSGBUFSIZE) / PAGE_SIZE,
2334	    (void *)&msgbufaddr, NULL);
2335
2336	/*
2337	 * Allocate a range of kernel virtual address space to be used
2338	 * for L2 descriptor tables and metadata allocation in
2339	 * pmap_growkernel().
2340	 */
2341	size = ((virtual_end - pmap_curmaxkvaddr) + L1_S_OFFSET) / L1_S_SIZE;
2342	pmap_alloc_specials(&virtual_avail,
2343	    round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE,
2344	    &pmap_kernel_l2ptp_kva, NULL);
2345
2346	size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE;
2347	pmap_alloc_specials(&virtual_avail,
2348	    round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE,
2349	    &pmap_kernel_l2dtable_kva, NULL);
2350
2351	/*
2352	 * We can now initialise the first L1's metadata.
2353	 */
2354	TAILQ_INIT(&l1_list);
2355	pmap_init_l1(l1, kernel_l1pt);
2356
2357	/*
2358	 * Initialize the pmap pool.
2359	 */
2360	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
2361	    "pmappl", &pool_allocator_single);
2362	pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, IPL_VM, 0,
2363	    "pvepl", &pmap_bootstrap_pv_allocator);
2364	pool_init(&pmap_l2dtable_pool, sizeof(struct l2_dtable), 0, IPL_VM, 0,
2365	    "l2dtblpl", NULL);
2366	pool_init(&pmap_l2ptp_pool, L2_TABLE_SIZE_REAL, L2_TABLE_SIZE_REAL,
2367	    IPL_VM, 0, "l2ptppl", &pool_allocator_single);
2368
2369	cpu_dcache_wbinv_all();
2370	cpu_sdcache_wbinv_all();
2371}
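
/*
 * Illustrative sketch, not part of this file: a board's initarm() builds
 * the initial tables with the bootstrapping helpers further down in this
 * file and then hands them over to pmap_bootstrap().  Roughly, with all
 * symbol names hypothetical:
 *
 *	pmap_link_l2pt(l1pt_va, kernel_va_base, &kernel_l2pt[0]);
 *	pmap_map_chunk(l1pt_va, kernel_va_base, kernel_pa_base,
 *	    kernel_image_size, PROT_READ | PROT_WRITE, PTE_CACHE);
 *	...
 *	pmap_bootstrap((pd_entry_t *)l1pt_va, kvm_start, kvm_end);
 *
 * where kvm_start and kvm_end are the 1MB-aligned bounds of the KVA range
 * this pmap is to manage.
 */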
2372
2373void
2374pmap_alloc_specials(vaddr_t *availp, int pages, vaddr_t *vap, pt_entry_t **ptep)
2375{
2376	vaddr_t va = *availp;
2377	struct l2_bucket *l2b;
2378
2379	if (ptep) {
2380		l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2381		if (l2b == NULL)
2382			panic("pmap_alloc_specials: no l2b for 0x%lx", va);
2383
2384		/* ptep is known to be non-NULL here; no need to re-test it */
2385		*ptep = &l2b->l2b_kva[l2pte_index(va)];
2386	}
2387
2388	*vap = va;
2389	*availp = va + (PAGE_SIZE * pages);
2390}
2391
2392void
2393pmap_init(void)
2394{
2395	pool_setlowat(&pmap_pv_pool, (PAGE_SIZE / sizeof(struct pv_entry)) * 2);
2396
2397	pmap_initialized = 1;
2398}
2399
2400static vaddr_t last_bootstrap_page = 0;
2401static void *free_bootstrap_pages = NULL;
2402
2403void *
2404pmap_bootstrap_pv_page_alloc(struct pool *pp, int flags, int *slowdown)
2405{
2406	extern void *pool_page_alloc(struct pool *, int, int *);
2407	vaddr_t new_page;
2408	void *rv;
2409
2410	if (pmap_initialized)
2411		return (pool_page_alloc(pp, flags, slowdown));
2412	*slowdown = 0;
2413
2414	if (free_bootstrap_pages) {
2415		rv = free_bootstrap_pages;
2416		free_bootstrap_pages = *((void **)rv);
2417		return (rv);
2418	}
2419
2420	new_page = uvm_km_kmemalloc(kernel_map, NULL, PAGE_SIZE,
2421	    (flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT);
2422
2423	last_bootstrap_page = new_page;
2424	return ((void *)new_page);
2425}
2426
2427void
2428pmap_bootstrap_pv_page_free(struct pool *pp, void *v)
2429{
2430	extern void pool_page_free(struct pool *, void *);
2431
2432	if (pmap_initialized) {
2433		pool_page_free(pp, v);
2434		return;
2435	}
2436
2437	if ((vaddr_t)v < last_bootstrap_page) {
2438		*((void **)v) = free_bootstrap_pages;
2439		free_bootstrap_pages = v;
2440		return;
2441	}
2442}
2443
2444/*
2445 * pmap_postinit()
2446 *
2447 * This routine is called after the vm and kmem subsystems have been
2448 * initialised. This allows the pmap code to perform any initialisation
2449 * that can only be done once the memory allocation is in place.
2450 */
2451void
2452pmap_postinit(void)
2453{
2454	pool_setlowat(&pmap_l2ptp_pool,
2455	    (PAGE_SIZE / L2_TABLE_SIZE_REAL) * 4);
2456	pool_setlowat(&pmap_l2dtable_pool,
2457	    (PAGE_SIZE / sizeof(struct l2_dtable)) * 2);
2458}
2459
2460/*
2461 * Note that the following routines are used by board-specific initialisation
2462 * code to configure the initial kernel page tables.
2463 *
2464 * If ARM32_NEW_VM_LAYOUT is *not* defined, they operate on the assumption that
2465 * L2 page-table pages are 4KB in size and use 4 L1 slots. This mimics the
2466 * behaviour of the old pmap, and provides an easy migration path for
2467 * initial bring-up of the new pmap on existing ports. Fortunately,
2468 * pmap_bootstrap() compensates for this hackery. This is only a stop-gap and
2469 * will be deprecated.
2470 *
2471 * If ARM32_NEW_VM_LAYOUT *is* defined, these functions deal with 1KB L2 page
2472 * tables.
2473 */
2474
2475/*
2476 * This list exists for the benefit of pmap_map_chunk().  It keeps track
2477 * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can
2478 * find them as necessary.
2479 *
2480 * Note that the data on this list MUST remain valid after initarm() returns,
2481 * as pmap_bootstrap() uses it to construct L2 table metadata.
2482 */
2483SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list);
2484
2485vaddr_t
2486kernel_pt_lookup(paddr_t pa)
2487{
2488	pv_addr_t *pv;
2489
2490	SLIST_FOREACH(pv, &kernel_pt_list, pv_list) {
2491#ifndef ARM32_NEW_VM_LAYOUT
2492		if (pv->pv_pa == (pa & ~PGOFSET))
2493			return (pv->pv_va | (pa & PGOFSET));
2494#else
2495		if (pv->pv_pa == pa)
2496			return (pv->pv_va);
2497#endif
2498	}
2499	return (0);
2500}
2501
2502/*
2503 * pmap_map_section:
2504 *
2505 *	Create a single section mapping.
2506 */
2507void
2508pmap_map_section(vaddr_t l1pt, vaddr_t va, paddr_t pa, int prot, int cache)
2509{
2510	pd_entry_t *pde = (pd_entry_t *) l1pt;
2511	pd_entry_t fl;
2512
2513	switch (cache) {
2514	case PTE_NOCACHE:
2515	default:
2516		fl = 0;
2517		break;
2518
2519	case PTE_CACHE:
2520		fl = pte_l1_s_cache_mode;
2521		break;
2522
2523	case PTE_PAGETABLE:
2524		fl = pte_l1_s_cache_mode_pt;
2525		break;
2526	}
2527
2528	pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | L1_S_V7_AF |
2529	    L1_S_PROT(PTE_KERNEL, prot) | fl;
2530	PTE_SYNC(&pde[va >> L1_S_SHIFT]);
2531}
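
/*
 * Illustrative sketch, not part of this file: early MD code could map a
 * 1MB device region with a single section entry, e.g. (addresses
 * hypothetical):
 *
 *	pmap_map_section(l1pt_va, dev_va, dev_pa,
 *	    PROT_READ | PROT_WRITE, PTE_NOCACHE);
 *
 * Both dev_va and dev_pa must be aligned to L1_S_SIZE (1MB).
 */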
2532
2533/*
2534 * pmap_map_entry:
2535 *
2536 *	Create a single page mapping.
2537 */
2538void
2539pmap_map_entry(vaddr_t l1pt, vaddr_t va, paddr_t pa, int prot, int cache)
2540{
2541	pd_entry_t *pde = (pd_entry_t *) l1pt;
2542	pt_entry_t fl;
2543	pt_entry_t *pte;
2544
2545	switch (cache) {
2546	case PTE_NOCACHE:
2547	default:
2548		fl = 0;
2549		break;
2550
2551	case PTE_CACHE:
2552		fl = pte_l2_s_cache_mode;
2553		break;
2554
2555	case PTE_PAGETABLE:
2556		fl = pte_l2_s_cache_mode_pt;
2557		break;
2558	}
2559
2560	if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
2561		panic("pmap_map_entry: no L2 table for VA 0x%08lx", va);
2562
2563#ifndef ARM32_NEW_VM_LAYOUT
2564	pte = (pt_entry_t *)
2565	    kernel_pt_lookup(pde[va >> L1_S_SHIFT] & L2_S_FRAME);
2566#else
2567	pte = (pt_entry_t *) kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK);
2568#endif
2569	if (pte == NULL)
2570		panic("pmap_map_entry: can't find L2 table for VA 0x%08lx", va);
2571
2572#ifndef ARM32_NEW_VM_LAYOUT
2573	pte[(va >> PGSHIFT) & 0x3ff] = L2_S_PROTO | pa | L2_V7_AF |
2574	    L2_S_PROT(PTE_KERNEL, prot) | fl;
2575	PTE_SYNC(&pte[(va >> PGSHIFT) & 0x3ff]);
2576#else
2577	pte[l2pte_index(va)] = L2_S_PROTO | pa | L2_V7_AF |
2578	    L2_S_PROT(PTE_KERNEL, prot) | fl;
2579	PTE_SYNC(&pte[l2pte_index(va)]);
2580#endif
2581}
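
/*
 * Illustrative sketch, not part of this file: a single page, such as an
 * early console's register page, could be wired in with (addresses
 * hypothetical):
 *
 *	pmap_map_entry(l1pt_va, cons_va, cons_pa,
 *	    PROT_READ | PROT_WRITE, PTE_NOCACHE);
 *
 * An L2 table must already be linked at the covering L1 slot (see
 * pmap_link_l2pt() below), otherwise this routine panics.
 */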
2582
2583/*
2584 * pmap_link_l2pt:
2585 *
2586 *	Link the L2 page table specified by "l2pv" into the L1
2587 *	page table at the slot for "va".
2588 */
2589void
2590pmap_link_l2pt(vaddr_t l1pt, vaddr_t va, pv_addr_t *l2pv)
2591{
2592	pd_entry_t *pde = (pd_entry_t *) l1pt;
2593	u_int slot = va >> L1_S_SHIFT;
2594
2595	pde[slot + 0] = L1_C_PROTO | (l2pv->pv_pa + 0x000);
2596#ifdef ARM32_NEW_VM_LAYOUT
2597	PTE_SYNC(&pde[slot]);
2598#else
2599	pde[slot + 1] = L1_C_PROTO | (l2pv->pv_pa + 0x400);
2600	pde[slot + 2] = L1_C_PROTO | (l2pv->pv_pa + 0x800);
2601	pde[slot + 3] = L1_C_PROTO | (l2pv->pv_pa + 0xc00);
2602	PTE_SYNC_RANGE(&pde[slot + 0], 4);
2603#endif
2604
2605	SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list);
2606}
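
/*
 * Illustrative sketch, not part of this file: board code allocates an L2
 * table (described by a pv_addr_t with both pv_va and pv_pa filled in)
 * and hooks it into the L1 before mapping pages beneath it, e.g. (names
 * hypothetical):
 *
 *	pv_addr_t kernel_l2pt;
 *
 *	(allocate the table and record its VA/PA in kernel_l2pt)
 *	pmap_link_l2pt(l1pt_va, kernel_va_base, &kernel_l2pt);
 *
 * The pv_addr_t must remain valid after initarm() returns, since
 * pmap_bootstrap() walks kernel_pt_list to build its L2 metadata.
 */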
2607
2608/*
2609 * pmap_map_chunk:
2610 *
2611 *	Map a chunk of memory using the most efficient mappings
2612 *	possible (section, large page, small page) into the
2613 *	provided L1 and L2 tables at the specified virtual address.
2614 */
2615vsize_t
2616pmap_map_chunk(vaddr_t l1pt, vaddr_t va, paddr_t pa, vsize_t size,
2617    int prot, int cache)
2618{
2619	pd_entry_t *pde = (pd_entry_t *) l1pt;
2620	pt_entry_t *pte, f1, f2s, f2l;
2621	vsize_t resid;
2622	int i;
2623
2624	resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
2625
2626	if (l1pt == 0)
2627		panic("pmap_map_chunk: no L1 table provided");
2628
2629#ifdef VERBOSE_INIT_ARM
2630	printf("pmap_map_chunk: pa=0x%lx va=0x%lx size=0x%lx resid=0x%lx "
2631	    "prot=0x%x cache=%d\n", pa, va, size, resid, prot, cache);
2632#endif
2633
2634	switch (cache) {
2635	case PTE_NOCACHE:
2636	default:
2637		f1 = 0;
2638		f2l = 0;
2639		f2s = 0;
2640		break;
2641
2642	case PTE_CACHE:
2643		f1 = pte_l1_s_cache_mode;
2644		f2l = pte_l2_l_cache_mode;
2645		f2s = pte_l2_s_cache_mode;
2646		break;
2647
2648	case PTE_PAGETABLE:
2649		f1 = pte_l1_s_cache_mode_pt;
2650		f2l = pte_l2_l_cache_mode_pt;
2651		f2s = pte_l2_s_cache_mode_pt;
2652		break;
2653	}
2654
2655	size = resid;
2656
2657	while (resid > 0) {
2658		/* See if we can use a section mapping. */
2659		if (L1_S_MAPPABLE_P(va, pa, resid)) {
2660#ifdef VERBOSE_INIT_ARM
2661			printf("S");
2662#endif
2663			pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa |
2664			    L1_S_V7_AF | L1_S_PROT(PTE_KERNEL, prot) | f1;
2665			PTE_SYNC(&pde[va >> L1_S_SHIFT]);
2666			va += L1_S_SIZE;
2667			pa += L1_S_SIZE;
2668			resid -= L1_S_SIZE;
2669			continue;
2670		}
2671
2672		/*
2673		 * Ok, we're going to use an L2 table.  Make sure
2674		 * one is actually in the corresponding L1 slot
2675		 * for the current VA.
2676		 */
2677		if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
2678			panic("pmap_map_chunk: no L2 table for VA 0x%08lx", va);
2679
2680#ifndef ARM32_NEW_VM_LAYOUT
2681		pte = (pt_entry_t *)
2682		    kernel_pt_lookup(pde[va >> L1_S_SHIFT] & L2_S_FRAME);
2683#else
2684		pte = (pt_entry_t *) kernel_pt_lookup(
2685		    pde[L1_IDX(va)] & L1_C_ADDR_MASK);
2686#endif
2687		if (pte == NULL)
2688			panic("pmap_map_chunk: can't find L2 table for VA "
2689			    "0x%08lx", va);
2690
2691		/* See if we can use a L2 large page mapping. */
2692		if (L2_L_MAPPABLE_P(va, pa, resid)) {
2693#ifdef VERBOSE_INIT_ARM
2694			printf("L");
2695#endif
2696			for (i = 0; i < 16; i++) {
2697#ifndef ARM32_NEW_VM_LAYOUT
2698				pte[((va >> PGSHIFT) & 0x3f0) + i] =
2699				    L2_L_PROTO | pa | L2_V7_AF |
2700				    L2_L_PROT(PTE_KERNEL, prot) | f2l;
2701				PTE_SYNC(&pte[((va >> PGSHIFT) & 0x3f0) + i]);
2702#else
2703				pte[l2pte_index(va) + i] =
2704				    L2_L_PROTO | pa | L2_V7_AF |
2705				    L2_L_PROT(PTE_KERNEL, prot) | f2l;
2706				PTE_SYNC(&pte[l2pte_index(va) + i]);
2707#endif
2708			}
2709			va += L2_L_SIZE;
2710			pa += L2_L_SIZE;
2711			resid -= L2_L_SIZE;
2712			continue;
2713		}
2714
2715		/* Use a small page mapping. */
2716#ifdef VERBOSE_INIT_ARM
2717		printf("P");
2718#endif
2719#ifndef ARM32_NEW_VM_LAYOUT
2720		pte[(va >> PGSHIFT) & 0x3ff] = L2_S_PROTO | pa | L2_V7_AF |
2721		    L2_S_PROT(PTE_KERNEL, prot) | f2s;
2722		PTE_SYNC(&pte[(va >> PGSHIFT) & 0x3ff]);
2723#else
2724		pte[l2pte_index(va)] = L2_S_PROTO | pa | L2_V7_AF |
2725		    L2_S_PROT(PTE_KERNEL, prot) | f2s;
2726		PTE_SYNC(&pte[l2pte_index(va)]);
2727#endif
2728		va += PAGE_SIZE;
2729		pa += PAGE_SIZE;
2730		resid -= PAGE_SIZE;
2731	}
2732#ifdef VERBOSE_INIT_ARM
2733	printf("\n");
2734#endif
2735	return (size);
2736}
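
/*
 * Illustrative sketch, not part of this file: mapping the kernel image
 * during bring-up might look like this (symbols hypothetical); the return
 * value is the page-rounded size that was actually mapped:
 *
 *	vsize_t mapped;
 *
 *	mapped = pmap_map_chunk(l1pt_va, kernel_va_base, kernel_pa_base,
 *	    kernel_image_size, PROT_READ | PROT_WRITE, PTE_CACHE);
 *
 * Section and large-page entries are used automatically whenever the
 * current va/pa alignment and the remaining size permit it.
 */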
2737
2738/********************** PTE initialization routines **************************/
2739
2740/*
2741 * These routines are called when the CPU type is identified to set up
2742 * the PTE prototypes, cache modes, etc.
2743 *
2744 * The variables are always here, just in case LKMs need to reference
2745 * them (though they shouldn't).
2746 */
2747
2748pt_entry_t	pte_l1_s_cache_mode;
2749pt_entry_t	pte_l1_s_cache_mode_pt;
2750pt_entry_t	pte_l1_s_cache_mask;
2751
2752pt_entry_t	pte_l2_l_cache_mode;
2753pt_entry_t	pte_l2_l_cache_mode_pt;
2754pt_entry_t	pte_l2_l_cache_mask;
2755
2756pt_entry_t	pte_l2_s_cache_mode;
2757pt_entry_t	pte_l2_s_cache_mode_pt;
2758pt_entry_t	pte_l2_s_cache_mask;
2759
2760pt_entry_t	pte_l1_s_coherent;
2761pt_entry_t	pte_l2_l_coherent;
2762pt_entry_t	pte_l2_s_coherent;
2763
2764pt_entry_t	pte_l1_s_prot_ur;
2765pt_entry_t	pte_l1_s_prot_uw;
2766pt_entry_t	pte_l1_s_prot_kr;
2767pt_entry_t	pte_l1_s_prot_kw;
2768pt_entry_t	pte_l1_s_prot_mask;
2769
2770pt_entry_t	pte_l2_l_prot_ur;
2771pt_entry_t	pte_l2_l_prot_uw;
2772pt_entry_t	pte_l2_l_prot_kr;
2773pt_entry_t	pte_l2_l_prot_kw;
2774pt_entry_t	pte_l2_l_prot_mask;
2775
2776pt_entry_t	pte_l2_s_prot_ur;
2777pt_entry_t	pte_l2_s_prot_uw;
2778pt_entry_t	pte_l2_s_prot_kr;
2779pt_entry_t	pte_l2_s_prot_kw;
2780pt_entry_t	pte_l2_s_prot_mask;
2781
2782pt_entry_t	pte_l1_s_proto;
2783pt_entry_t	pte_l1_c_proto;
2784pt_entry_t	pte_l2_s_proto;
2785
2786void		(*pmap_copy_page_func)(struct vm_page *, struct vm_page *);
2787void		(*pmap_zero_page_func)(struct vm_page *);
2788
2789void
2790pmap_pte_init_armv7(void)
2791{
2792	uint32_t id_mmfr0, id_mmfr3;
2793
2794	/*
2795	 * XXX We want to use proper TEX settings eventually.
2796	 */
2797
2798	/* write-allocate should be tested */
2799	pte_l1_s_cache_mode = L1_S_C|L1_S_B;
2800	pte_l2_l_cache_mode = L2_C|L2_B;
2801	pte_l2_s_cache_mode = L2_C|L2_B;
2802
2803	pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C;
2804	pte_l2_l_cache_mode_pt = L2_B|L2_C;
2805	pte_l2_s_cache_mode_pt = L2_B|L2_C;
2806	pmap_needs_pte_sync = 1;
2807
2808	pte_l1_s_cache_mask = L1_S_CACHE_MASK_v7;
2809	pte_l2_l_cache_mask = L2_L_CACHE_MASK_v7;
2810	pte_l2_s_cache_mask = L2_S_CACHE_MASK_v7;
2811
2812	pte_l1_s_coherent = L1_S_COHERENT_v7;
2813	pte_l2_l_coherent = L2_L_COHERENT_v7;
2814	pte_l2_s_coherent = L2_S_COHERENT_v7;
2815
2816	pte_l1_s_prot_ur = L1_S_PROT_UR_v7;
2817	pte_l1_s_prot_uw = L1_S_PROT_UW_v7;
2818	pte_l1_s_prot_kr = L1_S_PROT_KR_v7;
2819	pte_l1_s_prot_kw = L1_S_PROT_KW_v7;
2820	pte_l1_s_prot_mask = L1_S_PROT_MASK_v7;
2821
2822	pte_l2_l_prot_ur = L2_L_PROT_UR_v7;
2823	pte_l2_l_prot_uw = L2_L_PROT_UW_v7;
2824	pte_l2_l_prot_kr = L2_L_PROT_KR_v7;
2825	pte_l2_l_prot_kw = L2_L_PROT_KW_v7;
2826	pte_l2_l_prot_mask = L2_L_PROT_MASK_v7;
2827
2828	pte_l2_s_prot_ur = L2_S_PROT_UR_v7;
2829	pte_l2_s_prot_uw = L2_S_PROT_UW_v7;
2830	pte_l2_s_prot_kr = L2_S_PROT_KR_v7;
2831	pte_l2_s_prot_kw = L2_S_PROT_KW_v7;
2832	pte_l2_s_prot_mask = L2_S_PROT_MASK_v7;
2833
2834	pte_l1_s_proto = L1_S_PROTO_v7;
2835	pte_l1_c_proto = L1_C_PROTO_v7;
2836	pte_l2_s_proto = L2_S_PROTO_v7;
2837
2838	pmap_copy_page_func = pmap_copy_page_generic;
2839	pmap_zero_page_func = pmap_zero_page_generic;
2840
2841	/* Check if the PXN bit is supported. */
2842	__asm volatile("mrc p15, 0, %0, c0, c1, 4" : "=r"(id_mmfr0));
2843	if ((id_mmfr0 & ID_MMFR0_VMSA_MASK) >= VMSA_V7_PXN)
2844		l1_c_pxn = L1_C_V7_PXN;
2845
2846	/* Check for coherent walk. */
2847	__asm volatile("mrc p15, 0, %0, c0, c1, 7" : "=r"(id_mmfr3));
2848	if ((id_mmfr3 & 0x00f00000) == 0x00100000)
2849		pmap_needs_pte_sync = 0;
2850}
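
/*
 * Illustrative note, not part of this file: the MD CPU identification code
 * is expected to call
 *
 *	pmap_pte_init_armv7();
 *
 * once the core has been recognised as ARMv7 and before pmap_bootstrap()
 * runs, so that the prototype, protection and cache-mode variables above
 * are valid by the time the first PTEs are written.
 */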
2851