pmap-v4.c revision 175397
1/* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */
2/*-
3 * Copyright 2004 Olivier Houchard.
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38/*-
39 * Copyright (c) 2002-2003 Wasabi Systems, Inc.
40 * Copyright (c) 2001 Richard Earnshaw
41 * Copyright (c) 2001-2002 Christopher Gilbert
42 * All rights reserved.
43 *
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 * 3. The name of the company nor the name of the author may be used to
50 *    endorse or promote products derived from this software without specific
51 *    prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
54 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
56 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
57 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
58 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
59 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 */
65/*-
66 * Copyright (c) 1999 The NetBSD Foundation, Inc.
67 * All rights reserved.
68 *
69 * This code is derived from software contributed to The NetBSD Foundation
70 * by Charles M. Hannum.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the above copyright
76 *    notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 *    notice, this list of conditions and the following disclaimer in the
79 *    documentation and/or other materials provided with the distribution.
80 * 3. All advertising materials mentioning features or use of this software
81 *    must display the following acknowledgement:
82 *        This product includes software developed by the NetBSD
83 *        Foundation, Inc. and its contributors.
84 * 4. Neither the name of The NetBSD Foundation nor the names of its
85 *    contributors may be used to endorse or promote products derived
86 *    from this software without specific prior written permission.
87 *
88 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
89 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
90 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
91 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
92 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
93 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
94 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
95 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
96 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
97 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
98 * POSSIBILITY OF SUCH DAMAGE.
99 */
100
101/*-
102 * Copyright (c) 1994-1998 Mark Brinicombe.
103 * Copyright (c) 1994 Brini.
104 * All rights reserved.
105 *
106 * This code is derived from software written for Brini by Mark Brinicombe
107 *
108 * Redistribution and use in source and binary forms, with or without
109 * modification, are permitted provided that the following conditions
110 * are met:
111 * 1. Redistributions of source code must retain the above copyright
112 *    notice, this list of conditions and the following disclaimer.
113 * 2. Redistributions in binary form must reproduce the above copyright
114 *    notice, this list of conditions and the following disclaimer in the
115 *    documentation and/or other materials provided with the distribution.
116 * 3. All advertising materials mentioning features or use of this software
117 *    must display the following acknowledgement:
118 *      This product includes software developed by Mark Brinicombe.
119 * 4. The name of the author may not be used to endorse or promote products
120 *    derived from this software without specific prior written permission.
121 *
122 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
123 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
124 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
125 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
126 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
127 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
128 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
129 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
130 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
131 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
132 * RiscBSD kernel project
133 *
134 * pmap.c
135 *
136 * Machine dependent vm stuff
137 *
138 * Created      : 20/09/94
139 */
140
141/*
142 * Special compilation symbols
143 * PMAP_DEBUG           - Build in pmap_debug_level code
144 */
145/* Include header files */
146
147#include "opt_vm.h"
148
149#include <sys/cdefs.h>
150__FBSDID("$FreeBSD: head/sys/arm/arm/pmap.c 175397 2008-01-17 12:41:59Z cognet $");
151#include <sys/param.h>
152#include <sys/systm.h>
153#include <sys/kernel.h>
154#include <sys/proc.h>
155#include <sys/malloc.h>
156#include <sys/msgbuf.h>
157#include <sys/vmmeter.h>
158#include <sys/mman.h>
159#include <sys/smp.h>
160#include <sys/sched.h>
161
162#include <vm/vm.h>
163#include <vm/uma.h>
164#include <vm/pmap.h>
165#include <vm/vm_kern.h>
166#include <vm/vm_object.h>
167#include <vm/vm_map.h>
168#include <vm/vm_page.h>
169#include <vm/vm_pageout.h>
170#include <vm/vm_extern.h>
171#include <sys/lock.h>
172#include <sys/mutex.h>
173#include <machine/md_var.h>
174#include <machine/vmparam.h>
175#include <machine/cpu.h>
176#include <machine/cpufunc.h>
177#include <machine/pcb.h>
178
179#ifdef PMAP_DEBUG
180#define PDEBUG(_lev_,_stat_) \
181        if (pmap_debug_level >= (_lev_)) \
182                ((_stat_))
183#define dprintf printf
184
185int pmap_debug_level = 0;
186#define PMAP_INLINE
187#else   /* PMAP_DEBUG */
188#define PDEBUG(_lev_,_stat_) /* Nothing */
189#define dprintf(x, arg...)
190#define PMAP_INLINE __inline
191#endif  /* PMAP_DEBUG */
192
193extern struct pv_addr systempage;
194/*
195 * Internal function prototypes
196 */
197static void pmap_free_pv_entry (pv_entry_t);
198static pv_entry_t pmap_get_pv_entry(void);
199
200static void		pmap_enter_locked(pmap_t, vm_offset_t, vm_page_t,
201    vm_prot_t, boolean_t, int);
202static void		pmap_vac_me_harder(struct vm_page *, pmap_t,
203    vm_offset_t);
204static void		pmap_vac_me_kpmap(struct vm_page *, pmap_t,
205    vm_offset_t);
206static void		pmap_vac_me_user(struct vm_page *, pmap_t, vm_offset_t);
207static void		pmap_alloc_l1(pmap_t);
208static void		pmap_free_l1(pmap_t);
209static void		pmap_use_l1(pmap_t);
210
211static int		pmap_clearbit(struct vm_page *, u_int);
212
213static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t);
214static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t);
215static void		pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int);
216static vm_offset_t	kernel_pt_lookup(vm_paddr_t);
217
218static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");
219
220vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
221vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
222vm_offset_t pmap_curmaxkvaddr;
223vm_paddr_t kernel_l1pa;
224
225extern void *end;
226vm_offset_t kernel_vm_end = 0;
227
228struct pmap kernel_pmap_store;
229pmap_t kernel_pmap;
230
231static pt_entry_t *csrc_pte, *cdst_pte;
232static vm_offset_t csrcp, cdstp;
233static struct mtx cmtx;
234
235static void		pmap_init_l1(struct l1_ttable *, pd_entry_t *);
236/*
237 * These routines are called when the CPU type is identified to set up
238 * the PTE prototypes, cache modes, etc.
239 *
240 * The variables are always here, just in case LKMs need to reference
241 * them (though, they shouldn't).
242 */
243
244pt_entry_t	pte_l1_s_cache_mode;
245pt_entry_t	pte_l1_s_cache_mode_pt;
246pt_entry_t	pte_l1_s_cache_mask;
247
248pt_entry_t	pte_l2_l_cache_mode;
249pt_entry_t	pte_l2_l_cache_mode_pt;
250pt_entry_t	pte_l2_l_cache_mask;
251
252pt_entry_t	pte_l2_s_cache_mode;
253pt_entry_t	pte_l2_s_cache_mode_pt;
254pt_entry_t	pte_l2_s_cache_mask;
255
256pt_entry_t	pte_l2_s_prot_u;
257pt_entry_t	pte_l2_s_prot_w;
258pt_entry_t	pte_l2_s_prot_mask;
259
260pt_entry_t	pte_l1_s_proto;
261pt_entry_t	pte_l1_c_proto;
262pt_entry_t	pte_l2_s_proto;
263
264void		(*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t);
265void		(*pmap_zero_page_func)(vm_paddr_t, int, int);
266/*
267 * Which pmap is currently 'live' in the cache
268 *
269 * XXXSCW: Fix for SMP ...
270 */
271union pmap_cache_state *pmap_cache_state;
272
273struct msgbuf *msgbufp = 0;
274
275extern void bcopy_page(vm_offset_t, vm_offset_t);
276extern void bzero_page(vm_offset_t);
277
278extern vm_offset_t alloc_firstaddr;
279
280char *_tmppt;
281
282/*
283 * Metadata for L1 translation tables.
284 */
285struct l1_ttable {
286	/* Entry on the L1 Table list */
287	SLIST_ENTRY(l1_ttable) l1_link;
288
289	/* Entry on the L1 Least Recently Used list */
290	TAILQ_ENTRY(l1_ttable) l1_lru;
291
292	/* Track how many domains are allocated from this L1 */
293	volatile u_int l1_domain_use_count;
294
295	/*
296	 * A free-list of domain numbers for this L1.
297	 * We avoid using ffs() and a bitmap to track domains since ffs()
298	 * is slow on ARM.
299	 */
300	u_int8_t l1_domain_first;
301	u_int8_t l1_domain_free[PMAP_DOMAINS];
302
303	/* Physical address of this L1 page table */
304	vm_paddr_t l1_physaddr;
305
306	/* KVA of this L1 page table */
307	pd_entry_t *l1_kva;
308};
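/*
 * The domain free-list above behaves like a small stack threaded through
 * l1_domain_free[] (see pmap_init_l1(), pmap_alloc_l1() and pmap_free_l1()
 * below).  For example, right after pmap_init_l1():
 *
 *	l1_domain_first  == 0
 *	l1_domain_free[] == { 1, 2, 3, ... }
 *
 * pmap_alloc_l1() hands out domain 0 and advances l1_domain_first to
 * l1_domain_free[0] == 1; pmap_free_l1() pushes the domain back by storing
 * the old l1_domain_first in that domain's slot and making it the new head.
 * Note that a pmap records the allocated domain as pm_domain = domain + 1.
 */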
309
310/*
311 * Convert a virtual address into its L1 table index. That is, the
312 * index used to locate the L2 descriptor table pointer in an L1 table.
313 * This is basically used to index l1->l1_kva[].
314 *
315 * Each L2 descriptor table represents 1MB of VA space.
316 */
317#define	L1_IDX(va)		(((vm_offset_t)(va)) >> L1_S_SHIFT)
318
319/*
320 * L1 Page Tables are tracked using a Least Recently Used list.
321 *  - New L1s are allocated from the HEAD.
322 *  - Freed L1s are added to the TAIL.
323 *  - Recently accessed L1s (where an 'access' is some change to one of
324 *    the userland pmaps which owns this L1) are moved to the TAIL.
325 */
326static TAILQ_HEAD(, l1_ttable) l1_lru_list;
327/*
328 * A list of all L1 tables
329 */
330static SLIST_HEAD(, l1_ttable) l1_list;
331static struct mtx l1_lru_lock;
332
333/*
334 * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots.
335 *
336 * This is normally 16MB worth of L2 page descriptors for any given pmap.
337 * Reference counts are maintained for L2 descriptors so they can be
338 * freed when empty.
339 */
340struct l2_dtable {
341	/* The number of L2 page descriptors allocated to this l2_dtable */
342	u_int l2_occupancy;
343
344	/* List of L2 page descriptors */
345	struct l2_bucket {
346		pt_entry_t *l2b_kva;	/* KVA of L2 Descriptor Table */
347		vm_paddr_t l2b_phys;	/* Physical address of same */
348		u_short l2b_l1idx;	/* This L2 table's L1 index */
349		u_short l2b_occupancy;	/* How many active descriptors */
350	} l2_bucket[L2_BUCKET_SIZE];
351};
352
353/* pmap_kenter_internal flags */
354#define KENTER_CACHE	0x1
355#define KENTER_USER	0x2
356
357/*
358 * Given an L1 table index, calculate the corresponding l2_dtable index
359 * and bucket index within the l2_dtable.
360 */
361#define	L2_IDX(l1idx)		(((l1idx) >> L2_BUCKET_LOG2) & \
362				 (L2_SIZE - 1))
363#define	L2_BUCKET(l1idx)	((l1idx) & (L2_BUCKET_SIZE - 1))
364
365/*
366 * Given a virtual address, this macro returns the
367 * virtual address required to drop into the next L2 bucket.
368 */
369#define	L2_NEXT_BUCKET(va)	(((va) & L1_S_FRAME) + L1_S_SIZE)
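/*
 * Worked example for the macros above, assuming the usual ARM constants
 * (L1_S_SHIFT == 20 for 1MB sections, L2_BUCKET_LOG2 == 4, so
 * L2_BUCKET_SIZE == 16):
 *
 *	va = 0x01234567
 *	L1_IDX(va)         == 0x12 (18)	(slot in the L1 table)
 *	L2_IDX(18)         == 1		(which l2_dtable in pm->pm_l2[])
 *	L2_BUCKET(18)      == 2		(which l2_bucket within it)
 *	L2_NEXT_BUCKET(va) == 0x01300000
 *
 * i.e. the PTE for va lives at
 * pm->pm_l2[1]->l2_bucket[2].l2b_kva[l2pte_index(va)], and each l2_dtable
 * spans 16 x 1MB == 16MB of VA, as noted above.
 */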
370
371/*
372 * L2 allocation.
373 */
374#define	pmap_alloc_l2_dtable()		\
375		(void*)uma_zalloc(l2table_zone, M_NOWAIT|M_USE_RESERVE)
376#define	pmap_free_l2_dtable(l2)		\
377		uma_zfree(l2table_zone, l2)
378
379/*
380 * We try to map the page tables write-through, if possible.  However, not
381 * all CPUs have a write-through cache mode, so on those we have to sync
382 * the cache when we frob page tables.
383 *
384 * We try to evaluate this at compile time, if possible.  However, it's
385 * not always possible to do that, hence this run-time var.
386 */
387int	pmap_needs_pte_sync;
388
389/*
390 * Macro to determine if a mapping might be resident in the
391 * instruction cache and/or TLB
392 */
393#define	PV_BEEN_EXECD(f)  (((f) & (PVF_REF | PVF_EXEC)) == (PVF_REF | PVF_EXEC))
394
395/*
396 * Macro to determine if a mapping might be resident in the
397 * data cache and/or TLB
398 */
399#define	PV_BEEN_REFD(f)   (((f) & PVF_REF) != 0)
400
401#ifndef PMAP_SHPGPERPROC
402#define PMAP_SHPGPERPROC 200
403#endif
404
405#define pmap_is_current(pm)	((pm) == pmap_kernel() || \
406            curproc->p_vmspace->vm_map.pmap == (pm))
407static uma_zone_t pvzone;
408uma_zone_t l2zone;
409static uma_zone_t l2table_zone;
410static vm_offset_t pmap_kernel_l2dtable_kva;
411static vm_offset_t pmap_kernel_l2ptp_kva;
412static vm_paddr_t pmap_kernel_l2ptp_phys;
413static struct vm_object pvzone_obj;
414static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
415
416/*
417 * This list exists for the benefit of pmap_map_chunk().  It keeps track
418 * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can
419 * find them as necessary.
420 *
421 * Note that the data on this list MUST remain valid after initarm() returns,
422 * as pmap_bootstrap() uses it to construct L2 table metadata.
423 */
424SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list);
425
426static void
427pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt)
428{
429	int i;
430
431	l1->l1_kva = l1pt;
432	l1->l1_domain_use_count = 0;
433	l1->l1_domain_first = 0;
434
435	for (i = 0; i < PMAP_DOMAINS; i++)
436		l1->l1_domain_free[i] = i + 1;
437
438	/*
439	 * Copy the kernel's L1 entries to each new L1.
440	 */
441	if (l1pt != pmap_kernel()->pm_l1->l1_kva)
442		memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE);
443
444	if ((l1->l1_physaddr = pmap_extract(pmap_kernel(), (vm_offset_t)l1pt)) == 0)
445		panic("pmap_init_l1: can't get PA of L1 at %p", l1pt);
446	SLIST_INSERT_HEAD(&l1_list, l1, l1_link);
447	TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
448}
449
450static vm_offset_t
451kernel_pt_lookup(vm_paddr_t pa)
452{
453	struct pv_addr *pv;
454
455	SLIST_FOREACH(pv, &kernel_pt_list, pv_list) {
456		if (pv->pv_pa == pa)
457			return (pv->pv_va);
458	}
459	return (0);
460}
461
462#if (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0
463void
464pmap_pte_init_generic(void)
465{
466
467	pte_l1_s_cache_mode = L1_S_B|L1_S_C;
468	pte_l1_s_cache_mask = L1_S_CACHE_MASK_generic;
469
470	pte_l2_l_cache_mode = L2_B|L2_C;
471	pte_l2_l_cache_mask = L2_L_CACHE_MASK_generic;
472
473	pte_l2_s_cache_mode = L2_B|L2_C;
474	pte_l2_s_cache_mask = L2_S_CACHE_MASK_generic;
475
476	/*
477	 * If we have a write-through cache, set B and C.  If
478	 * we have a write-back cache, then we assume setting
479	 * only C will make those pages write-through.
480	 */
481	if (cpufuncs.cf_dcache_wb_range == (void *) cpufunc_nullop) {
482		pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C;
483		pte_l2_l_cache_mode_pt = L2_B|L2_C;
484		pte_l2_s_cache_mode_pt = L2_B|L2_C;
485	} else {
486		pte_l1_s_cache_mode_pt = L1_S_C;
487		pte_l2_l_cache_mode_pt = L2_C;
488		pte_l2_s_cache_mode_pt = L2_C;
489	}
490
491	pte_l2_s_prot_u = L2_S_PROT_U_generic;
492	pte_l2_s_prot_w = L2_S_PROT_W_generic;
493	pte_l2_s_prot_mask = L2_S_PROT_MASK_generic;
494
495	pte_l1_s_proto = L1_S_PROTO_generic;
496	pte_l1_c_proto = L1_C_PROTO_generic;
497	pte_l2_s_proto = L2_S_PROTO_generic;
498
499	pmap_copy_page_func = pmap_copy_page_generic;
500	pmap_zero_page_func = pmap_zero_page_generic;
501}
502
503#if defined(CPU_ARM8)
504void
505pmap_pte_init_arm8(void)
506{
507
508	/*
509	 * ARM8 is compatible with generic, but we need to use
510	 * the page tables uncached.
511	 */
512	pmap_pte_init_generic();
513
514	pte_l1_s_cache_mode_pt = 0;
515	pte_l2_l_cache_mode_pt = 0;
516	pte_l2_s_cache_mode_pt = 0;
517}
518#endif /* CPU_ARM8 */
519
520#if defined(CPU_ARM9) && defined(ARM9_CACHE_WRITE_THROUGH)
521void
522pmap_pte_init_arm9(void)
523{
524
525	/*
526	 * ARM9 is compatible with generic, but we want to use
527	 * write-through caching for now.
528	 */
529	pmap_pte_init_generic();
530
531	pte_l1_s_cache_mode = L1_S_C;
532	pte_l2_l_cache_mode = L2_C;
533	pte_l2_s_cache_mode = L2_C;
534
535	pte_l1_s_cache_mode_pt = L1_S_C;
536	pte_l2_l_cache_mode_pt = L2_C;
537	pte_l2_s_cache_mode_pt = L2_C;
538}
539#endif /* CPU_ARM9 */
540#endif /* (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 */
541
542#if defined(CPU_ARM10)
543void
544pmap_pte_init_arm10(void)
545{
546
547	/*
548	 * ARM10 is compatible with generic, but we want to use
549	 * write-through caching for now.
550	 */
551	pmap_pte_init_generic();
552
553	pte_l1_s_cache_mode = L1_S_B | L1_S_C;
554	pte_l2_l_cache_mode = L2_B | L2_C;
555	pte_l2_s_cache_mode = L2_B | L2_C;
556
557	pte_l1_s_cache_mode_pt = L1_S_C;
558	pte_l2_l_cache_mode_pt = L2_C;
559	pte_l2_s_cache_mode_pt = L2_C;
560
561}
562#endif /* CPU_ARM10 */
563
564#if  ARM_MMU_SA1 == 1
565void
566pmap_pte_init_sa1(void)
567{
568
569	/*
570	 * The StrongARM SA-1 cache does not have a write-through
571	 * mode.  So, do the generic initialization, then reset
572	 * the page table cache mode to B=1,C=1, and note that
573	 * the PTEs need to be sync'd.
574	 */
575	pmap_pte_init_generic();
576
577	pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C;
578	pte_l2_l_cache_mode_pt = L2_B|L2_C;
579	pte_l2_s_cache_mode_pt = L2_B|L2_C;
580
581	pmap_needs_pte_sync = 1;
582}
583#endif /* ARM_MMU_SA1 == 1*/
584
585#if ARM_MMU_XSCALE == 1
586#if (ARM_NMMUS > 1) || defined (CPU_XSCALE_CORE3)
587static u_int xscale_use_minidata;
588#endif
589
590void
591pmap_pte_init_xscale(void)
592{
593	uint32_t auxctl;
594	int write_through = 0;
595
596	pte_l1_s_cache_mode = L1_S_B|L1_S_C|L1_S_XSCALE_P;
597	pte_l1_s_cache_mask = L1_S_CACHE_MASK_xscale;
598
599	pte_l2_l_cache_mode = L2_B|L2_C;
600	pte_l2_l_cache_mask = L2_L_CACHE_MASK_xscale;
601
602	pte_l2_s_cache_mode = L2_B|L2_C;
603	pte_l2_s_cache_mask = L2_S_CACHE_MASK_xscale;
604
605	pte_l1_s_cache_mode_pt = L1_S_C;
606	pte_l2_l_cache_mode_pt = L2_C;
607	pte_l2_s_cache_mode_pt = L2_C;
608#ifdef XSCALE_CACHE_READ_WRITE_ALLOCATE
609	/*
610	 * The XScale core has an enhanced mode where writes that
611	 * miss the cache cause a cache line to be allocated.  This
612	 * is significantly faster than the traditional, write-through
613	 * behavior of this case.
614	 */
615	pte_l1_s_cache_mode |= L1_S_XSCALE_TEX(TEX_XSCALE_X);
616	pte_l2_l_cache_mode |= L2_XSCALE_L_TEX(TEX_XSCALE_X);
617	pte_l2_s_cache_mode |= L2_XSCALE_T_TEX(TEX_XSCALE_X);
618#endif /* XSCALE_CACHE_READ_WRITE_ALLOCATE */
619#ifdef XSCALE_CACHE_WRITE_THROUGH
620	/*
621	 * Some versions of the XScale core have various bugs in
622	 * their cache units, the work-around for which is to run
623	 * the cache in write-through mode.  Unfortunately, this
624	 * has a major (negative) impact on performance.  So, we
625	 * go ahead and run fast-and-loose, in the hopes that we
626	 * don't line up the planets in a way that will trip the
627	 * bugs.
628	 *
629	 * However, we give you the option to be slow-but-correct.
630	 */
631	write_through = 1;
632#elif defined(XSCALE_CACHE_WRITE_BACK)
633	/* force write back cache mode */
634	write_through = 0;
635#elif defined(CPU_XSCALE_PXA2X0)
636	/*
637	 * Intel PXA2[15]0 processors are known to have a bug in
638	 * write-back cache on revision 4 and earlier (stepping
639	 * A[01] and B[012]).  Fixed for C0 and later.
640	 */
641	{
642		uint32_t id, type;
643
644		id = cpufunc_id();
645		type = id & ~(CPU_ID_XSCALE_COREREV_MASK|CPU_ID_REVISION_MASK);
646
647		if (type == CPU_ID_PXA250 || type == CPU_ID_PXA210) {
648			if ((id & CPU_ID_REVISION_MASK) < 5) {
649				/* write through for stepping A0-1 and B0-2 */
650				write_through = 1;
651			}
652		}
653	}
654#endif /* XSCALE_CACHE_WRITE_THROUGH */
655
656	if (write_through) {
657		pte_l1_s_cache_mode = L1_S_C;
658		pte_l2_l_cache_mode = L2_C;
659		pte_l2_s_cache_mode = L2_C;
660	}
661
662#if (ARM_NMMUS > 1)
663	xscale_use_minidata = 1;
664#endif
665
666	pte_l2_s_prot_u = L2_S_PROT_U_xscale;
667	pte_l2_s_prot_w = L2_S_PROT_W_xscale;
668	pte_l2_s_prot_mask = L2_S_PROT_MASK_xscale;
669
670	pte_l1_s_proto = L1_S_PROTO_xscale;
671	pte_l1_c_proto = L1_C_PROTO_xscale;
672	pte_l2_s_proto = L2_S_PROTO_xscale;
673
674#ifdef CPU_XSCALE_CORE3
675	pmap_copy_page_func = pmap_copy_page_generic;
676	pmap_zero_page_func = pmap_zero_page_generic;
677	xscale_use_minidata = 0;
678	/* Make sure it is L2-cacheable */
679	pte_l1_s_cache_mode |= L1_S_XSCALE_TEX(TEX_XSCALE_T);
680	pte_l1_s_cache_mode_pt = pte_l1_s_cache_mode &~ L1_S_XSCALE_P;
681	pte_l2_l_cache_mode |= L2_XSCALE_L_TEX(TEX_XSCALE_T);
682	pte_l2_l_cache_mode_pt = pte_l1_s_cache_mode;
683	pte_l2_s_cache_mode |= L2_XSCALE_T_TEX(TEX_XSCALE_T);
684	pte_l2_s_cache_mode_pt = pte_l2_s_cache_mode;
685
686#else
687	pmap_copy_page_func = pmap_copy_page_xscale;
688	pmap_zero_page_func = pmap_zero_page_xscale;
689#endif
690
691	/*
692	 * Disable ECC protection of page table access, for now.
693	 */
694	__asm __volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (auxctl));
695	auxctl &= ~XSCALE_AUXCTL_P;
696	__asm __volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (auxctl));
697}
698
699/*
700 * xscale_setup_minidata:
701 *
702 *	Set up the mini-data cache clean area.  We require the
703 *	caller to allocate the right amount of physically and
704 *	virtually contiguous space.
705 */
706extern vm_offset_t xscale_minidata_clean_addr;
707extern vm_size_t xscale_minidata_clean_size; /* already initialized */
708void
709xscale_setup_minidata(vm_offset_t l1pt, vm_offset_t va, vm_paddr_t pa)
710{
711	pd_entry_t *pde = (pd_entry_t *) l1pt;
712	pt_entry_t *pte;
713	vm_size_t size;
714	uint32_t auxctl;
715
716	xscale_minidata_clean_addr = va;
717
718	/* Round it to page size. */
719	size = (xscale_minidata_clean_size + L2_S_OFFSET) & L2_S_FRAME;
720
721	for (; size != 0;
722	     va += L2_S_SIZE, pa += L2_S_SIZE, size -= L2_S_SIZE) {
723		pte = (pt_entry_t *) kernel_pt_lookup(
724		    pde[L1_IDX(va)] & L1_C_ADDR_MASK);
725		if (pte == NULL)
726			panic("xscale_setup_minidata: can't find L2 table for "
727			    "VA 0x%08x", (u_int32_t) va);
728		pte[l2pte_index(va)] =
729		    L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, VM_PROT_READ) |
730		    L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);
731	}
732
733	/*
734	 * Configure the mini-data cache for write-back with
735	 * read/write-allocate.
736	 *
737	 * NOTE: In order to reconfigure the mini-data cache, we must
738	 * make sure it contains no valid data!  In order to do that,
739	 * we must issue a global data cache invalidate command!
740	 *
741	 * WE ASSUME WE ARE RUNNING UN-CACHED WHEN THIS ROUTINE IS CALLED!
742	 * THIS IS VERY IMPORTANT!
743	 */
744
745	/* Invalidate data and mini-data. */
746	__asm __volatile("mcr p15, 0, %0, c7, c6, 0" : : "r" (0));
747	__asm __volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (auxctl));
748	auxctl = (auxctl & ~XSCALE_AUXCTL_MD_MASK) | XSCALE_AUXCTL_MD_WB_RWA;
749	__asm __volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (auxctl));
750}
751#endif
752
753/*
754 * Allocate an L1 translation table for the specified pmap.
755 * This is called at pmap creation time.
756 */
757static void
758pmap_alloc_l1(pmap_t pm)
759{
760	struct l1_ttable *l1;
761	u_int8_t domain;
762
763	/*
764	 * Remove the L1 at the head of the LRU list
765	 */
766	mtx_lock(&l1_lru_lock);
767	l1 = TAILQ_FIRST(&l1_lru_list);
768	TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
769
770	/*
771	 * Pick the first available domain number, and update
772	 * the link to the next number.
773	 */
774	domain = l1->l1_domain_first;
775	l1->l1_domain_first = l1->l1_domain_free[domain];
776
777	/*
778	 * If there are still free domain numbers in this L1,
779	 * put it back on the TAIL of the LRU list.
780	 */
781	if (++l1->l1_domain_use_count < PMAP_DOMAINS)
782		TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
783
784	mtx_unlock(&l1_lru_lock);
785
786	/*
787	 * Fix up the relevant bits in the pmap structure
788	 */
789	pm->pm_l1 = l1;
790	pm->pm_domain = domain + 1;
791}
792
793/*
794 * Free an L1 translation table.
795 * This is called at pmap destruction time.
796 */
797static void
798pmap_free_l1(pmap_t pm)
799{
800	struct l1_ttable *l1 = pm->pm_l1;
801
802	mtx_lock(&l1_lru_lock);
803
804	/*
805	 * If this L1 is currently on the LRU list, remove it.
806	 */
807	if (l1->l1_domain_use_count < PMAP_DOMAINS)
808		TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
809
810	/*
811	 * Free up the domain number which was allocated to the pmap
812	 */
813	l1->l1_domain_free[pm->pm_domain - 1] = l1->l1_domain_first;
814	l1->l1_domain_first = pm->pm_domain - 1;
815	l1->l1_domain_use_count--;
816
817	/*
818	 * The L1 now must have at least 1 free domain, so add
819	 * it back to the LRU list. If the use count is zero,
820	 * put it at the head of the list, otherwise it goes
821	 * to the tail.
822	 */
823	if (l1->l1_domain_use_count == 0) {
824		TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru);
825	} else
826		TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
827
828	mtx_unlock(&l1_lru_lock);
829}
830
831static PMAP_INLINE void
832pmap_use_l1(pmap_t pm)
833{
834	struct l1_ttable *l1;
835
836	/*
837	 * Do nothing if we're in interrupt context.
838	 * Access to an L1 by the kernel pmap must not affect
839	 * the LRU list.
840	 */
841	if (pm == pmap_kernel())
842		return;
843
844	l1 = pm->pm_l1;
845
846	/*
847	 * If the L1 is not currently on the LRU list, just return
848	 */
849	if (l1->l1_domain_use_count == PMAP_DOMAINS)
850		return;
851
852	mtx_lock(&l1_lru_lock);
853
854	/*
855	 * Check the use count again, now that we've acquired the lock
856	 */
857	if (l1->l1_domain_use_count == PMAP_DOMAINS) {
858		mtx_unlock(&l1_lru_lock);
859		return;
860	}
861
862	/*
863	 * Move the L1 to the back of the LRU list
864	 */
865	TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
866	TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
867
868	mtx_unlock(&l1_lru_lock);
869}
870
871
872/*
873 * Returns a pointer to the L2 bucket associated with the specified pmap
874 * and VA, or NULL if no L2 bucket exists for the address.
875 */
876static PMAP_INLINE struct l2_bucket *
877pmap_get_l2_bucket(pmap_t pm, vm_offset_t va)
878{
879	struct l2_dtable *l2;
880	struct l2_bucket *l2b;
881	u_short l1idx;
882
883	l1idx = L1_IDX(va);
884
885	if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL ||
886	    (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL)
887		return (NULL);
888
889	return (l2b);
890}
891
892/*
893 * Returns a pointer to the L2 bucket associated with the specified pmap
894 * and VA.
895 *
896 * If no L2 bucket exists, perform the necessary allocations to put an L2
897 * bucket/page table in place.
898 *
899 * Note that if a new L2 bucket/page was allocated, the caller *must*
900 * increment the bucket occupancy counter appropriately *before*
901 * releasing the pmap's lock to ensure no other thread or cpu deallocates
902 * the bucket/page in the meantime.
903 */
904static struct l2_bucket *
905pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
906{
907	struct l2_dtable *l2;
908	struct l2_bucket *l2b;
909	u_short l1idx;
910
911	l1idx = L1_IDX(va);
912
913	PMAP_ASSERT_LOCKED(pm);
914	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
915	if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
916		/*
917		 * No mapping at this address, as there is
918		 * no entry in the L1 table.
919		 * Need to allocate a new l2_dtable.
920		 */
921again_l2table:
922		PMAP_UNLOCK(pm);
923		vm_page_unlock_queues();
924		if ((l2 = pmap_alloc_l2_dtable()) == NULL) {
925			vm_page_lock_queues();
926			PMAP_LOCK(pm);
927			return (NULL);
928		}
929		vm_page_lock_queues();
930		PMAP_LOCK(pm);
931		if (pm->pm_l2[L2_IDX(l1idx)] != NULL) {
932			PMAP_UNLOCK(pm);
933			vm_page_unlock_queues();
934			uma_zfree(l2table_zone, l2);
935			vm_page_lock_queues();
936			PMAP_LOCK(pm);
937			l2 = pm->pm_l2[L2_IDX(l1idx)];
938			if (l2 == NULL)
939				goto again_l2table;
940			/*
941			 * Someone already allocated the l2_dtable while
942			 * we were doing the same.
943			 */
944		} else {
945			bzero(l2, sizeof(*l2));
946			/*
947			 * Link it into the parent pmap
948			 */
949			pm->pm_l2[L2_IDX(l1idx)] = l2;
950		}
951	}
952
953	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
954
955	/*
956	 * Fetch pointer to the L2 page table associated with the address.
957	 */
958	if (l2b->l2b_kva == NULL) {
959		pt_entry_t *ptep;
960
961		/*
962		 * No L2 page table has been allocated. Chances are, this
963		 * is because we just allocated the l2_dtable, above.
964		 */
965again_ptep:
966		PMAP_UNLOCK(pm);
967		vm_page_unlock_queues();
968		ptep = (void*)uma_zalloc(l2zone, M_NOWAIT|M_USE_RESERVE);
969		vm_page_lock_queues();
970		PMAP_LOCK(pm);
971		if (l2b->l2b_kva != 0) {
972			/* We lost the race. */
973			PMAP_UNLOCK(pm);
974			vm_page_unlock_queues();
975			uma_zfree(l2zone, ptep);
976			vm_page_lock_queues();
977			PMAP_LOCK(pm);
978			if (l2b->l2b_kva == 0)
979				goto again_ptep;
980			return (l2b);
981		}
982		if (ptep == NULL) {
983			/*
984			 * Oops, no more L2 page tables available at this
985			 * time. We may need to deallocate the l2_dtable
986			 * if we allocated a new one above.
987			 */
988			if (l2->l2_occupancy == 0) {
989				pm->pm_l2[L2_IDX(l1idx)] = NULL;
990				pmap_free_l2_dtable(l2);
991			}
992			return (NULL);
993		}
994		l2b->l2b_phys = vtophys(ptep);
995
996		l2->l2_occupancy++;
997		l2b->l2b_kva = ptep;
998		l2b->l2b_l1idx = l1idx;
999	}
1000
1001	return (l2b);
1002}
1003
1004static PMAP_INLINE void
1005#ifndef PMAP_INCLUDE_PTE_SYNC
1006pmap_free_l2_ptp(pt_entry_t *l2)
1007#else
1008pmap_free_l2_ptp(boolean_t need_sync, pt_entry_t *l2)
1009#endif
1010{
1011#ifdef PMAP_INCLUDE_PTE_SYNC
1012	/*
1013	 * Note: With a write-back cache, we may need to sync this
1014	 * L2 table before re-using it.
1015	 * This is because it may have belonged to a non-current
1016	 * pmap, in which case the cache syncs would have been
1017	 * skipped when the pages were being unmapped. If the
1018	 * L2 table were then to be immediately re-allocated to
1019	 * the *current* pmap, it may well contain stale mappings
1020	 * which have not yet been cleared by a cache write-back
1021	 * and so would still be visible to the mmu.
1022	 */
1023	if (need_sync)
1024		PTE_SYNC_RANGE(l2, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
1025#endif
1026	uma_zfree(l2zone, l2);
1027}
1028/*
1029 * One or more mappings in the specified L2 descriptor table have just been
1030 * invalidated.
1031 *
1032 * Garbage collect the metadata and descriptor table itself if necessary.
1033 *
1034 * The pmap lock must be acquired when this is called (not necessary
1035 * for the kernel pmap).
1036 */
1037static void
1038pmap_free_l2_bucket(pmap_t pm, struct l2_bucket *l2b, u_int count)
1039{
1040	struct l2_dtable *l2;
1041	pd_entry_t *pl1pd, l1pd;
1042	pt_entry_t *ptep;
1043	u_short l1idx;
1044
1045
1046	/*
1047	 * Update the bucket's reference count according to how many
1048	 * PTEs the caller has just invalidated.
1049	 */
1050	l2b->l2b_occupancy -= count;
1051
1052	/*
1053	 * Note:
1054	 *
1055	 * Level 2 page tables allocated to the kernel pmap are never freed
1056	 * as that would require checking all Level 1 page tables and
1057	 * removing any references to the Level 2 page table. See also the
1058	 * comment elsewhere about never freeing bootstrap L2 descriptors.
1059	 *
1060	 * We make do with just invalidating the mapping in the L2 table.
1061	 *
1062	 * This isn't really a big deal in practice and, in fact, leads
1063	 * to a performance win over time as we don't need to continually
1064	 * alloc/free.
1065	 */
1066	if (l2b->l2b_occupancy > 0 || pm == pmap_kernel())
1067		return;
1068
1069	/*
1070	 * There are no more valid mappings in this level 2 page table.
1071	 * Go ahead and NULL-out the pointer in the bucket, then
1072	 * free the page table.
1073	 */
1074	l1idx = l2b->l2b_l1idx;
1075	ptep = l2b->l2b_kva;
1076	l2b->l2b_kva = NULL;
1077
1078	pl1pd = &pm->pm_l1->l1_kva[l1idx];
1079
1080	/*
1081	 * If the L1 slot matches the pmap's domain
1082	 * number, then invalidate it.
1083	 */
1084	l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK);
1085	if (l1pd == (L1_C_DOM(pm->pm_domain) | L1_TYPE_C)) {
1086		*pl1pd = 0;
1087		PTE_SYNC(pl1pd);
1088	}
1089
1090	/*
1091	 * Release the L2 descriptor table back to the pool cache.
1092	 */
1093#ifndef PMAP_INCLUDE_PTE_SYNC
1094	pmap_free_l2_ptp(ptep);
1095#else
1096	pmap_free_l2_ptp(!pmap_is_current(pm), ptep);
1097#endif
1098
1099	/*
1100	 * Update the reference count in the associated l2_dtable
1101	 */
1102	l2 = pm->pm_l2[L2_IDX(l1idx)];
1103	if (--l2->l2_occupancy > 0)
1104		return;
1105
1106	/*
1107	 * There are no more valid mappings in any of the Level 1
1108	 * slots managed by this l2_dtable. Go ahead and NULL-out
1109	 * the pointer in the parent pmap and free the l2_dtable.
1110	 */
1111	pm->pm_l2[L2_IDX(l1idx)] = NULL;
1112	pmap_free_l2_dtable(l2);
1113}
1114
1115/*
1116 * Pool cache constructors for L2 descriptor tables, metadata and pmap
1117 * structures.
1118 */
1119static int
1120pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags)
1121{
1122#ifndef PMAP_INCLUDE_PTE_SYNC
1123	struct l2_bucket *l2b;
1124	pt_entry_t *ptep, pte;
1125#ifdef ARM_USE_SMALL_ALLOC
1126	pd_entry_t *pde;
1127#endif
1128	vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK;
1129
1130	/*
1131	 * The mappings for these page tables were initially made using
1132	 * pmap_kenter() by the pool subsystem. Therefore, the cache-
1133	 * mode will not be right for page table mappings. To avoid
1134	 * polluting the pmap_kenter() code with a special case for
1135	 * page tables, we simply fix up the cache-mode here if it's not
1136	 * correct.
1137	 */
1138#ifdef ARM_USE_SMALL_ALLOC
1139	pde = &kernel_pmap->pm_l1->l1_kva[L1_IDX(va)];
1140	if (!l1pte_section_p(*pde)) {
1141#endif
1142		l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1143		ptep = &l2b->l2b_kva[l2pte_index(va)];
1144		pte = *ptep;
1145
1146		if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
1147			/*
1148			 * Page tables must have the cache-mode set to
1149			 * Write-Thru.
1150			 */
1151			*ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
1152			PTE_SYNC(ptep);
1153			cpu_tlb_flushD_SE(va);
1154			cpu_cpwait();
1155		}
1156#ifdef ARM_USE_SMALL_ALLOC
1157	}
1158#endif
1159#endif
1160	memset(mem, 0, L2_TABLE_SIZE_REAL);
1161	PTE_SYNC_RANGE(mem, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
1162	return (0);
1163}
1164
1165/*
1166 * A bunch of routines to conditionally flush the caches/TLB depending
1167 * on whether the specified pmap actually needs to be flushed at any
1168 * given time.
1169 */
1170static PMAP_INLINE void
1171pmap_tlb_flushID_SE(pmap_t pm, vm_offset_t va)
1172{
1173
1174	if (pmap_is_current(pm))
1175		cpu_tlb_flushID_SE(va);
1176}
1177
1178static PMAP_INLINE void
1179pmap_tlb_flushD_SE(pmap_t pm, vm_offset_t va)
1180{
1181
1182	if (pmap_is_current(pm))
1183		cpu_tlb_flushD_SE(va);
1184}
1185
1186static PMAP_INLINE void
1187pmap_tlb_flushID(pmap_t pm)
1188{
1189
1190	if (pmap_is_current(pm))
1191		cpu_tlb_flushID();
1192}
1193static PMAP_INLINE void
1194pmap_tlb_flushD(pmap_t pm)
1195{
1196
1197	if (pmap_is_current(pm))
1198		cpu_tlb_flushD();
1199}
1200
1201static PMAP_INLINE void
1202pmap_idcache_wbinv_range(pmap_t pm, vm_offset_t va, vm_size_t len)
1203{
1204
1205	if (pmap_is_current(pm))
1206		cpu_idcache_wbinv_range(va, len);
1207}
1208
1209static PMAP_INLINE void
1210pmap_dcache_wb_range(pmap_t pm, vm_offset_t va, vm_size_t len,
1211    boolean_t do_inv, boolean_t rd_only)
1212{
1213
1214	if (pmap_is_current(pm)) {
1215		if (do_inv) {
1216			if (rd_only)
1217				cpu_dcache_inv_range(va, len);
1218			else
1219				cpu_dcache_wbinv_range(va, len);
1220		} else
1221		if (!rd_only)
1222			cpu_dcache_wb_range(va, len);
1223	}
1224}
1225
1226static PMAP_INLINE void
1227pmap_idcache_wbinv_all(pmap_t pm)
1228{
1229
1230	if (pmap_is_current(pm))
1231		cpu_idcache_wbinv_all();
1232}
1233
1234static PMAP_INLINE void
1235pmap_dcache_wbinv_all(pmap_t pm)
1236{
1237
1238	if (pmap_is_current(pm))
1239		cpu_dcache_wbinv_all();
1240}
1241
1242/*
1243 * PTE_SYNC_CURRENT:
1244 *
1245 *     Make sure the pte is written out to RAM.
1246 *     We need to do this in any of the following cases:
1247 *       - We're dealing with the kernel pmap
1248 *       - There is no pmap active in the cache/tlb.
1249 *       - The specified pmap is 'active' in the cache/tlb.
1250 */
1251#ifdef PMAP_INCLUDE_PTE_SYNC
1252#define	PTE_SYNC_CURRENT(pm, ptep)	\
1253do {					\
1254	if (PMAP_NEEDS_PTE_SYNC && 	\
1255	    pmap_is_current(pm))	\
1256		PTE_SYNC(ptep);		\
1257} while (/*CONSTCOND*/0)
1258#else
1259#define	PTE_SYNC_CURRENT(pm, ptep)	/* nothing */
1260#endif
1261
1262/*
1263 * Since we have a virtually indexed cache, we may need to inhibit caching if
1264 * there is more than one mapping and at least one of them is writable.
1265 * Since we purge the cache on every context switch, we only need to check for
1266 * other mappings within the same pmap, or kernel_pmap.
1267 * This function is also called when a page is unmapped, to possibly reenable
1268 * caching on any remaining mappings.
1269 *
1270 * The code implements the following logic, where:
1271 *
1272 * KW = # of kernel read/write pages
1273 * KR = # of kernel read only pages
1274 * UW = # of user read/write pages
1275 * UR = # of user read only pages
1276 *
1277 * KC = kernel mapping is cacheable
1278 * UC = user mapping is cacheable
1279 *
1280 *               KW=0,KR=0  KW=0,KR>0  KW=1,KR=0  KW>1,KR>=0
1281 *             +---------------------------------------------
1282 * UW=0,UR=0   | ---        KC=1       KC=1       KC=0
1283 * UW=0,UR>0   | UC=1       KC=1,UC=1  KC=0,UC=0  KC=0,UC=0
1284 * UW=1,UR=0   | UC=1       KC=0,UC=0  KC=0,UC=0  KC=0,UC=0
1285 * UW>1,UR>=0  | UC=0       KC=0,UC=0  KC=0,UC=0  KC=0,UC=0
1286 */
1287
1288static const int pmap_vac_flags[4][4] = {
1289	{-1,		0,		0,		PVF_KNC},
1290	{0,		0,		PVF_NC,		PVF_NC},
1291	{0,		PVF_NC,		PVF_NC,		PVF_NC},
1292	{PVF_UNC,	PVF_NC,		PVF_NC,		PVF_NC}
1293};
1294
1295static PMAP_INLINE int
1296pmap_get_vac_flags(const struct vm_page *pg)
1297{
1298	int kidx, uidx;
1299
1300	kidx = 0;
1301	if (pg->md.kro_mappings || pg->md.krw_mappings > 1)
1302		kidx |= 1;
1303	if (pg->md.krw_mappings)
1304		kidx |= 2;
1305
1306	uidx = 0;
1307	if (pg->md.uro_mappings || pg->md.urw_mappings > 1)
1308		uidx |= 1;
1309	if (pg->md.urw_mappings)
1310		uidx |= 2;
1311
1312	return (pmap_vac_flags[uidx][kidx]);
1313}
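/*
 * Example of the lookup above: pmap_vac_flags[] is indexed as [uidx][kidx].
 * A page with one kernel read/write mapping (krw_mappings == 1,
 * kro_mappings == 0 => kidx == 2) and two user read-only mappings
 * (uro_mappings == 2, urw_mappings == 0 => uidx == 1) yields
 * pmap_vac_flags[1][2] == PVF_NC, matching the "UW=0,UR>0" / "KW=1,KR=0"
 * cell (KC=0,UC=0) in the table further up.
 */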
1314
1315static __inline void
1316pmap_vac_me_harder(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1317{
1318	int nattr;
1319
1320	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1321	nattr = pmap_get_vac_flags(pg);
1322
1323	if (nattr < 0) {
1324		pg->md.pvh_attrs &= ~PVF_NC;
1325		return;
1326	}
1327
1328	if (nattr == 0 && (pg->md.pvh_attrs & PVF_NC) == 0) {
1329		return;
1330	}
1331
1332	if (pm == pmap_kernel())
1333		pmap_vac_me_kpmap(pg, pm, va);
1334	else
1335		pmap_vac_me_user(pg, pm, va);
1336
1337	pg->md.pvh_attrs = (pg->md.pvh_attrs & ~PVF_NC) | nattr;
1338}
1339
1340static void
1341pmap_vac_me_kpmap(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1342{
1343	u_int u_cacheable, u_entries;
1344	struct pv_entry *pv;
1345	pmap_t last_pmap = pm;
1346
1347	/*
1348	 * Pass one, see if there are both kernel and user pmaps for
1349	 * this page.  Calculate whether there are user-writable or
1350	 * kernel-writable pages.
1351	 */
1352	u_cacheable = 0;
1353	TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1354		if (pv->pv_pmap != pm && (pv->pv_flags & PVF_NC) == 0)
1355			u_cacheable++;
1356	}
1357
1358	u_entries = pg->md.urw_mappings + pg->md.uro_mappings;
1359
1360	/*
1361	 * We know we have just been updating a kernel entry, so if
1362	 * all user pages are already cacheable, then there is nothing
1363	 * further to do.
1364	 */
1365	if (pg->md.k_mappings == 0 && u_cacheable == u_entries)
1366		return;
1367
1368	if (u_entries) {
1369		/*
1370		 * Scan over the list again, for each entry, if it
1371		 * might not be set correctly, call pmap_vac_me_user
1372		 * to recalculate the settings.
1373		 */
1374		TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1375			/*
1376			 * We know kernel mappings will get set
1377			 * correctly in other calls.  We also know
1378			 * that if the pmap is the same as last_pmap
1379			 * then we've just handled this entry.
1380			 */
1381			if (pv->pv_pmap == pm || pv->pv_pmap == last_pmap)
1382				continue;
1383
1384			/*
1385			 * If there are kernel entries and this page
1386			 * is writable but non-cacheable, then we can
1387			 * skip this entry also.
1388			 */
1389			if (pg->md.k_mappings &&
1390			    (pv->pv_flags & (PVF_NC | PVF_WRITE)) ==
1391			    (PVF_NC | PVF_WRITE))
1392				continue;
1393
1394			/*
1395			 * Similarly if there are no kernel-writable
1396			 * entries and the page is already
1397			 * read-only/cacheable.
1398			 */
1399			if (pg->md.krw_mappings == 0 &&
1400			    (pv->pv_flags & (PVF_NC | PVF_WRITE)) == 0)
1401				continue;
1402
1403			/*
1404			 * For some of the remaining cases, we know
1405			 * that we must recalculate, but for others we
1406			 * can't tell if they are correct or not, so
1407			 * we recalculate anyway.
1408			 */
1409			pmap_vac_me_user(pg, (last_pmap = pv->pv_pmap), 0);
1410		}
1411
1412		if (pg->md.k_mappings == 0)
1413			return;
1414	}
1415
1416	pmap_vac_me_user(pg, pm, va);
1417}
1418
1419static void
1420pmap_vac_me_user(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1421{
1422	pmap_t kpmap = pmap_kernel();
1423	struct pv_entry *pv, *npv;
1424	struct l2_bucket *l2b;
1425	pt_entry_t *ptep, pte;
1426	u_int entries = 0;
1427	u_int writable = 0;
1428	u_int cacheable_entries = 0;
1429	u_int kern_cacheable = 0;
1430	u_int other_writable = 0;
1431
1432	/*
1433	 * Count mappings and writable mappings in this pmap.
1434	 * Include kernel mappings as part of our own.
1435	 * Keep a pointer to the first one.
1436	 */
1437	npv = TAILQ_FIRST(&pg->md.pv_list);
1438	TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1439		/* Count mappings in the same pmap */
1440		if (pm == pv->pv_pmap || kpmap == pv->pv_pmap) {
1441			if (entries++ == 0)
1442				npv = pv;
1443
1444			/* Cacheable mappings */
1445			if ((pv->pv_flags & PVF_NC) == 0) {
1446				cacheable_entries++;
1447				if (kpmap == pv->pv_pmap)
1448					kern_cacheable++;
1449			}
1450
1451			/* Writable mappings */
1452			if (pv->pv_flags & PVF_WRITE)
1453				++writable;
1454		} else
1455		if (pv->pv_flags & PVF_WRITE)
1456			other_writable = 1;
1457	}
1458
1459	/*
1460	 * Enable or disable caching as necessary.
1461	 * Note: the first entry might be part of the kernel pmap,
1462	 * so we can't assume this is indicative of the state of the
1463	 * other (maybe non-kpmap) entries.
1464	 */
1465	if ((entries > 1 && writable) ||
1466	    (entries > 0 && pm == kpmap && other_writable)) {
1467		if (cacheable_entries == 0)
1468			return;
1469
1470		for (pv = npv; pv; pv = TAILQ_NEXT(pv, pv_list)) {
1471			if ((pm != pv->pv_pmap && kpmap != pv->pv_pmap) ||
1472			    (pv->pv_flags & PVF_NC))
1473				continue;
1474
1475			pv->pv_flags |= PVF_NC;
1476
1477			l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va);
1478			ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
1479			pte = *ptep & ~L2_S_CACHE_MASK;
1480
1481			if ((va != pv->pv_va || pm != pv->pv_pmap) &&
1482			    l2pte_valid(pte)) {
1483				if (PV_BEEN_EXECD(pv->pv_flags)) {
1484					pmap_idcache_wbinv_range(pv->pv_pmap,
1485					    pv->pv_va, PAGE_SIZE);
1486					pmap_tlb_flushID_SE(pv->pv_pmap,
1487					    pv->pv_va);
1488				} else
1489				if (PV_BEEN_REFD(pv->pv_flags)) {
1490					pmap_dcache_wb_range(pv->pv_pmap,
1491					    pv->pv_va, PAGE_SIZE, TRUE,
1492					    (pv->pv_flags & PVF_WRITE) == 0);
1493					pmap_tlb_flushD_SE(pv->pv_pmap,
1494					    pv->pv_va);
1495				}
1496			}
1497
1498			*ptep = pte;
1499			PTE_SYNC_CURRENT(pv->pv_pmap, ptep);
1500		}
1501		cpu_cpwait();
1502	} else
1503	if (entries > cacheable_entries) {
1504		/*
1505		 * Turn caching back on for some pages.  If it is a kernel
1506		 * page, only do so if there are no other writable pages.
1507		 */
1508		for (pv = npv; pv; pv = TAILQ_NEXT(pv, pv_list)) {
1509			if (!(pv->pv_flags & PVF_NC) || (pm != pv->pv_pmap &&
1510			    (kpmap != pv->pv_pmap || other_writable)))
1511				continue;
1512
1513			pv->pv_flags &= ~PVF_NC;
1514
1515			l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va);
1516			ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
1517			pte = (*ptep & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode;
1518
1519			if (l2pte_valid(pte)) {
1520				if (PV_BEEN_EXECD(pv->pv_flags)) {
1521					pmap_tlb_flushID_SE(pv->pv_pmap,
1522					    pv->pv_va);
1523				} else
1524				if (PV_BEEN_REFD(pv->pv_flags)) {
1525					pmap_tlb_flushD_SE(pv->pv_pmap,
1526					    pv->pv_va);
1527				}
1528			}
1529
1530			*ptep = pte;
1531			PTE_SYNC_CURRENT(pv->pv_pmap, ptep);
1532		}
1533	}
1534}
1535
1536/*
1537 * Modify pte bits for all ptes corresponding to the given physical address.
1538 * We use `maskbits' rather than `clearbits' because we're always passing
1539 * constants and the latter would require an extra inversion at run-time.
1540 */
1541static int
1542pmap_clearbit(struct vm_page *pg, u_int maskbits)
1543{
1544	struct l2_bucket *l2b;
1545	struct pv_entry *pv;
1546	pt_entry_t *ptep, npte, opte;
1547	pmap_t pm;
1548	vm_offset_t va;
1549	u_int oflags;
1550	int count = 0;
1551
1552	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1553
1554	/*
1555	 * Clear saved attributes (modify, reference)
1556	 */
1557	pg->md.pvh_attrs &= ~(maskbits & (PVF_MOD | PVF_REF));
1558
1559	if (TAILQ_EMPTY(&pg->md.pv_list)) {
1560		return (0);
1561	}
1562
1563	/*
1564	 * Loop over all current mappings, setting/clearing as appropriate
1565	 */
1566	TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1567		va = pv->pv_va;
1568		pm = pv->pv_pmap;
1569		oflags = pv->pv_flags;
1570		pv->pv_flags &= ~maskbits;
1571
1572		PMAP_LOCK(pm);
1573
1574		l2b = pmap_get_l2_bucket(pm, va);
1575
1576		ptep = &l2b->l2b_kva[l2pte_index(va)];
1577		npte = opte = *ptep;
1578
1579		if (maskbits & (PVF_WRITE|PVF_MOD)) {
1580			if ((pv->pv_flags & PVF_NC)) {
1581				/*
1582				 * Entry is not cacheable:
1583				 *
1584				 * Don't turn caching on again if this is a
1585				 * modified emulation. This would be
1586				 * inconsistent with the settings created by
1587				 * pmap_vac_me_harder(). Otherwise, it's safe
1588				 * to re-enable caching.
1589				 *
1590				 * There's no need to call pmap_vac_me_harder()
1591				 * here: all pages are losing their write
1592				 * permission.
1593				 */
1594				if (maskbits & PVF_WRITE) {
1595					npte |= pte_l2_s_cache_mode;
1596					pv->pv_flags &= ~PVF_NC;
1597				}
1598			} else
1599			if (opte & L2_S_PROT_W) {
1600				vm_page_dirty(pg);
1601				/*
1602				 * Entry is writable/cacheable: if the pmap is
1603				 * current, flush the entry from the cache;
1604				 * otherwise it won't be in the cache.
1605				 */
1606				if (PV_BEEN_EXECD(oflags))
1607					pmap_idcache_wbinv_range(pm, pv->pv_va,
1608					    PAGE_SIZE);
1609				else
1610				if (PV_BEEN_REFD(oflags))
1611					pmap_dcache_wb_range(pm, pv->pv_va,
1612					    PAGE_SIZE,
1613					    (maskbits & PVF_REF) ? TRUE : FALSE,
1614					    FALSE);
1615			}
1616
1617			/* make the pte read only */
1618			npte &= ~L2_S_PROT_W;
1619
1620			if (maskbits & PVF_WRITE) {
1621				/*
1622				 * Keep alias accounting up to date
1623				 */
1624				if (pv->pv_pmap == pmap_kernel()) {
1625					if (oflags & PVF_WRITE) {
1626						pg->md.krw_mappings--;
1627						pg->md.kro_mappings++;
1628					}
1629				} else
1630				if (oflags & PVF_WRITE) {
1631					pg->md.urw_mappings--;
1632					pg->md.uro_mappings++;
1633				}
1634			}
1635		}
1636
1637		if (maskbits & PVF_REF) {
1638			if ((pv->pv_flags & PVF_NC) == 0 &&
1639			    (maskbits & (PVF_WRITE|PVF_MOD)) == 0) {
1640				/*
1641				 * Check npte here; we may have already
1642				 * done the wbinv above, and the validity
1643				 * of the PTE is the same for opte and
1644				 * npte.
1645				 */
1646				if (npte & L2_S_PROT_W) {
1647					if (PV_BEEN_EXECD(oflags))
1648						pmap_idcache_wbinv_range(pm,
1649						    pv->pv_va, PAGE_SIZE);
1650					else
1651					if (PV_BEEN_REFD(oflags))
1652						pmap_dcache_wb_range(pm,
1653						    pv->pv_va, PAGE_SIZE,
1654						    TRUE, FALSE);
1655				} else
1656				if ((npte & L2_TYPE_MASK) != L2_TYPE_INV) {
1657					/* XXXJRT need idcache_inv_range */
1658					if (PV_BEEN_EXECD(oflags))
1659						pmap_idcache_wbinv_range(pm,
1660						    pv->pv_va, PAGE_SIZE);
1661					else
1662					if (PV_BEEN_REFD(oflags))
1663						pmap_dcache_wb_range(pm,
1664						    pv->pv_va, PAGE_SIZE,
1665						    TRUE, TRUE);
1666				}
1667			}
1668
1669			/*
1670			 * Make the PTE invalid so that we will take a
1671			 * page fault the next time the mapping is
1672			 * referenced.
1673			 */
1674			npte &= ~L2_TYPE_MASK;
1675			npte |= L2_TYPE_INV;
1676		}
1677
1678		if (npte != opte) {
1679			count++;
1680			*ptep = npte;
1681			PTE_SYNC(ptep);
1682			/* Flush the TLB entry if a current pmap. */
1683			if (PV_BEEN_EXECD(oflags))
1684				pmap_tlb_flushID_SE(pm, pv->pv_va);
1685			else
1686			if (PV_BEEN_REFD(oflags))
1687				pmap_tlb_flushD_SE(pm, pv->pv_va);
1688		}
1689
1690		PMAP_UNLOCK(pm);
1691
1692	}
1693
1694	if (maskbits & PVF_WRITE)
1695		vm_page_flag_clear(pg, PG_WRITEABLE);
1696	return (count);
1697}
1698
1699/*
1700 * main pv_entry manipulation functions:
1701 *   pmap_enter_pv: enter a mapping onto a vm_page list
1702 *   pmap_remove_pv: remove a mapping from a vm_page list
1703 *
1704 * NOTE: pmap_enter_pv expects to lock the pvh itself
1705 *       pmap_remove_pv expects the caller to lock the pvh before calling
1706 */
1707
1708/*
1709 * pmap_enter_pv: enter a mapping onto a vm_page list
1710 *
1711 * => caller should hold the proper lock on pmap_main_lock
1712 * => caller should have pmap locked
1713 * => we will gain the lock on the vm_page and allocate the new pv_entry
1714 * => caller should adjust ptp's wire_count before calling
1715 * => caller should not adjust pmap's wire_count
1716 */
1717static void
1718pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, pmap_t pm,
1719    vm_offset_t va, u_int flags)
1720{
1721
1722	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1723	PMAP_ASSERT_LOCKED(pm);
1724	pve->pv_pmap = pm;
1725	pve->pv_va = va;
1726	pve->pv_flags = flags;
1727
1728	TAILQ_INSERT_HEAD(&pg->md.pv_list, pve, pv_list);
1729	TAILQ_INSERT_HEAD(&pm->pm_pvlist, pve, pv_plist);
1730	pg->md.pvh_attrs |= flags & (PVF_REF | PVF_MOD);
1731	if (pm == pmap_kernel()) {
1732		if (flags & PVF_WRITE)
1733			pg->md.krw_mappings++;
1734		else
1735			pg->md.kro_mappings++;
1736	} else {
1737		if (flags & PVF_WRITE)
1738			pg->md.urw_mappings++;
1739		else
1740			pg->md.uro_mappings++;
1741	}
1742	pg->md.pv_list_count++;
1743	if (pve->pv_flags & PVF_WIRED)
1744		++pm->pm_stats.wired_count;
1745	vm_page_flag_set(pg, PG_REFERENCED);
1746}
1747
1748/*
1749 *
1750 * pmap_find_pv: Find a pv entry
1751 *
1752 * => caller should hold lock on vm_page
1753 */
1754static PMAP_INLINE struct pv_entry *
1755pmap_find_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1756{
1757	struct pv_entry *pv;
1758
1759	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1760	TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list)
1761	    if (pm == pv->pv_pmap && va == pv->pv_va)
1762		    break;
1763	return (pv);
1764}
1765
1766/*
1767 * vector_page_setprot:
1768 *
1769 *	Manipulate the protection of the vector page.
1770 */
1771void
1772vector_page_setprot(int prot)
1773{
1774	struct l2_bucket *l2b;
1775	pt_entry_t *ptep;
1776
1777	l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page);
1778
1779	ptep = &l2b->l2b_kva[l2pte_index(vector_page)];
1780
1781	*ptep = (*ptep & ~L1_S_PROT_MASK) | L2_S_PROT(PTE_KERNEL, prot);
1782	PTE_SYNC(ptep);
1783	cpu_tlb_flushD_SE(vector_page);
1784	cpu_cpwait();
1785}
1786
1787/*
1788 * pmap_remove_pv: try to remove a mapping from a pv_list
1789 *
1790 * => caller should hold proper lock on pmap_main_lock
1791 * => pmap should be locked
1792 * => caller should hold lock on vm_page [so that attrs can be adjusted]
1793 * => caller should adjust ptp's wire_count and free PTP if needed
1794 * => caller should NOT adjust pmap's wire_count
1795 * => we return the removed pve
1796 */
1797
1798static void
1799pmap_nuke_pv(struct vm_page *pg, pmap_t pm, struct pv_entry *pve)
1800{
1801
1802	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1803	PMAP_ASSERT_LOCKED(pm);
1804	TAILQ_REMOVE(&pg->md.pv_list, pve, pv_list);
1805	TAILQ_REMOVE(&pm->pm_pvlist, pve, pv_plist);
1806	if (pve->pv_flags & PVF_WIRED)
1807		--pm->pm_stats.wired_count;
1808	pg->md.pv_list_count--;
1809	if (pg->md.pvh_attrs & PVF_MOD)
1810		vm_page_dirty(pg);
1811	if (pm == pmap_kernel()) {
1812		if (pve->pv_flags & PVF_WRITE)
1813			pg->md.krw_mappings--;
1814		else
1815			pg->md.kro_mappings--;
1816	} else
1817		if (pve->pv_flags & PVF_WRITE)
1818			pg->md.urw_mappings--;
1819		else
1820			pg->md.uro_mappings--;
1821	if (TAILQ_FIRST(&pg->md.pv_list) == NULL ||
1822	    (pg->md.krw_mappings == 0 && pg->md.urw_mappings == 0)) {
1823		pg->md.pvh_attrs &= ~PVF_MOD;
1824		if (TAILQ_FIRST(&pg->md.pv_list) == NULL)
1825			pg->md.pvh_attrs &= ~PVF_REF;
1826		vm_page_flag_clear(pg, PG_WRITEABLE);
1827	}
1828	if (TAILQ_FIRST(&pg->md.pv_list))
1829		vm_page_flag_set(pg, PG_REFERENCED);
1830	if (pve->pv_flags & PVF_WRITE)
1831		pmap_vac_me_harder(pg, pm, 0);
1832}
1833
1834static struct pv_entry *
1835pmap_remove_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1836{
1837	struct pv_entry *pve;
1838
1839	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1840	pve = TAILQ_FIRST(&pg->md.pv_list);
1841
1842	while (pve) {
1843		if (pve->pv_pmap == pm && pve->pv_va == va) {	/* match? */
1844			pmap_nuke_pv(pg, pm, pve);
1845			break;
1846		}
1847		pve = TAILQ_NEXT(pve, pv_list);
1848	}
1849
1850	return(pve);				/* return removed pve */
1851}
1852/*
1853 *
1854 * pmap_modify_pv: Update pv flags
1855 *
1856 * => caller should hold lock on vm_page [so that attrs can be adjusted]
1857 * => caller should NOT adjust pmap's wire_count
1858 * => caller must call pmap_vac_me_harder() if writable status of a page
1859 *    may have changed.
1860 * => we return the old flags
1861 *
1862 * Modify a physical-virtual mapping in the pv table
1863 */
1864static u_int
1865pmap_modify_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va,
1866    u_int clr_mask, u_int set_mask)
1867{
1868	struct pv_entry *npv;
1869	u_int flags, oflags;
1870
1871	PMAP_ASSERT_LOCKED(pm);
1872	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1873	if ((npv = pmap_find_pv(pg, pm, va)) == NULL)
1874		return (0);
1875
1876	/*
1877	 * There is at least one VA mapping this page.
1878	 */
1879
1880	if (clr_mask & (PVF_REF | PVF_MOD))
1881		pg->md.pvh_attrs |= set_mask & (PVF_REF | PVF_MOD);
1882
1883	oflags = npv->pv_flags;
1884	npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask;
1885
1886	if ((flags ^ oflags) & PVF_WIRED) {
1887		if (flags & PVF_WIRED)
1888			++pm->pm_stats.wired_count;
1889		else
1890			--pm->pm_stats.wired_count;
1891	}
1892
1893	if ((flags ^ oflags) & PVF_WRITE) {
1894		if (pm == pmap_kernel()) {
1895			if (flags & PVF_WRITE) {
1896				pg->md.krw_mappings++;
1897				pg->md.kro_mappings--;
1898			} else {
1899				pg->md.kro_mappings++;
1900				pg->md.krw_mappings--;
1901			}
1902		} else
1903		if (flags & PVF_WRITE) {
1904			pg->md.urw_mappings++;
1905			pg->md.uro_mappings--;
1906		} else {
1907			pg->md.uro_mappings++;
1908			pg->md.urw_mappings--;
1909		}
1910		if (pg->md.krw_mappings == 0 && pg->md.urw_mappings == 0) {
1911			pg->md.pvh_attrs &= ~PVF_MOD;
1912			vm_page_flag_clear(pg, PG_WRITEABLE);
1913		}
1914		pmap_vac_me_harder(pg, pm, 0);
1915	}
1916
1917	return (oflags);
1918}
1919
1920/* Function to set the debug level of the pmap code */
1921#ifdef PMAP_DEBUG
1922void
1923pmap_debug(int level)
1924{
1925	pmap_debug_level = level;
1926	dprintf("pmap_debug: level=%d\n", pmap_debug_level);
1927}
1928#endif  /* PMAP_DEBUG */
1929
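/*
 * pmap_pinit0: initialize the pmap of process 0 by copying the kernel
 * pmap wholesale and giving it its own mutex, so the first process
 * initially shares the kernel's L1 translation table.
 */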
1930void
1931pmap_pinit0(struct pmap *pmap)
1932{
1933	PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap));
1934
1935	dprintf("pmap_pinit0: pmap = %08x, pm_pdir = %08x\n",
1936		(u_int32_t) pmap, (u_int32_t) pmap->pm_pdir);
1937	bcopy(kernel_pmap, pmap, sizeof(*pmap));
1938	bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx));
1939	PMAP_LOCK_INIT(pmap);
1940}
1941
1942/*
1943 *	Initialize a vm_page's machine-dependent fields.
1944 */
1945void
1946pmap_page_init(vm_page_t m)
1947{
1948
1949	TAILQ_INIT(&m->md.pv_list);
1950	m->md.pv_list_count = 0;
1951}
1952
1953/*
1954 *      Initialize the pmap module.
1955 *      Called by vm_init, to initialize any structures that the pmap
1956 *      system needs to map virtual memory.
1957 */
1958void
1959pmap_init(void)
1960{
1961	int shpgperproc = PMAP_SHPGPERPROC;
1962
1963	PDEBUG(1, printf("pmap_init:\n"));
1964
1965	/*
1966	 * init the pv free list
1967	 */
1968	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
1969	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1970	/*
1971	 * Now it is safe to enable pv_table recording.
1972	 */
1973	PDEBUG(1, printf("pmap_init: done!\n"));
1974
1975	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
1976
1977	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
1978	pv_entry_high_water = 9 * (pv_entry_max / 10);
1979	l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor,
1980	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1981	l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable),
1982	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1983	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
1984
1985	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
1986
1987}
1988
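/*
 * pmap_fault_fixup: handle a data/prefetch abort that may be due to the
 * software referenced/modified emulation or to a missing/stale L1 entry.
 * The lookup performed below is roughly (a sketch; the authoritative
 * definitions are the L1_IDX/L2_IDX/L2_BUCKET/l2pte_index macros):
 *
 *	l1idx = L1_IDX(va);                        1MB L1 slot
 *	l2    = pm->pm_l2[L2_IDX(l1idx)];          L2 dtable
 *	l2b   = &l2->l2_bucket[L2_BUCKET(l1idx)];  bucket within the dtable
 *	ptep  = &l2b->l2b_kva[l2pte_index(va)];    PTE within the L2 table
 *
 * Returns non-zero if the fault was fixed up here, zero if it could not
 * be handled and must be processed as a genuine fault.
 */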
1989int
1990pmap_fault_fixup(pmap_t pm, vm_offset_t va, vm_prot_t ftype, int user)
1991{
1992	struct l2_dtable *l2;
1993	struct l2_bucket *l2b;
1994	pd_entry_t *pl1pd, l1pd;
1995	pt_entry_t *ptep, pte;
1996	vm_paddr_t pa;
1997	u_int l1idx;
1998	int rv = 0;
1999
2000	l1idx = L1_IDX(va);
2001	vm_page_lock_queues();
2002	PMAP_LOCK(pm);
2003
2004	/*
2005	 * If there is no l2_dtable for this address, then the process
2006	 * has no business accessing it.
2007	 *
2008	 * Note: This will catch userland processes trying to access
2009	 * kernel addresses.
2010	 */
2011	l2 = pm->pm_l2[L2_IDX(l1idx)];
2012	if (l2 == NULL)
2013		goto out;
2014
2015	/*
2016	 * Likewise if there is no L2 descriptor table
2017	 */
2018	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2019	if (l2b->l2b_kva == NULL)
2020		goto out;
2021
2022	/*
2023	 * Check the PTE itself.
2024	 */
2025	ptep = &l2b->l2b_kva[l2pte_index(va)];
2026	pte = *ptep;
2027	if (pte == 0)
2028		goto out;
2029
2030	/*
2031	 * Catch a userland access to the vector page mapped at 0x0
2032	 */
2033	if (user && (pte & L2_S_PROT_U) == 0)
2034		goto out;
2035	if (va == vector_page)
2036		goto out;
2037
2038	pa = l2pte_pa(pte);
2039
2040	if ((ftype & VM_PROT_WRITE) && (pte & L2_S_PROT_W) == 0) {
2041		/*
2042		 * This looks like a good candidate for "page modified"
2043		 * emulation...
2044		 */
2045		struct pv_entry *pv;
2046		struct vm_page *pg;
2047
2048		/* Extract the physical address of the page */
2049		if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) {
2050			goto out;
2051		}
2052		/* Get the current flags for this page. */
2053
2054		pv = pmap_find_pv(pg, pm, va);
2055		if (pv == NULL) {
2056			goto out;
2057		}
2058
2059		/*
2060		 * Do the flags say this page is writable? If not then it
2061		 * is a genuine write fault. If yes then the write fault is
2062		 * our fault as we did not reflect the write access in the
2063		 * PTE.  Now that we know a write has occurred, we can
2064		 * correct this and also set the modified bit.
2065		 */
2066		if ((pv->pv_flags & PVF_WRITE) == 0) {
2067			goto out;
2068		}
2069
2070		pg->md.pvh_attrs |= PVF_REF | PVF_MOD;
2071		vm_page_dirty(pg);
2072		pv->pv_flags |= PVF_REF | PVF_MOD;
2073
2074		/*
2075		 * Re-enable write permissions for the page.  No need to call
2076		 * pmap_vac_me_harder(), since this is just a
2077		 * modified-emulation fault, and the PVF_WRITE bit isn't
2078		 * changing. We've already set the cacheable bits based on
2079		 * the assumption that we can write to this page.
2080		 */
2081		*ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO | L2_S_PROT_W;
2082		PTE_SYNC(ptep);
2083		rv = 1;
2084	} else
2085	if ((pte & L2_TYPE_MASK) == L2_TYPE_INV) {
2086		/*
2087		 * This looks like a good candidate for "page referenced"
2088		 * emulation.
2089		 */
2090		struct pv_entry *pv;
2091		struct vm_page *pg;
2092
2093		/* Extract the physical address of the page */
2094		if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
2095			goto out;
2096		/* Get the current flags for this page. */
2097
2098		pv = pmap_find_pv(pg, pm, va);
2099		if (pv == NULL)
2100			goto out;
2101
2102		pg->md.pvh_attrs |= PVF_REF;
2103		pv->pv_flags |= PVF_REF;
2104
2105
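		/*
		 * Upgrade the invalid (referenced-emulation) PTE to a valid
		 * small-page descriptor now that the reference is recorded.
		 */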
2106		*ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO;
2107		PTE_SYNC(ptep);
2108		rv = 1;
2109	}
2110
2111	/*
2112	 * We know there is a valid mapping here, so simply
2113	 * fix up the L1 if necessary.
2114	 */
2115	pl1pd = &pm->pm_l1->l1_kva[l1idx];
2116	l1pd = l2b->l2b_phys | L1_C_DOM(pm->pm_domain) | L1_C_PROTO;
2117	if (*pl1pd != l1pd) {
2118		*pl1pd = l1pd;
2119		PTE_SYNC(pl1pd);
2120		rv = 1;
2121	}
2122
2123#ifdef CPU_SA110
2124	/*
2125	 * There are bugs in the rev K SA110.  This is a check for one
2126	 * of them.
2127	 */
2128	if (rv == 0 && curcpu()->ci_arm_cputype == CPU_ID_SA110 &&
2129	    curcpu()->ci_arm_cpurev < 3) {
2130		/* Always current pmap */
2131		if (l2pte_valid(pte)) {
2132			extern int kernel_debug;
2133			if (kernel_debug & 1) {
2134				struct proc *p = curthread->td_proc;
2135				printf("prefetch_abort: page is already "
2136				    "mapped - pte=%p *pte=%08x\n", ptep, pte);
2137				printf("prefetch_abort: pc=%08lx proc=%p "
2138				    "process=%s\n", va, p, p->p_comm);
2139				printf("prefetch_abort: far=%08x fs=%x\n",
2140				    cpu_faultaddress(), cpu_faultstatus());
2141			}
2142#ifdef DDB
2143			if (kernel_debug & 2)
2144				Debugger();
2145#endif
2146			rv = 1;
2147		}
2148	}
2149#endif /* CPU_SA110 */
2150
2151#ifdef DEBUG
2152	/*
2153	 * If 'rv == 0' at this point, it generally indicates that there is a
2154	 * stale TLB entry for the faulting address. This happens when two or
2155	 * more processes are sharing an L1. Since we don't flush the TLB on
2156	 * a context switch between such processes, we can take domain faults
2157	 * for mappings which exist at the same VA in both processes. EVEN IF
2158	 * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for
2159	 * example.
2160	 *
2161	 * This is extremely likely to happen if pmap_enter() updated the L1
2162	 * entry for a recently entered mapping. In this case, the TLB is
2163	 * flushed for the new mapping, but there may still be TLB entries for
2164	 * other mappings belonging to other processes in the 1MB range
2165	 * covered by the L1 entry.
2166	 *
2167	 * Since 'rv == 0', we know that the L1 already contains the correct
2168	 * value, so the fault must be due to a stale TLB entry.
2169	 *
2170	 * Since we always need to flush the TLB anyway in the case where we
2171	 * fixed up the L1, or frobbed the L2 PTE, we effectively deal with
2172	 * stale TLB entries dynamically.
2173	 *
2174	 * However, the above condition can ONLY happen if the current L1 is
2175	 * being shared. If it happens when the L1 is unshared, it indicates
2176	 * that other parts of the pmap are not doing their job WRT managing
2177	 * the TLB.
2178	 */
2179	if (rv == 0 && pm->pm_l1->l1_domain_use_count == 1) {
2180		extern int last_fault_code;
2181		printf("fixup: pm %p, va 0x%lx, ftype %d - nothing to do!\n",
2182		    pm, va, ftype);
2183		printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n",
2184		    l2, l2b, ptep, pl1pd);
2185		printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n",
2186		    pte, l1pd, last_fault_code);
2187#ifdef DDB
2188		Debugger();
2189#endif
2190	}
2191#endif
2192
2193	cpu_tlb_flushID_SE(va);
2194	cpu_cpwait();
2195
2196	rv = 1;
2197
2198out:
2199	vm_page_unlock_queues();
2200	PMAP_UNLOCK(pm);
2201	return (rv);
2202}
2203
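/*
 * pmap_postinit: allocate the additional static L1 translation tables
 * needed so that up to 'maxproc' processes can each be assigned one of
 * the PMAP_DOMAINS domains of an L1; the bootstrap L1 already exists,
 * hence the "- 1".  Each new L1 is remapped with the page-table cache
 * mode before being handed to pmap_init_l1().
 */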
2204void
2205pmap_postinit(void)
2206{
2207	struct l2_bucket *l2b;
2208	struct l1_ttable *l1;
2209	pd_entry_t *pl1pt;
2210	pt_entry_t *ptep, pte;
2211	vm_offset_t va, eva;
2212	u_int loop, needed;
2213
2214	needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 1 : 0);
2215	needed -= 1;
2216	l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK);
2217
2218	for (loop = 0; loop < needed; loop++, l1++) {
2219		/* Allocate a L1 page table */
2220		va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0,
2221		    0xffffffff, L1_TABLE_SIZE, 0);
2222
2223		if (va == 0)
2224			panic("Cannot allocate L1 KVM");
2225
2226		eva = va + L1_TABLE_SIZE;
2227		pl1pt = (pd_entry_t *)va;
2228
2229		while (va < eva) {
2230			l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2231			ptep = &l2b->l2b_kva[l2pte_index(va)];
2232			pte = *ptep;
2233			pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
2234			*ptep = pte;
2235			PTE_SYNC(ptep);
2236			cpu_tlb_flushD_SE(va);
2237
2238			va += PAGE_SIZE;
2239		}
2240		pmap_init_l1(l1, pl1pt);
2241	}
2242
2243
2244#ifdef DEBUG
2245	printf("pmap_postinit: Allocated %d static L1 descriptor tables\n",
2246	    needed);
2247#endif
2248}
2249
2250/*
2251 * This is used to stuff certain critical values into the PCB where they
2252 * can be accessed quickly from cpu_switch() et al.
2253 */
2254void
2255pmap_set_pcb_pagedir(pmap_t pm, struct pcb *pcb)
2256{
2257	struct l2_bucket *l2b;
2258
2259	pcb->pcb_pagedir = pm->pm_l1->l1_physaddr;
2260	pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) |
2261	    (DOMAIN_CLIENT << (pm->pm_domain * 2));
2262
2263	if (vector_page < KERNBASE) {
2264		pcb->pcb_pl1vec = &pm->pm_l1->l1_kva[L1_IDX(vector_page)];
2265		l2b = pmap_get_l2_bucket(pm, vector_page);
2266		pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO |
2267	 	    L1_C_DOM(pm->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL);
2268	} else
2269		pcb->pcb_pl1vec = NULL;
2270}
2271
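/*
 * pmap_activate: make the given thread's pmap the active one.  The PCB
 * is loaded with the pmap's L1 physical address and DACR value; if the
 * thread is curthread and the hardware TTB/DACR do not already match,
 * the vector-page L1 slot is fixed up (if needed) and the new tables
 * are installed.
 */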
2272void
2273pmap_activate(struct thread *td)
2274{
2275	pmap_t pm;
2276	struct pcb *pcb;
2277
2278	pm = vmspace_pmap(td->td_proc->p_vmspace);
2279	pcb = td->td_pcb;
2280
2281	critical_enter();
2282	pmap_set_pcb_pagedir(pm, pcb);
2283
2284	if (td == curthread) {
2285		u_int cur_dacr, cur_ttb;
2286
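		/* Read the current TTB (CP15 c2) and DACR (CP15 c3). */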
2287		__asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb));
2288		__asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr));
2289
2290		cur_ttb &= ~(L1_TABLE_SIZE - 1);
2291
2292		if (cur_ttb == (u_int)pcb->pcb_pagedir &&
2293		    cur_dacr == pcb->pcb_dacr) {
2294			/*
2295			 * No need to switch address spaces.
2296			 */
2297			critical_exit();
2298			return;
2299		}
2300
2301
2302		/*
2303		 * We MUST, I repeat, MUST fix up the L1 entry corresponding
2304		 * to 'vector_page' in the incoming L1 table before switching
2305		 * to it otherwise subsequent interrupts/exceptions (including
2306		 * domain faults!) will jump into hyperspace.
2307		 */
2308		if (pcb->pcb_pl1vec) {
2309
2310			*pcb->pcb_pl1vec = pcb->pcb_l1vec;
2311			/*
2312			 * Don't need to PTE_SYNC() at this point since
2313			 * cpu_setttb() is about to flush both the cache
2314			 * and the TLB.
2315			 */
2316		}
2317
2318		cpu_domains(pcb->pcb_dacr);
2319		cpu_setttb(pcb->pcb_pagedir);
2320	}
2321	critical_exit();
2322}
2323
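/*
 * pmap_set_pt_cache_mode: ensure the descriptor mapping 'va' in the
 * kernel L1 'kl1' uses the page-table cache mode.  Returns non-zero if
 * the L1 section or L2 page entry had to be modified.
 */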
2324static int
2325pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va)
2326{
2327	pd_entry_t *pdep, pde;
2328	pt_entry_t *ptep, pte;
2329	vm_offset_t pa;
2330	int rv = 0;
2331
2332	/*
2333	 * Make sure the descriptor itself has the correct cache mode
2334	 */
2335	pdep = &kl1[L1_IDX(va)];
2336	pde = *pdep;
2337
2338	if (l1pte_section_p(pde)) {
2339		if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) {
2340			*pdep = (pde & ~L1_S_CACHE_MASK) |
2341			    pte_l1_s_cache_mode_pt;
2342			PTE_SYNC(pdep);
2343			cpu_dcache_wbinv_range((vm_offset_t)pdep,
2344			    sizeof(*pdep));
2345			rv = 1;
2346		}
2347	} else {
2348		pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK);
2349		ptep = (pt_entry_t *)kernel_pt_lookup(pa);
2350		if (ptep == NULL)
2351			panic("pmap_bootstrap: No L2 for L2 @ va 0x%x\n", (u_int)va);
2352
2353		ptep = &ptep[l2pte_index(va)];
2354		pte = *ptep;
2355		if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
2356			*ptep = (pte & ~L2_S_CACHE_MASK) |
2357			    pte_l2_s_cache_mode_pt;
2358			PTE_SYNC(ptep);
2359			cpu_dcache_wbinv_range((vm_offset_t)ptep,
2360			    sizeof(*ptep));
2361			rv = 1;
2362		}
2363	}
2364
2365	return (rv);
2366}
2367
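/*
 * pmap_alloc_specials: carve 'pages' pages of KVA out of '*availp' for a
 * special-purpose mapping, returning the chosen VA in '*vap' and, when
 * 'ptep' is non-NULL, a pointer to the first PTE covering that VA.
 */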
2368static void
2369pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap,
2370    pt_entry_t **ptep)
2371{
2372	vm_offset_t va = *availp;
2373	struct l2_bucket *l2b;
2374
2375	if (ptep) {
2376		l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2377		if (l2b == NULL)
2378			panic("pmap_alloc_specials: no l2b for 0x%x", va);
2379
2380		*ptep = &l2b->l2b_kva[l2pte_index(va)];
2381	}
2382
2383	*vap = va;
2384	*availp = va + (PAGE_SIZE * pages);
2385}
2386
2387/*
2388 *	Bootstrap the system enough to run with virtual memory.
2389 *
2390 *	On the arm this is called after mapping has already been enabled
2391 *	and just syncs the pmap module with what has already been done.
2392 *	[We can't call it easily with mapping off since the kernel is not
2393 *	mapped with PA == VA, hence we would have to relocate every address
2394 *	from the linked base (virtual) address "KERNBASE" to the actual
2395 *	(physical) address starting relative to 0]
2396 */
2397#define PMAP_STATIC_L2_SIZE 16
2398#ifdef ARM_USE_SMALL_ALLOC
2399extern struct mtx smallalloc_mtx;
2400#endif
2401
2402void
2403pmap_bootstrap(vm_offset_t firstaddr, vm_offset_t lastaddr, struct pv_addr *l1pt)
2404{
2405	static struct l1_ttable static_l1;
2406	static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE];
2407	struct l1_ttable *l1 = &static_l1;
2408	struct l2_dtable *l2;
2409	struct l2_bucket *l2b;
2410	pd_entry_t pde;
2411	pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va;
2412	pt_entry_t *ptep;
2413	vm_paddr_t pa;
2414	vm_offset_t va;
2415	vm_size_t size;
2416	int l1idx, l2idx, l2next = 0;
2417
2418	PDEBUG(1, printf("firstaddr = %08x, loadaddr = %08x\n",
2419	    firstaddr, loadaddr));
2420
2421	virtual_avail = firstaddr;
2422	kernel_pmap = &kernel_pmap_store;
2423	kernel_pmap->pm_l1 = l1;
2424	kernel_l1pa = l1pt->pv_pa;
2425
2426	/*
2427	 * Scan the L1 translation table created by initarm() and create
2428	 * the required metadata for all valid mappings found in it.
2429	 */
2430	for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) {
2431		pde = kernel_l1pt[l1idx];
2432
2433		/*
2434		 * We're only interested in Coarse mappings.
2435		 * pmap_extract() can deal with section mappings without
2436		 * recourse to checking L2 metadata.
2437		 */
2438		if ((pde & L1_TYPE_MASK) != L1_TYPE_C)
2439			continue;
2440
2441		/*
2442		 * Lookup the KVA of this L2 descriptor table
2443		 */
2444		pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK);
2445		ptep = (pt_entry_t *)kernel_pt_lookup(pa);
2446
2447		if (ptep == NULL) {
2448			panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx",
2449			    (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa);
2450		}
2451
2452		/*
2453		 * Fetch the associated L2 metadata structure.
2454		 * Allocate a new one if necessary.
2455		 */
2456		if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) {
2457			if (l2next == PMAP_STATIC_L2_SIZE)
2458				panic("pmap_bootstrap: out of static L2s");
2459			kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 =
2460			    &static_l2[l2next++];
2461		}
2462
2463		/*
2464		 * One more L1 slot tracked...
2465		 */
2466		l2->l2_occupancy++;
2467
2468		/*
2469		 * Fill in the details of the L2 descriptor in the
2470		 * appropriate bucket.
2471		 */
2472		l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2473		l2b->l2b_kva = ptep;
2474		l2b->l2b_phys = pa;
2475		l2b->l2b_l1idx = l1idx;
2476
2477		/*
2478		 * Establish an initial occupancy count for this descriptor
2479		 */
2480		for (l2idx = 0;
2481		    l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
2482		    l2idx++) {
2483			if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) {
2484				l2b->l2b_occupancy++;
2485			}
2486		}
2487
2488		/*
2489		 * Make sure the descriptor itself has the correct cache mode.
2490		 * If not, fix it, but whine about the problem. Port-meisters
2491		 * should consider this a clue to fix up their initarm()
2492		 * function. :)
2493		 */
2494		if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) {
2495			printf("pmap_bootstrap: WARNING! wrong cache mode for "
2496			    "L2 pte @ %p\n", ptep);
2497		}
2498	}
2499
2500
2501	/*
2502	 * Ensure the primary (kernel) L1 has the correct cache mode for
2503	 * a page table. Bitch if it is not correctly set.
2504	 */
2505	for (va = (vm_offset_t)kernel_l1pt;
2506	    va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) {
2507		if (pmap_set_pt_cache_mode(kernel_l1pt, va))
2508			printf("pmap_bootstrap: WARNING! wrong cache mode for "
2509			    "primary L1 @ 0x%x\n", va);
2510	}
2511
2512	cpu_dcache_wbinv_all();
2513	cpu_tlb_flushID();
2514	cpu_cpwait();
2515
2516	PMAP_LOCK_INIT(kernel_pmap);
2517	kernel_pmap->pm_active = -1;
2518	kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL;
2519	TAILQ_INIT(&kernel_pmap->pm_pvlist);
2520
2521	/*
2522	 * Reserve some special page table entries/VA space for temporary
2523	 * mapping of pages.
2524	 */
2525#define SYSMAP(c, p, v, n)						\
2526    v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
2527
2528	pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte);
2529	pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)csrc_pte);
2530	pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte);
2531	pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)cdst_pte);
2532	size = ((lastaddr - pmap_curmaxkvaddr) + L1_S_OFFSET) / L1_S_SIZE;
2533	pmap_alloc_specials(&virtual_avail,
2534	    round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE,
2535	    &pmap_kernel_l2ptp_kva, NULL);
2536
2537	size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE;
2538	pmap_alloc_specials(&virtual_avail,
2539	    round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE,
2540	    &pmap_kernel_l2dtable_kva, NULL);
2541
2542	pmap_alloc_specials(&virtual_avail,
2543	    1, (vm_offset_t*)&_tmppt, NULL);
2544	SLIST_INIT(&l1_list);
2545	TAILQ_INIT(&l1_lru_list);
2546	mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF);
2547	pmap_init_l1(l1, kernel_l1pt);
2548	cpu_dcache_wbinv_all();
2549
2550	virtual_avail = round_page(virtual_avail);
2551	virtual_end = lastaddr;
2552	kernel_vm_end = pmap_curmaxkvaddr;
2553	arm_nocache_startaddr = lastaddr;
2554	mtx_init(&cmtx, "TMP mappings mtx", NULL, MTX_DEF);
2555
2556#ifdef ARM_USE_SMALL_ALLOC
2557	mtx_init(&smallalloc_mtx, "Small alloc page list", NULL, MTX_DEF);
2558	arm_init_smallalloc();
2559#endif
2560	pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb);
2561}
2562
2563/***************************************************
2564 * Pmap allocation/deallocation routines.
2565 ***************************************************/
2566
2567/*
2568 * Release any resources held by the given physical map.
2569 * Called when a pmap initialized by pmap_pinit is being released.
2570 * Should only be called if the map contains no valid mappings.
2571 */
2572void
2573pmap_release(pmap_t pmap)
2574{
2575	struct pcb *pcb;
2576
2577	pmap_idcache_wbinv_all(pmap);
2578	pmap_tlb_flushID(pmap);
2579	cpu_cpwait();
2580	if (vector_page < KERNBASE) {
2581		struct pcb *curpcb = PCPU_GET(curpcb);
2582		pcb = thread0.td_pcb;
2583		if (pmap_is_current(pmap)) {
2584			/*
2585 			 * Frob the L1 entry corresponding to the vector
2586			 * page so that it contains the kernel pmap's domain
2587			 * number. This will ensure pmap_remove() does not
2588			 * pull the current vector page out from under us.
2589			 */
2590			critical_enter();
2591			*pcb->pcb_pl1vec = pcb->pcb_l1vec;
2592			cpu_domains(pcb->pcb_dacr);
2593			cpu_setttb(pcb->pcb_pagedir);
2594			critical_exit();
2595		}
2596		pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE);
2597		/*
2598		 * Make sure cpu_switch(), et al, DTRT. This is safe to do
2599		 * since this process has no remaining mappings of its own.
2600		 */
2601		curpcb->pcb_pl1vec = pcb->pcb_pl1vec;
2602		curpcb->pcb_l1vec = pcb->pcb_l1vec;
2603		curpcb->pcb_dacr = pcb->pcb_dacr;
2604		curpcb->pcb_pagedir = pcb->pcb_pagedir;
2605
2606	}
2607	pmap_free_l1(pmap);
2608	PMAP_LOCK_DESTROY(pmap);
2609
2610	dprintf("pmap_release()\n");
2611}
2612
2613
2614
2615/*
2616 * Helper for pmap_grow_l2_bucket(): map one newly allocated wired page at 'va', returning its physical address via 'pap'.
2617 */
2618static __inline int
2619pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap)
2620{
2621	struct l2_bucket *l2b;
2622	pt_entry_t *ptep;
2623	vm_paddr_t pa;
2624	struct vm_page *pg;
2625
2626	pg = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
2627	if (pg == NULL)
2628		return (1);
2629	pa = VM_PAGE_TO_PHYS(pg);
2630
2631	if (pap)
2632		*pap = pa;
2633
2634	l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2635
2636	ptep = &l2b->l2b_kva[l2pte_index(va)];
2637	*ptep = L2_S_PROTO | pa | cache_mode |
2638	    L2_S_PROT(PTE_KERNEL, VM_PROT_READ | VM_PROT_WRITE);
2639	PTE_SYNC(ptep);
2640	return (0);
2641}
2642
2643/*
2644 * This is the same as pmap_alloc_l2_bucket(), except that it is only
2645 * used by pmap_growkernel().
2646 */
2647static __inline struct l2_bucket *
2648pmap_grow_l2_bucket(pmap_t pm, vm_offset_t va)
2649{
2650	struct l2_dtable *l2;
2651	struct l2_bucket *l2b;
2652	struct l1_ttable *l1;
2653	pd_entry_t *pl1pd;
2654	u_short l1idx;
2655	vm_offset_t nva;
2656
2657	l1idx = L1_IDX(va);
2658
2659	if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
2660		/*
2661		 * No mapping at this address, as there is
2662		 * no entry in the L1 table.
2663		 * Need to allocate a new l2_dtable.
2664		 */
2665		nva = pmap_kernel_l2dtable_kva;
2666		if ((nva & PAGE_MASK) == 0) {
2667			/*
2668			 * Need to allocate a backing page
2669			 */
2670			if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2671				return (NULL);
2672		}
2673
2674		l2 = (struct l2_dtable *)nva;
2675		nva += sizeof(struct l2_dtable);
2676
2677		if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva &
2678		    PAGE_MASK)) {
2679			/*
2680			 * The new l2_dtable straddles a page boundary.
2681			 * Map in another page to cover it.
2682			 */
2683			if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2684				return (NULL);
2685		}
2686
2687		pmap_kernel_l2dtable_kva = nva;
2688
2689		/*
2690		 * Link it into the parent pmap
2691		 */
2692		pm->pm_l2[L2_IDX(l1idx)] = l2;
2693		memset(l2, 0, sizeof(*l2));
2694	}
2695
2696	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2697
2698	/*
2699	 * Fetch pointer to the L2 page table associated with the address.
2700	 */
2701	if (l2b->l2b_kva == NULL) {
2702		pt_entry_t *ptep;
2703
2704		/*
2705		 * No L2 page table has been allocated. Chances are, this
2706		 * is because we just allocated the l2_dtable, above.
2707		 */
2708		nva = pmap_kernel_l2ptp_kva;
2709		ptep = (pt_entry_t *)nva;
2710		if ((nva & PAGE_MASK) == 0) {
2711			/*
2712			 * Need to allocate a backing page
2713			 */
2714			if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt,
2715			    &pmap_kernel_l2ptp_phys))
2716				return (NULL);
2717			PTE_SYNC_RANGE(ptep, PAGE_SIZE / sizeof(pt_entry_t));
2718		}
2719		memset(ptep, 0, L2_TABLE_SIZE_REAL);
2720		l2->l2_occupancy++;
2721		l2b->l2b_kva = ptep;
2722		l2b->l2b_l1idx = l1idx;
2723		l2b->l2b_phys = pmap_kernel_l2ptp_phys;
2724
2725		pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL;
2726		pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL;
2727	}
2728
2729	/* Distribute new L1 entry to all other L1s */
2730	SLIST_FOREACH(l1, &l1_list, l1_link) {
2731		pl1pd = &l1->l1_kva[L1_IDX(va)];
2732		*pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) |
2733		    L1_C_PROTO;
2734		PTE_SYNC(pl1pd);
2735	}
2736
2737	return (l2b);
2738}
2739
2740
2741/*
2742 * grow the number of kernel page table entries, if needed
2743 */
2744void
2745pmap_growkernel(vm_offset_t addr)
2746{
2747	pmap_t kpm = pmap_kernel();
2748
2749	if (addr <= pmap_curmaxkvaddr)
2750		return;		/* we are OK */
2751
2752	/*
2753	 * whoops!   we need to add kernel PTPs
2754	 */
2755
2756	/* Map 1MB at a time */
2757	for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE)
2758		pmap_grow_l2_bucket(kpm, pmap_curmaxkvaddr);
2759
2760	/*
2761	 * flush out the cache; this is expensive, but growkernel
2762	 * happens rarely
2763	 */
2764	cpu_dcache_wbinv_all();
2765	cpu_tlb_flushD();
2766	cpu_cpwait();
2767	kernel_vm_end = pmap_curmaxkvaddr;
2768
2769}
2770
2771
2772/*
2773 * Remove all pages from the specified address space;
2774 * this aids process exit speed.  Also, this code is
2775 * special-cased for the current process only, but can have
2776 * the more generic (and slightly slower) mode enabled.
2777 * It is much faster than pmap_remove when running down
2778 * an entire address space.
2779 */
2780void
2781pmap_remove_pages(pmap_t pmap)
2782{
2783	struct pv_entry *pv, *npv;
2784	struct l2_bucket *l2b = NULL;
2785	vm_page_t m;
2786	pt_entry_t *pt;
2787
2788	vm_page_lock_queues();
2789	PMAP_LOCK(pmap);
2790	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2791		if (pv->pv_flags & PVF_WIRED) {
2792			/* The page is wired, cannot remove it now. */
2793			npv = TAILQ_NEXT(pv, pv_plist);
2794			continue;
2795		}
2796		pmap->pm_stats.resident_count--;
2797		l2b = pmap_get_l2_bucket(pmap, pv->pv_va);
2798		KASSERT(l2b != NULL, ("No L2 bucket in pmap_remove_pages"));
2799		pt = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
2800		m = PHYS_TO_VM_PAGE(*pt & L2_ADDR_MASK);
2801#ifdef ARM_USE_SMALL_ALLOC
2802		KASSERT((vm_offset_t)m >= alloc_firstaddr, ("Trying to access non-existent page va %x pte %x", pv->pv_va, *pt));
2803#else
2804		KASSERT((vm_offset_t)m >= KERNBASE, ("Trying to access non-existent page va %x pte %x", pv->pv_va, *pt));
2805#endif
2806		*pt = 0;
2807		PTE_SYNC(pt);
2808		npv = TAILQ_NEXT(pv, pv_plist);
2809		pmap_nuke_pv(m, pmap, pv);
2810		if (TAILQ_EMPTY(&m->md.pv_list))
2811			vm_page_flag_clear(m, PG_WRITEABLE);
2812		pmap_free_pv_entry(pv);
2813		pmap_free_l2_bucket(pmap, l2b, 1);
2814	}
2815	vm_page_unlock_queues();
2816	cpu_idcache_wbinv_all();
2817	cpu_tlb_flushID();
2818	cpu_cpwait();
2819	PMAP_UNLOCK(pmap);
2820}
2821
2822
2823/***************************************************
2824 * Low level mapping routines.....
2825 ***************************************************/
2826
2827#ifdef ARM_HAVE_SUPERSECTIONS
2828/* Map a super section into the KVA. */
2829
2830void
2831pmap_kenter_supersection(vm_offset_t va, uint64_t pa, int flags)
2832{
2833	pd_entry_t pd = L1_S_PROTO | L1_S_SUPERSEC | (pa & L1_SUP_FRAME) |
2834	    (((pa >> 32) & 0xf) << 20) | L1_S_PROT(PTE_KERNEL,
2835	    VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL);
2836	struct l1_ttable *l1;
2837	vm_offset_t va0, va_end;
2838
2839	KASSERT(((va | pa) & L1_SUP_OFFSET) == 0,
2840	    ("Not a valid super section mapping"));
2841	if (flags & SECTION_CACHE)
2842		pd |= pte_l1_s_cache_mode;
2843	else if (flags & SECTION_PT)
2844		pd |= pte_l1_s_cache_mode_pt;
2845	va0 = va & L1_SUP_FRAME;
2846	va_end = va + L1_SUP_SIZE;
2847	SLIST_FOREACH(l1, &l1_list, l1_link) {
2848		va = va0;
2849		for (; va < va_end; va += L1_S_SIZE) {
2850			l1->l1_kva[L1_IDX(va)] = pd;
2851			PTE_SYNC(&l1->l1_kva[L1_IDX(va)]);
2852		}
2853	}
2854}
2855#endif
2856
2857/* Map a section into the KVA. */
2858
2859void
2860pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags)
2861{
2862	pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL,
2863	    VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL);
2864	struct l1_ttable *l1;
2865
2866	KASSERT(((va | pa) & L1_S_OFFSET) == 0,
2867	    ("Not a valid section mapping"));
2868	if (flags & SECTION_CACHE)
2869		pd |= pte_l1_s_cache_mode;
2870	else if (flags & SECTION_PT)
2871		pd |= pte_l1_s_cache_mode_pt;
2872	SLIST_FOREACH(l1, &l1_list, l1_link) {
2873		l1->l1_kva[L1_IDX(va)] = pd;
2874		PTE_SYNC(&l1->l1_kva[L1_IDX(va)]);
2875	}
2876}
2877
2878/*
2879 * Add a wired page to the KVA.  Note that in order for the
2880 * mapping to take effect, you should do a TLB invalidate after
2881 * doing the pmap_kenter...
2882 */
2883static PMAP_INLINE void
2884pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags)
2885{
2886	struct l2_bucket *l2b;
2887	pt_entry_t *pte;
2888	pt_entry_t opte;
2889	PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n",
2890	    (uint32_t) va, (uint32_t) pa));
2891
2892
2893	l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2894	if (l2b == NULL)
2895		l2b = pmap_grow_l2_bucket(pmap_kernel(), va);
2896	KASSERT(l2b != NULL, ("No L2 Bucket"));
2897	pte = &l2b->l2b_kva[l2pte_index(va)];
2898	opte = *pte;
2899	PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n",
2900	    (uint32_t) pte, opte, *pte));
2901	if (l2pte_valid(opte)) {
2902		cpu_dcache_wbinv_range(va, PAGE_SIZE);
2903		cpu_tlb_flushD_SE(va);
2904		cpu_cpwait();
2905	} else {
2906		if (opte == 0)
2907			l2b->l2b_occupancy++;
2908	}
2909	*pte = L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL,
2910	    VM_PROT_READ | VM_PROT_WRITE);
2911	if (flags & KENTER_CACHE)
2912		*pte |= pte_l2_s_cache_mode;
2913	if (flags & KENTER_USER)
2914		*pte |= L2_S_PROT_U;
2915	PTE_SYNC(pte);
2916}
2917
2918void
2919pmap_kenter(vm_offset_t va, vm_paddr_t pa)
2920{
2921	pmap_kenter_internal(va, pa, KENTER_CACHE);
2922}
2923
2924void
2925pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa)
2926{
2927
2928	pmap_kenter_internal(va, pa, 0);
2929}
2930
2931void
2932pmap_kenter_user(vm_offset_t va, vm_paddr_t pa)
2933{
2934
2935	pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER);
2936	/*
2937	 * Call pmap_fault_fixup now, to make sure we'll have no exception
2938	 * at the first use of the new address, or bad things will happen,
2939	 * as we use one of these addresses in the exception handlers.
2940	 */
2941	pmap_fault_fixup(pmap_kernel(), va, VM_PROT_READ|VM_PROT_WRITE, 1);
2942}
2943
2944/*
2945 * remove a page from the kernel page tables
2946 */
2947void
2948pmap_kremove(vm_offset_t va)
2949{
2950	struct l2_bucket *l2b;
2951	pt_entry_t *pte, opte;
2952
2953	l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2954	if (!l2b)
2955		return;
2956	KASSERT(l2b != NULL, ("No L2 Bucket"));
2957	pte = &l2b->l2b_kva[l2pte_index(va)];
2958	opte = *pte;
2959	if (l2pte_valid(opte)) {
2960		cpu_dcache_wbinv_range(va, PAGE_SIZE);
2961		cpu_tlb_flushD_SE(va);
2962		cpu_cpwait();
2963		*pte = 0;
2964	}
2965}
2966
2967
2968/*
2969 *	Used to map a range of physical addresses into kernel
2970 *	virtual address space.
2971 *
2972 *	The value passed in '*virt' is a suggested virtual address for
2973 *	the mapping. Architectures which can support a direct-mapped
2974 *	physical to virtual region can return the appropriate address
2975 *	within that region, leaving '*virt' unchanged. Other
2976 *	architectures should map the pages starting at '*virt' and
2977 *	update '*virt' with the first usable address after the mapped
2978 *	region.
2979 */
2980vm_offset_t
2981pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
2982{
2983#ifdef ARM_USE_SMALL_ALLOC
2984	return (arm_ptovirt(start));
2985#else
2986	vm_offset_t sva = *virt;
2987	vm_offset_t va = sva;
2988
2989	PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, "
2990	    "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end,
2991	    prot));
2992
2993	while (start < end) {
2994		pmap_kenter(va, start);
2995		va += PAGE_SIZE;
2996		start += PAGE_SIZE;
2997	}
2998	*virt = va;
2999	return (sva);
3000#endif
3001}
3002
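/*
 * pmap_wb_page/pmap_inv_page: write back (respectively invalidate) every
 * cached mapping of the page before it is entered into, or removed from,
 * the temporary KVA mappings managed by pmap_qenter()/pmap_qremove().
 */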
3003static void
3004pmap_wb_page(vm_page_t m)
3005{
3006	struct pv_entry *pv;
3007
3008	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
3009	    pmap_dcache_wb_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE, FALSE,
3010		(pv->pv_flags & PVF_WRITE) == 0);
3011}
3012
3013static void
3014pmap_inv_page(vm_page_t m)
3015{
3016	struct pv_entry *pv;
3017
3018	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
3019	    pmap_dcache_wb_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE, TRUE, TRUE);
3020}
3021/*
3022 * Add a list of wired pages to the KVA.
3023 * This routine is only used for temporary
3024 * kernel mappings that do not need to have
3025 * page modification or references recorded.
3026 * Note that old mappings are simply written
3027 * over.  The page *must* be wired.
3028 */
3029void
3030pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
3031{
3032	int i;
3033
3034	for (i = 0; i < count; i++) {
3035		pmap_wb_page(m[i]);
3036		pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]),
3037		    KENTER_CACHE);
3038		va += PAGE_SIZE;
3039	}
3040}
3041
3042
3043/*
3044 * this routine jerks page mappings from the
3045 * kernel -- it is meant only for temporary mappings.
3046 */
3047void
3048pmap_qremove(vm_offset_t va, int count)
3049{
3050	vm_paddr_t pa;
3051	int i;
3052
3053	for (i = 0; i < count; i++) {
3054		pa = vtophys(va);
3055		if (pa) {
3056			pmap_inv_page(PHYS_TO_VM_PAGE(pa));
3057			pmap_kremove(va);
3058		}
3059		va += PAGE_SIZE;
3060	}
3061}
3062
3063
3064/*
3065 * pmap_object_init_pt preloads the ptes for a given object
3066 * into the specified pmap.  This eliminates the blast of soft
3067 * faults on process startup and immediately after an mmap.
3068 */
3069void
3070pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
3071    vm_pindex_t pindex, vm_size_t size)
3072{
3073
3074	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
3075	KASSERT(object->type == OBJT_DEVICE,
3076	    ("pmap_object_init_pt: non-device object"));
3077}
3078
3079
3080/*
3081 *	pmap_is_prefaultable:
3082 *
3083 *	Return whether or not the specified virtual address is eligible
3084 *	for prefault.
3085 */
3086boolean_t
3087pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
3088{
3089	pd_entry_t *pde;
3090	pt_entry_t *pte;
3091
3092	if (!pmap_get_pde_pte(pmap, addr, &pde, &pte))
3093		return (FALSE);
3094	KASSERT(pte != NULL, ("Valid mapping but no pte ?"));
3095	if (*pte == 0)
3096		return (TRUE);
3097	return (FALSE);
3098}
3099
3100/*
3101 * Fetch pointers to the PDE/PTE for the given pmap/VA pair.
3102 * Returns TRUE if the mapping exists, else FALSE.
3103 *
3104 * NOTE: This function is only used by a couple of arm-specific modules.
3105 * It is not safe to take any pmap locks here, since we could be right
3106 * in the middle of debugging the pmap anyway...
3107 *
3108 * It is possible for this routine to return FALSE even though a valid
3109 * mapping does exist. This is because we don't lock, so the metadata
3110 * state may be inconsistent.
3111 *
3112 * NOTE: We can return a NULL *ptp in the case where the L1 pde is
3113 * a "section" mapping.
3114 */
3115boolean_t
3116pmap_get_pde_pte(pmap_t pm, vm_offset_t va, pd_entry_t **pdp, pt_entry_t **ptp)
3117{
3118	struct l2_dtable *l2;
3119	pd_entry_t *pl1pd, l1pd;
3120	pt_entry_t *ptep;
3121	u_short l1idx;
3122
3123	if (pm->pm_l1 == NULL)
3124		return (FALSE);
3125
3126	l1idx = L1_IDX(va);
3127	*pdp = pl1pd = &pm->pm_l1->l1_kva[l1idx];
3128	l1pd = *pl1pd;
3129
3130	if (l1pte_section_p(l1pd)) {
3131		*ptp = NULL;
3132		return (TRUE);
3133	}
3134
3135	if (pm->pm_l2 == NULL)
3136		return (FALSE);
3137
3138	l2 = pm->pm_l2[L2_IDX(l1idx)];
3139
3140	if (l2 == NULL ||
3141	    (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
3142		return (FALSE);
3143	}
3144
3145	*ptp = &ptep[l2pte_index(va)];
3146	return (TRUE);
3147}
3148
3149/*
3150 *      Routine:        pmap_remove_all
3151 *      Function:
3152 *              Removes this physical page from
3153 *              all physical maps in which it resides.
3154 *              Reflects back modify bits to the pager.
3155 *
3156 *      Notes:
3157 *              Original versions of this routine were very
3158 *              inefficient because they iteratively called
3159 *              pmap_remove (slow...)
3160 */
3161void
3162pmap_remove_all(vm_page_t m)
3163{
3164	pv_entry_t pv;
3165	pt_entry_t *ptep, pte;
3166	struct l2_bucket *l2b;
3167	boolean_t flush = FALSE;
3168	pmap_t curpm;
3169	int flags = 0;
3170
3171#if defined(PMAP_DEBUG)
3172	/*
3173	 * XXX This makes pmap_remove_all() illegal for non-managed pages!
3174	 */
3175	if (m->flags & PG_FICTITIOUS) {
3176		panic("pmap_remove_all: illegal for unmanaged page, pa: 0x%x", VM_PAGE_TO_PHYS(m));
3177	}
3178#endif
3179
3180	if (TAILQ_EMPTY(&m->md.pv_list))
3181		return;
3182	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
3183	curpm = vmspace_pmap(curproc->p_vmspace);
3184	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
3185		if (flush == FALSE && (pv->pv_pmap == curpm ||
3186		    pv->pv_pmap == pmap_kernel()))
3187			flush = TRUE;
3188		PMAP_LOCK(pv->pv_pmap);
3189		l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va);
3190		KASSERT(l2b != NULL, ("No l2 bucket"));
3191		ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
3192		pte = *ptep;
3193		*ptep = 0;
3194		PTE_SYNC_CURRENT(pv->pv_pmap, ptep);
3195		pmap_free_l2_bucket(pv->pv_pmap, l2b, 1);
3196		if (pv->pv_flags & PVF_WIRED)
3197			pv->pv_pmap->pm_stats.wired_count--;
3198		pv->pv_pmap->pm_stats.resident_count--;
3199		flags |= pv->pv_flags;
3200		pmap_nuke_pv(m, pv->pv_pmap, pv);
3201		PMAP_UNLOCK(pv->pv_pmap);
3202		pmap_free_pv_entry(pv);
3203	}
3204
3205	if (flush) {
3206		if (PV_BEEN_EXECD(flags))
3207			pmap_tlb_flushID(curpm);
3208		else
3209			pmap_tlb_flushD(curpm);
3210	}
3211	vm_page_flag_clear(m, PG_WRITEABLE);
3212}
3213
3214
3215/*
3216 *	Set the physical protection on the
3217 *	specified range of this map as requested.
3218 */
3219void
3220pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
3221{
3222	struct l2_bucket *l2b;
3223	pt_entry_t *ptep, pte;
3224	vm_offset_t next_bucket;
3225	u_int flags;
3226	int flush;
3227
3228	if ((prot & VM_PROT_READ) == 0) {
3229		pmap_remove(pm, sva, eva);
3230		return;
3231	}
3232
3233	if (prot & VM_PROT_WRITE) {
3234		/*
3235		 * If this is a read->write transition, just ignore it and let
3236		 * vm_fault() take care of it later.
3237		 */
3238		return;
3239	}
3240
3241	vm_page_lock_queues();
3242	PMAP_LOCK(pm);
3243
3244	/*
3245	 * OK, at this point, we know we're doing a write-protect operation.
3246	 * If the pmap is active, write-back the range.
3247	 */
3248	pmap_dcache_wb_range(pm, sva, eva - sva, FALSE, FALSE);
3249
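	/*
	 * For short ranges (< 4 pages) flush the TLB entry for each page
	 * as it is downgraded (flush == -1); for larger ranges just count
	 * the modified pages and do a single full flush at the end.
	 */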
3250	flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1;
3251	flags = 0;
3252
3253	while (sva < eva) {
3254		next_bucket = L2_NEXT_BUCKET(sva);
3255		if (next_bucket > eva)
3256			next_bucket = eva;
3257
3258		l2b = pmap_get_l2_bucket(pm, sva);
3259		if (l2b == NULL) {
3260			sva = next_bucket;
3261			continue;
3262		}
3263
3264		ptep = &l2b->l2b_kva[l2pte_index(sva)];
3265
3266		while (sva < next_bucket) {
3267			if ((pte = *ptep) != 0 && (pte & L2_S_PROT_W) != 0) {
3268				struct vm_page *pg;
3269				u_int f;
3270
3271				pg = PHYS_TO_VM_PAGE(l2pte_pa(pte));
3272				pte &= ~L2_S_PROT_W;
3273				*ptep = pte;
3274				PTE_SYNC(ptep);
3275
3276				if (pg != NULL) {
3277					f = pmap_modify_pv(pg, pm, sva,
3278					    PVF_WRITE, 0);
3279					vm_page_dirty(pg);
3280				} else
3281					f = PVF_REF | PVF_EXEC;
3282
3283				if (flush >= 0) {
3284					flush++;
3285					flags |= f;
3286				} else
3287				if (PV_BEEN_EXECD(f))
3288					pmap_tlb_flushID_SE(pm, sva);
3289				else
3290				if (PV_BEEN_REFD(f))
3291					pmap_tlb_flushD_SE(pm, sva);
3292			}
3293
3294			sva += PAGE_SIZE;
3295			ptep++;
3296		}
3297	}
3298
3299
3300	if (flush) {
3301		if (PV_BEEN_EXECD(flags))
3302			pmap_tlb_flushID(pm);
3303		else
3304		if (PV_BEEN_REFD(flags))
3305			pmap_tlb_flushD(pm);
3306	}
3307	vm_page_unlock_queues();
3308
3309 	PMAP_UNLOCK(pm);
3310}
3311
3312
3313/*
3314 *	Insert the given physical page (p) at
3315 *	the specified virtual address (v) in the
3316 *	target physical map with the protection requested.
3317 *
3318 *	If specified, the page will be wired down, meaning
3319 *	that the related pte can not be reclaimed.
3320 *
3321 *	NB:  This is the only routine which MAY NOT lazy-evaluate
3322 *	or lose information.  That is, this routine must actually
3323 *	insert this page into the given map NOW.
3324 */
3325
3326void
3327pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
3328    vm_prot_t prot, boolean_t wired)
3329{
3330
3331	vm_page_lock_queues();
3332	PMAP_LOCK(pmap);
3333	pmap_enter_locked(pmap, va, m, prot, wired, M_WAITOK);
3334	vm_page_unlock_queues();
3335 	PMAP_UNLOCK(pmap);
3336}
3337
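/*
 * pmap_enter_locked: back end for pmap_enter(), pmap_enter_object() and
 * pmap_enter_quick().  Resolves (allocating if necessary) the L2 bucket
 * for 'va', builds the new PTE with referenced/modified emulation in
 * mind, updates the pv list and cache/TLB state, and fixes up the L1
 * slot for mappings in the current pmap.
 */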
3338/*
3339 *	The page queues and pmap must be locked.
3340 */
3341static void
3342pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
3343    boolean_t wired, int flags)
3344{
3345	struct l2_bucket *l2b = NULL;
3346	struct vm_page *opg;
3347	struct pv_entry *pve = NULL;
3348	pt_entry_t *ptep, npte, opte;
3349	u_int nflags;
3350	u_int oflags;
3351	vm_paddr_t pa;
3352
3353	PMAP_ASSERT_LOCKED(pmap);
3354	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
3355	if (va == vector_page) {
3356		pa = systempage.pv_pa;
3357		m = NULL;
3358	} else
3359		pa = VM_PAGE_TO_PHYS(m);
3360	nflags = 0;
3361	if (prot & VM_PROT_WRITE)
3362		nflags |= PVF_WRITE;
3363	if (prot & VM_PROT_EXECUTE)
3364		nflags |= PVF_EXEC;
3365	if (wired)
3366		nflags |= PVF_WIRED;
3367	PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, prot = %x, "
3368	    "wired = %x\n", (uint32_t) pmap, va, (uint32_t) m, prot, wired));
3369
3370	if (pmap == pmap_kernel()) {
3371		l2b = pmap_get_l2_bucket(pmap, va);
3372		if (l2b == NULL)
3373			l2b = pmap_grow_l2_bucket(pmap, va);
3374	} else {
3375do_l2b_alloc:
3376		l2b = pmap_alloc_l2_bucket(pmap, va);
3377		if (l2b == NULL) {
3378			if (flags & M_WAITOK) {
3379				PMAP_UNLOCK(pmap);
3380				vm_page_unlock_queues();
3381				VM_WAIT;
3382				vm_page_lock_queues();
3383				PMAP_LOCK(pmap);
3384				goto do_l2b_alloc;
3385			}
3386			return;
3387		}
3388	}
3389
3390	ptep = &l2b->l2b_kva[l2pte_index(va)];
3391
3392	opte = *ptep;
3393	npte = pa;
3394	oflags = 0;
3395	if (opte) {
3396		/*
3397		 * There is already a mapping at this address.
3398		 * If the physical address is different, lookup the
3399		 * vm_page.
3400		 */
3401		if (l2pte_pa(opte) != pa)
3402			opg = PHYS_TO_VM_PAGE(l2pte_pa(opte));
3403		else
3404			opg = m;
3405	} else
3406		opg = NULL;
3407
3408	if ((prot & (VM_PROT_ALL)) ||
3409	    (!m || m->md.pvh_attrs & PVF_REF)) {
3410		/*
3411		 * - The access type indicates that we don't need
3412		 *   to do referenced emulation.
3413		 * OR
3414		 * - The physical page has already been referenced
3415		 *   so no need to re-do referenced emulation here.
3416		 */
3417		npte |= L2_S_PROTO;
3418
3419		nflags |= PVF_REF;
3420
3421		if (m && ((prot & VM_PROT_WRITE) != 0 ||
3422		    (m->md.pvh_attrs & PVF_MOD))) {
3423			/*
3424			 * This is a writable mapping, and the
3425			 * page's mod state indicates it has
3426			 * already been modified. Make it
3427			 * writable from the outset.
3428			 */
3429			nflags |= PVF_MOD;
3430			if (!(m->md.pvh_attrs & PVF_MOD))
3431				vm_page_dirty(m);
3432		}
3433		if (m && opte)
3434			vm_page_flag_set(m, PG_REFERENCED);
3435	} else {
3436		/*
3437		 * Need to do page referenced emulation.
3438		 */
3439		npte |= L2_TYPE_INV;
3440	}
3441
3442	if (prot & VM_PROT_WRITE) {
3443		npte |= L2_S_PROT_W;
3444		if (m != NULL)
3445			vm_page_flag_set(m, PG_WRITEABLE);
3446	}
3447	npte |= pte_l2_s_cache_mode;
3448	if (m && m == opg) {
3449		/*
3450		 * We're changing the attrs of an existing mapping.
3451		 */
3452		oflags = pmap_modify_pv(m, pmap, va,
3453		    PVF_WRITE | PVF_EXEC | PVF_WIRED |
3454		    PVF_MOD | PVF_REF, nflags);
3455
3456		/*
3457		 * We may need to flush the cache if we're
3458		 * doing rw-ro...
3459		 */
3460		if (pmap_is_current(pmap) &&
3461		    (oflags & PVF_NC) == 0 &&
3462			    (opte & L2_S_PROT_W) != 0 &&
3463			    (prot & VM_PROT_WRITE) == 0)
3464			cpu_dcache_wb_range(va, PAGE_SIZE);
3465	} else {
3466		/*
3467		 * New mapping, or changing the backing page
3468		 * of an existing mapping.
3469		 */
3470		if (opg) {
3471			/*
3472			 * Replacing an existing mapping with a new one.
3473			 * It is part of our managed memory so we
3474			 * must remove it from the PV list
3475			 */
3476			pve = pmap_remove_pv(opg, pmap, va);
3477			if (m && (m->flags & (PG_UNMANAGED | PG_FICTITIOUS)) &&
3478			    pve)
3479				pmap_free_pv_entry(pve);
3480			else if (!pve &&
3481			    !(m->flags & (PG_UNMANAGED | PG_FICTITIOUS)))
3482				pve = pmap_get_pv_entry();
3483			KASSERT(pve != NULL || m->flags & (PG_UNMANAGED |
3484			    PG_FICTITIOUS), ("No pv"));
3485			oflags = pve->pv_flags;
3486
3487			/*
3488			 * If the old mapping was valid (ref/mod
3489			 * emulation creates 'invalid' mappings
3490			 * initially) then make sure to frob
3491			 * the cache.
3492			 */
3493			if ((oflags & PVF_NC) == 0 &&
3494			    l2pte_valid(opte)) {
3495				if (PV_BEEN_EXECD(oflags)) {
3496					pmap_idcache_wbinv_range(pmap, va,
3497					    PAGE_SIZE);
3498				} else
3499					if (PV_BEEN_REFD(oflags)) {
3500						pmap_dcache_wb_range(pmap, va,
3501						    PAGE_SIZE, TRUE,
3502						    (oflags & PVF_WRITE) == 0);
3503					}
3504			}
3505		} else if (m && !(m->flags & (PG_UNMANAGED | PG_FICTITIOUS)))
3506			if ((pve = pmap_get_pv_entry()) == NULL) {
3507				panic("pmap_enter: no pv entries");
3508			}
3509		if (m && !(m->flags & (PG_UNMANAGED | PG_FICTITIOUS))) {
3510			KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
3511			    ("pmap_enter: managed mapping within the clean submap"));
3512			pmap_enter_pv(m, pve, pmap, va, nflags);
3513		}
3514	}
3515	/*
3516	 * Make sure userland mappings get the right permissions
3517	 */
3518	if (pmap != pmap_kernel() && va != vector_page) {
3519		npte |= L2_S_PROT_U;
3520	}
3521
3522	/*
3523	 * Keep the stats up to date
3524	 */
3525	if (opte == 0) {
3526		l2b->l2b_occupancy++;
3527		pmap->pm_stats.resident_count++;
3528	}
3529
3530
3531	/*
3532	 * If this is just a wiring change, the two PTEs will be
3533	 * identical, so there's no need to update the page table.
3534	 */
3535	if (npte != opte) {
3536		boolean_t is_cached = pmap_is_current(pmap);
3537
3538		*ptep = npte;
3539		if (is_cached) {
3540			/*
3541			 * We only need to frob the cache/tlb if this pmap
3542			 * is current
3543			 */
3544			PTE_SYNC(ptep);
3545			if (L1_IDX(va) != L1_IDX(vector_page) &&
3546			    l2pte_valid(npte)) {
3547				/*
3548				 * This mapping is likely to be accessed as
3549				 * soon as we return to userland. Fix up the
3550				 * L1 entry to avoid taking another
3551				 * page/domain fault.
3552				 */
3553				pd_entry_t *pl1pd, l1pd;
3554
3555				pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
3556				l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) |
3557				    L1_C_PROTO;
3558				if (*pl1pd != l1pd) {
3559					*pl1pd = l1pd;
3560					PTE_SYNC(pl1pd);
3561				}
3562			}
3563		}
3564
3565		if (PV_BEEN_EXECD(oflags))
3566			pmap_tlb_flushID_SE(pmap, va);
3567		else if (PV_BEEN_REFD(oflags))
3568			pmap_tlb_flushD_SE(pmap, va);
3569
3570
3571		if (m)
3572			pmap_vac_me_harder(m, pmap, va);
3573	}
3574}
3575
3576/*
3577 * Maps a sequence of resident pages belonging to the same object.
3578 * The sequence begins with the given page m_start.  This page is
3579 * mapped at the given virtual address start.  Each subsequent page is
3580 * mapped at a virtual address that is offset from start by the same
3581 * amount as the page is offset from m_start within the object.  The
3582 * last page in the sequence is the page with the largest offset from
3583 * m_start that can be mapped at a virtual address less than the given
3584 * virtual address end.  Not every virtual page between start and end
3585 * is mapped; only those for which a resident page exists with the
3586 * corresponding offset from m_start are mapped.
3587 */
3588void
3589pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
3590    vm_page_t m_start, vm_prot_t prot)
3591{
3592	vm_page_t m;
3593	vm_pindex_t diff, psize;
3594
3595	psize = atop(end - start);
3596	m = m_start;
3597	PMAP_LOCK(pmap);
3598	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
3599		pmap_enter_locked(pmap, start + ptoa(diff), m, prot &
3600		    (VM_PROT_READ | VM_PROT_EXECUTE), FALSE, M_NOWAIT);
3601		m = TAILQ_NEXT(m, listq);
3602	}
3603 	PMAP_UNLOCK(pmap);
3604}
3605
3606/*
3607 * this code makes some *MAJOR* assumptions:
3608 * 1. Current pmap & pmap exists.
3609 * 2. Not wired.
3610 * 3. Read access.
3611 * 4. No page table pages.
3612 * but is *MUCH* faster than pmap_enter...
3613 */
3614
3615void
3616pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
3617{
3618
3619 	PMAP_LOCK(pmap);
3620	pmap_enter_locked(pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
3621	    FALSE, M_NOWAIT);
3622 	PMAP_UNLOCK(pmap);
3623}
3624
3625/*
3626 *	Routine:	pmap_change_wiring
3627 *	Function:	Change the wiring attribute for a map/virtual-address
3628 *			pair.
3629 *	In/out conditions:
3630 *			The mapping must already exist in the pmap.
3631 */
3632void
3633pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
3634{
3635	struct l2_bucket *l2b;
3636	pt_entry_t *ptep, pte;
3637	vm_page_t pg;
3638
3639	vm_page_lock_queues();
3640 	PMAP_LOCK(pmap);
3641	l2b = pmap_get_l2_bucket(pmap, va);
3642	KASSERT(l2b, ("No l2b bucket in pmap_change_wiring"));
3643	ptep = &l2b->l2b_kva[l2pte_index(va)];
3644	pte = *ptep;
3645	pg = PHYS_TO_VM_PAGE(l2pte_pa(pte));
3646	if (pg)
3647		pmap_modify_pv(pg, pmap, va, PVF_WIRED, wired ? PVF_WIRED : 0);
3648	vm_page_unlock_queues();
3649 	PMAP_UNLOCK(pmap);
3650}
3651
3652
3653/*
3654 *	Copy the range specified by src_addr/len
3655 *	from the source map to the range dst_addr/len
3656 *	in the destination map.
3657 *
3658 *	This routine is only advisory and need not do anything.
3659 */
3660void
3661pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
3662    vm_size_t len, vm_offset_t src_addr)
3663{
3664}
3665
3666
3667/*
3668 *	Routine:	pmap_extract
3669 *	Function:
3670 *		Extract the physical page address associated
3671 *		with the given map/virtual_address pair.
3672 */
3673vm_paddr_t
3674pmap_extract(pmap_t pm, vm_offset_t va)
3675{
3676	struct l2_dtable *l2;
3677	pd_entry_t l1pd;
3678	pt_entry_t *ptep, pte;
3679	vm_paddr_t pa;
3680	u_int l1idx;
3681	l1idx = L1_IDX(va);
3682
3683	PMAP_LOCK(pm);
3684	l1pd = pm->pm_l1->l1_kva[l1idx];
3685	if (l1pte_section_p(l1pd)) {
3686		/*
3687		 * These should only happen for pmap_kernel()
3688		 */
3689		KASSERT(pm == pmap_kernel(), ("huh"));
3690		/* XXX: what to do about the bits > 32 ? */
3691		if (l1pd & L1_S_SUPERSEC)
3692			pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET);
3693		else
3694			pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
3695	} else {
3696		/*
3697		 * Note that we can't rely on the validity of the L1
3698		 * descriptor as an indication that a mapping exists.
3699		 * We have to look it up in the L2 dtable.
3700		 */
3701		l2 = pm->pm_l2[L2_IDX(l1idx)];
3702
3703		if (l2 == NULL ||
3704		    (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
3705			PMAP_UNLOCK(pm);
3706			return (0);
3707		}
3708
3709		ptep = &ptep[l2pte_index(va)];
3710		pte = *ptep;
3711
3712		if (pte == 0) {
3713			PMAP_UNLOCK(pm);
3714			return (0);
3715		}
3716
3717		switch (pte & L2_TYPE_MASK) {
3718		case L2_TYPE_L:
3719			pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
3720			break;
3721
3722		default:
3723			pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
3724			break;
3725		}
3726	}
3727
3728	PMAP_UNLOCK(pm);
3729	return (pa);
3730}
3731
3732/*
3733 * Atomically extract and hold the physical page with the given
3734 * pmap and virtual address pair if that mapping permits the given
3735 * protection.
3736 *
3737 */
3738vm_page_t
3739pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
3740{
3741	struct l2_dtable *l2;
3742	pd_entry_t l1pd;
3743	pt_entry_t *ptep, pte;
3744	vm_paddr_t pa;
3745	vm_page_t m = NULL;
3746	u_int l1idx;
3747	l1idx = L1_IDX(va);
3748
3749	vm_page_lock_queues();
3750 	PMAP_LOCK(pmap);
3751	l1pd = pmap->pm_l1->l1_kva[l1idx];
3752	if (l1pte_section_p(l1pd)) {
3753		/*
3754		 * These should only happen for pmap_kernel()
3755		 */
3756		KASSERT(pmap == pmap_kernel(), ("huh"));
3757		/* XXX: what to do about the bits > 32 ? */
3758		if (l1pd & L1_S_SUPERSEC)
3759			pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET);
3760		else
3761			pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
3762		if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) {
3763			m = PHYS_TO_VM_PAGE(pa);
3764			vm_page_hold(m);
3765		}
3766
3767	} else {
3768		/*
3769		 * Note that we can't rely on the validity of the L1
3770		 * descriptor as an indication that a mapping exists.
3771		 * We have to look it up in the L2 dtable.
3772		 */
3773		l2 = pmap->pm_l2[L2_IDX(l1idx)];
3774
3775		if (l2 == NULL ||
3776		    (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
3777		 	PMAP_UNLOCK(pmap);
3778			vm_page_unlock_queues();
3779			return (NULL);
3780		}
3781
3782		ptep = &ptep[l2pte_index(va)];
3783		pte = *ptep;
3784
3785		if (pte == 0) {
3786		 	PMAP_UNLOCK(pmap);
3787			vm_page_unlock_queues();
3788			return (NULL);
3789		}
3790		if (pte & L2_S_PROT_W || (prot & VM_PROT_WRITE) == 0) {
3791			switch (pte & L2_TYPE_MASK) {
3792			case L2_TYPE_L:
3793				pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
3794				break;
3795
3796			default:
3797				pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
3798				break;
3799			}
3800			m = PHYS_TO_VM_PAGE(pa);
3801			vm_page_hold(m);
3802		}
3803	}
3804
3805 	PMAP_UNLOCK(pmap);
3806	vm_page_unlock_queues();
3807	return (m);
3808}
3809
3810/*
3811 * Initialize a preallocated and zeroed pmap structure,
3812 * such as one in a vmspace structure.
3813 */
3814
3815int
3816pmap_pinit(pmap_t pmap)
3817{
3818	PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap));
3819
3820	PMAP_LOCK_INIT(pmap);
3821	pmap_alloc_l1(pmap);
3822	bzero(pmap->pm_l2, sizeof(pmap->pm_l2));
3823
3824	pmap->pm_count = 1;
3825	pmap->pm_active = 0;
3826
3827	TAILQ_INIT(&pmap->pm_pvlist);
3828	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
3829	pmap->pm_stats.resident_count = 1;
3830	if (vector_page < KERNBASE) {
3831		pmap_enter(pmap, vector_page,
3832		    VM_PROT_READ, PHYS_TO_VM_PAGE(systempage.pv_pa),
3833		    VM_PROT_READ, 1);
3834	}
3835	return (1);
3836}
3837
3838
3839/***************************************************
3840 * page management routines.
3841 ***************************************************/
3842
3843
3844static void
3845pmap_free_pv_entry(pv_entry_t pv)
3846{
3847	pv_entry_count--;
3848	uma_zfree(pvzone, pv);
3849}
3850
3851
3852/*
3853 * Get a new pv_entry, allocating a block from the system
3854 * when needed.
3855 * The allocation bypasses the malloc code because allocations
3856 * can happen at interrupt time.
3857 */
3858static pv_entry_t
3859pmap_get_pv_entry(void)
3860{
3861	pv_entry_t ret_value;
3862
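	/*
	 * Crossing the high-water mark wakes the page daemon so that page
	 * reclamation (and the pv entry frees that come with it) can begin.
	 */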
3863	pv_entry_count++;
3864	if (pv_entry_count > pv_entry_high_water)
3865		pagedaemon_wakeup();
3866	ret_value = uma_zalloc(pvzone, M_NOWAIT);
3867	return ret_value;
3868}
3869
3870
3871/*
3872 *	Remove the given range of addresses from the specified map.
3873 *
3874 *	It is assumed that the start and end are properly
3875 *	rounded to the page size.
3876 *	rounded to the page size.
 */
3877#define  PMAP_REMOVE_CLEAN_LIST_SIZE     3
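/*
 * pmap_remove() batches up to PMAP_REMOVE_CLEAN_LIST_SIZE mappings per L2
 * bucket so that their cache lines and TLB entries can be cleaned one page
 * at a time; once the batch overflows it is cheaper to write back and
 * invalidate the whole cache and flush the whole TLB instead.
 */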
3878void
3879pmap_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
3880{
3881	struct l2_bucket *l2b;
3882	vm_offset_t next_bucket;
3883	pt_entry_t *ptep;
3884	u_int cleanlist_idx, total, cnt;
3885	struct {
3886		vm_offset_t va;
3887		pt_entry_t *pte;
3888	} cleanlist[PMAP_REMOVE_CLEAN_LIST_SIZE];
3889	u_int mappings, is_exec, is_refd;
3890	int flushall = 0;
3891
3892
3893	/*
3894	 * we lock in the pmap => pv_head direction
3895	 */
3896
3897	vm_page_lock_queues();
3898	PMAP_LOCK(pm);
3899	if (!pmap_is_current(pm)) {
3900		cleanlist_idx = PMAP_REMOVE_CLEAN_LIST_SIZE + 1;
3901	} else
3902		cleanlist_idx = 0;
3903
3904	total = 0;
3905	while (sva < eva) {
3906		/*
3907		 * Do one L2 bucket's worth at a time.
3908		 */
3909		next_bucket = L2_NEXT_BUCKET(sva);
3910		if (next_bucket > eva)
3911			next_bucket = eva;
3912
3913		l2b = pmap_get_l2_bucket(pm, sva);
3914		if (l2b == NULL) {
3915			sva = next_bucket;
3916			continue;
3917		}
3918
3919		ptep = &l2b->l2b_kva[l2pte_index(sva)];
3920		mappings = 0;
3921
3922		while (sva < next_bucket) {
3923			struct vm_page *pg;
3924			pt_entry_t pte;
3925			vm_paddr_t pa;
3926
3927			pte = *ptep;
3928
3929			if (pte == 0) {
3930				/*
3931				 * Nothing here, move along
3932				 */
3933				sva += PAGE_SIZE;
3934				ptep++;
3935				continue;
3936			}
3937
3938			pm->pm_stats.resident_count--;
3939			pa = l2pte_pa(pte);
3940			is_exec = 0;
3941			is_refd = 1;
3942
3943			/*
3944			 * Update flags. In many circumstances we could
3945			 * cluster these and handle several sequential
3946			 * pages in one go.
3947			 */
3948			if ((pg = PHYS_TO_VM_PAGE(pa)) != NULL) {
3949				struct pv_entry *pve;
3950
3951				pve = pmap_remove_pv(pg, pm, sva);
3952				if (pve) {
3953					is_exec = PV_BEEN_EXECD(pve->pv_flags);
3954					is_refd = PV_BEEN_REFD(pve->pv_flags);
3955					pmap_free_pv_entry(pve);
3956				}
3957			}
3958
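			/*
			 * An invalid (but non-zero) PTE has no TLB entry and
			 * no cache footprint, so it can be cleared without
			 * any flushing.
			 */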
3959			if (!l2pte_valid(pte)) {
3960				*ptep = 0;
3961				PTE_SYNC_CURRENT(pm, ptep);
3962				sva += PAGE_SIZE;
3963				ptep++;
3964				mappings++;
3965				continue;
3966			}
3967
3968			if (cleanlist_idx < PMAP_REMOVE_CLEAN_LIST_SIZE) {
3969				/* Add to the clean list. */
3970				cleanlist[cleanlist_idx].pte = ptep;
3971				cleanlist[cleanlist_idx].va =
3972				    sva | (is_exec & 1);
3973				cleanlist_idx++;
3974			} else
3975			if (cleanlist_idx == PMAP_REMOVE_CLEAN_LIST_SIZE) {
3976				/* Nuke everything if needed. */
3977				pmap_idcache_wbinv_all(pm);
3978				pmap_tlb_flushID(pm);
3979
3980				/*
3981				 * Roll back the previous PTE list,
3982				 * and zero out the current PTE.
3983				 */
3984				for (cnt = 0;
3985				     cnt < PMAP_REMOVE_CLEAN_LIST_SIZE; cnt++) {
3986					*cleanlist[cnt].pte = 0;
3987				}
3988				*ptep = 0;
3989				PTE_SYNC(ptep);
3990				cleanlist_idx++;
3991				flushall = 1;
3992			} else {
3993				*ptep = 0;
3994				PTE_SYNC(ptep);
3995				if (is_exec)
3996					pmap_tlb_flushID_SE(pm, sva);
3997				else if (is_refd)
3998					pmap_tlb_flushD_SE(pm, sva);
4000			}
4001
4002			sva += PAGE_SIZE;
4003			ptep++;
4004			mappings++;
4005		}
4006
4007		/*
4008		 * Deal with any leftovers.
4009		 */
4010		if (cleanlist_idx <= PMAP_REMOVE_CLEAN_LIST_SIZE) {
4011			total += cleanlist_idx;
4012			for (cnt = 0; cnt < cleanlist_idx; cnt++) {
4013				vm_offset_t clva =
4014				    cleanlist[cnt].va & ~1;
4015				if (cleanlist[cnt].va & 1) {
4016					pmap_idcache_wbinv_range(pm,
4017					    clva, PAGE_SIZE);
4018					pmap_tlb_flushID_SE(pm, clva);
4019				} else {
4020					pmap_dcache_wb_range(pm,
4021					    clva, PAGE_SIZE, TRUE,
4022					    FALSE);
4023					pmap_tlb_flushD_SE(pm, clva);
4024				}
4025				*cleanlist[cnt].pte = 0;
4026				PTE_SYNC_CURRENT(pm, cleanlist[cnt].pte);
4027			}
4028
4029			if (total <= PMAP_REMOVE_CLEAN_LIST_SIZE)
4030				cleanlist_idx = 0;
4031			else {
4032				/*
4033				 * We are removing so many entries that it's just
4034				 * easier to flush the whole cache.
4035				 */
4036				cleanlist_idx = PMAP_REMOVE_CLEAN_LIST_SIZE + 1;
4037				pmap_idcache_wbinv_all(pm);
4038				flushall = 1;
4039			}
4040		}
4041
4042		pmap_free_l2_bucket(pm, l2b, mappings);
4043	}
4044
4045	vm_page_unlock_queues();
4046	if (flushall)
4047		cpu_tlb_flushID();
4048 	PMAP_UNLOCK(pm);
4049}
4050
4051
4052
4053
4054/*
4055 * pmap_zero_page()
4056 *
4057 * Zero a given physical page by mapping it at a page hook point.
4058 * When doing the zero-page operation, the page being zeroed is mapped
4059 * cacheable, since on StrongARM accesses to non-cached pages are non-burst,
4060 * which makes writing _any_ bulk data very slow.
4061 */
4062#if (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 || defined(CPU_XSCALE_CORE3)
4063void
4064pmap_zero_page_generic(vm_paddr_t phys, int off, int size)
4065{
4066#ifdef ARM_USE_SMALL_ALLOC
4067	char *dstpg;
4068#endif
4069
4070#ifdef DEBUG
4071	struct vm_page *pg = PHYS_TO_VM_PAGE(phys);
4072
4073	if (pg->md.pvh_list != NULL)
4074		panic("pmap_zero_page: page has mappings");
4075#endif
4076
4077	if (_arm_bzero && size >= _min_bzero_size &&
4078	    _arm_bzero((void *)(phys + off), size, IS_PHYSICAL) == 0)
4079		return;
4080
4081#ifdef ARM_USE_SMALL_ALLOC
4082	dstpg = (char *)arm_ptovirt(phys);
4083	if (off || size != PAGE_SIZE) {
4084		bzero(dstpg + off, size);
4085		cpu_dcache_wbinv_range((vm_offset_t)(dstpg + off), size);
4086	} else {
4087		bzero_page((vm_offset_t)dstpg);
4088		cpu_dcache_wbinv_range((vm_offset_t)dstpg, PAGE_SIZE);
4089	}
4090#else
4091
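	/*
	 * cdst_pte is a kernel PTE reserved for a temporary "page hook"
	 * mapping and cdstp is the virtual address it maps; cmtx serializes
	 * use of the hook.
	 */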
4092	mtx_lock(&cmtx);
4093	/*
4094	 * Hook in the page, zero it, and purge the cache for that
4095	 * zeroed page. Invalidate the TLB as needed.
4096	 */
4097	*cdst_pte = L2_S_PROTO | phys |
4098	    L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode;
4099	PTE_SYNC(cdst_pte);
4100	cpu_tlb_flushD_SE(cdstp);
4101	cpu_cpwait();
4102	if (off || size != PAGE_SIZE) {
4103		bzero((void *)(cdstp + off), size);
4104		cpu_dcache_wbinv_range(cdstp + off, size);
4105	} else {
4106		bzero_page(cdstp);
4107		cpu_dcache_wbinv_range(cdstp, PAGE_SIZE);
4108	}
4109	mtx_unlock(&cmtx);
4110#endif
4111}
4112#endif /* (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 || CPU_XSCALE_CORE3 */
4113
4114#if ARM_MMU_XSCALE == 1
4115void
4116pmap_zero_page_xscale(vm_paddr_t phys, int off, int size)
4117{
4118#ifdef ARM_USE_SMALL_ALLOC
4119	char *dstpg;
4120#endif
4121
4122	if (_arm_bzero && size >= _min_bzero_size &&
4123	    _arm_bzero((void *)(phys + off), size, IS_PHYSICAL) == 0)
4124		return;
4125#ifdef ARM_USE_SMALL_ALLOC
4126	dstpg = (char *)arm_ptovirt(phys);
4127	if (off || size != PAGE_SIZE) {
4128		bzero(dstpg + off, size);
4129		cpu_dcache_wbinv_range((vm_offset_t)(dstpg + off), size);
4130	} else {
4131		bzero_page((vm_offset_t)dstpg);
4132		cpu_dcache_wbinv_range((vm_offset_t)dstpg, PAGE_SIZE);
4133	}
4134#else
4135	mtx_lock(&cmtx);
4136	/*
4137	 * Hook in the page, zero it, and purge the cache for that
4138	 * zeroed page. Invalidate the TLB as needed.
4139	 */
4140	*cdst_pte = L2_S_PROTO | phys |
4141	    L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) |
4142	    L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);	/* mini-data */
4143	PTE_SYNC(cdst_pte);
4144	cpu_tlb_flushD_SE(cdstp);
4145	cpu_cpwait();
4146	if (off || size != PAGE_SIZE)
4147		bzero((void *)(cdstp + off), size);
4148	else
4149		bzero_page(cdstp);
4150	mtx_unlock(&cmtx);
4151	xscale_cache_clean_minidata();
4152#endif
4153}
4154
4155/*
4156 * Change the PTEs for the specified kernel mappings such that they
4157 * will use the mini data cache instead of the main data cache.
4158 */
4159void
4160pmap_use_minicache(vm_offset_t va, vm_size_t size)
4161{
4162	struct l2_bucket *l2b;
4163	pt_entry_t *ptep, *sptep, pte;
4164	vm_offset_t next_bucket, eva;
4165
4166#if (ARM_NMMUS > 1) || defined(CPU_XSCALE_CORE3)
4167	if (xscale_use_minidata == 0)
4168		return;
4169#endif
4170
4171	eva = va + size;
4172
4173	while (va < eva) {
4174		next_bucket = L2_NEXT_BUCKET(va);
4175		if (next_bucket > eva)
4176			next_bucket = eva;
4177
4178		l2b = pmap_get_l2_bucket(pmap_kernel(), va);
4179
4180		sptep = ptep = &l2b->l2b_kva[l2pte_index(va)];
4181
4182		while (va < next_bucket) {
4183			pte = *ptep;
4184			if (!l2pte_minidata(pte)) {
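				/*
				 * Evict any cached data and the TLB entry for
				 * this page before changing its attributes,
				 * then clear the bufferable bit so the mapping
				 * uses the mini-data cache instead.
				 */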
4185				cpu_dcache_wbinv_range(va, PAGE_SIZE);
4186				cpu_tlb_flushD_SE(va);
4187				*ptep = pte & ~L2_B;
4188			}
4189			ptep++;
4190			va += PAGE_SIZE;
4191		}
4192		PTE_SYNC_RANGE(sptep, (u_int)(ptep - sptep));
4193	}
4194	cpu_cpwait();
4195}
4196#endif /* ARM_MMU_XSCALE == 1 */
4197
4198/*
4199 *	pmap_zero_page zeros the specified hardware page by mapping
4200 *	the page into KVM and using bzero to clear its contents.
4201 */
4202void
4203pmap_zero_page(vm_page_t m)
4204{
4205	pmap_zero_page_func(VM_PAGE_TO_PHYS(m), 0, PAGE_SIZE);
4206}
4207
4208
4209/*
4210 *	pmap_zero_page_area zeros the specified hardware page by mapping
4211 *	the page into KVM and using bzero to clear its contents.
4212 *
4213 *	off and size may not cover an area beyond a single hardware page.
4214 */
4215void
4216pmap_zero_page_area(vm_page_t m, int off, int size)
4217{
4218
4219	pmap_zero_page_func(VM_PAGE_TO_PHYS(m), off, size);
4220}
4221
4222
4223/*
4224 *	pmap_zero_page_idle zeros the specified hardware page by mapping
4225 *	the page into KVM and using bzero to clear its contents.  This
4226 *	is intended to be called from the vm_pagezero process only and
4227 *	outside of Giant.
4228 */
4229void
4230pmap_zero_page_idle(vm_page_t m)
4231{
4232
4233	pmap_zero_page(m);
4234}
4235
4236#if 0
4237/*
4238 * pmap_clean_page()
4239 *
4240 * This is a local function used to work out the best strategy to clean
4241 * a single page referenced by its entry in the PV table. It's used by
4242 * pmap_copy_page, pmap_zero_page and maybe some others later on.
4243 *
4244 * Its policy is effectively:
4245 *  o If there are no mappings, we don't bother doing anything with the cache.
4246 *  o If there is one mapping, we clean just that page.
4247 *  o If there are multiple mappings, we clean the entire cache.
4248 *
4249 * So that some functions can be further optimised, it returns 0 if it didn't
4250 * clean the entire cache, or 1 if it did.
4251 *
4252 * XXX One bug in this routine is that if the pv_entry has a single page
4253 * mapped at 0x00000000, a whole cache clean will be performed rather than
4254 * just that one page.  This should not occur in everyday use and, if it
4255 * does, the only cost is a less efficient clean for that page.
4256 */
4257static int
4258pmap_clean_page(struct pv_entry *pv, boolean_t is_src)
4259{
4260	pmap_t pm, pm_to_clean = NULL;
4261	struct pv_entry *npv;
4262	u_int cache_needs_cleaning = 0;
4263	u_int flags = 0;
4264	vm_offset_t page_to_clean = 0;
4265
4266	if (pv == NULL) {
4267		/* nothing mapped in so nothing to flush */
4268		return (0);
4269	}
4270
4271	/*
4272	 * Since we flush the cache each time we change to a different
4273	 * user vmspace, we only need to flush the page if it is in the
4274	 * current pmap.
4275	 */
4276	if (curthread)
4277		pm = vmspace_pmap(curproc->p_vmspace);
4278	else
4279		pm = pmap_kernel();
4280
4281	for (npv = pv; npv; npv = TAILQ_NEXT(npv, pv_list)) {
4282		if (npv->pv_pmap == pmap_kernel() || npv->pv_pmap == pm) {
4283			flags |= npv->pv_flags;
4284			/*
4285			 * The page is mapped non-cacheable in
4286			 * this map.  No need to flush the cache.
4287			 */
4288			if (npv->pv_flags & PVF_NC) {
4289#ifdef DIAGNOSTIC
4290				if (cache_needs_cleaning)
4291					panic("pmap_clean_page: "
4292					    "cache inconsistency");
4293#endif
4294				break;
4295			} else if (is_src && (npv->pv_flags & PVF_WRITE) == 0)
4296				continue;
4297			if (cache_needs_cleaning) {
4298				page_to_clean = 0;
4299				break;
4300			} else {
4301				page_to_clean = npv->pv_va;
4302				pm_to_clean = npv->pv_pmap;
4303			}
4304			cache_needs_cleaning = 1;
4305		}
4306	}
4307	if (page_to_clean) {
4308		if (PV_BEEN_EXECD(flags))
4309			pmap_idcache_wbinv_range(pm_to_clean, page_to_clean,
4310			    PAGE_SIZE);
4311		else
4312			pmap_dcache_wb_range(pm_to_clean, page_to_clean,
4313			    PAGE_SIZE, !is_src, (flags & PVF_WRITE) == 0);
4314	} else if (cache_needs_cleaning) {
4315		if (PV_BEEN_EXECD(flags))
4316			pmap_idcache_wbinv_all(pm);
4317		else
4318			pmap_dcache_wbinv_all(pm);
4319		return (1);
4320	}
4321	return (0);
4322}
4323#endif
4324
4325/*
4326 *	pmap_copy_page copies the specified (machine independent)
4327 *	page by mapping the page into virtual memory and using
4328 *	bcopy to copy the page, one machine dependent page at a
4329 *	time.
4330 */
4331
4332/*
4333 * pmap_copy_page()
4334 *
4335 * Copy one physical page into another, by mapping the pages into
4336 * hook points. The same comment regarding cacheability as in
4337 * pmap_zero_page also applies here.
4338 */
4339#if (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 || defined(CPU_XSCALE_CORE3)
4340void
4341pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst)
4342{
4343#if 0
4344	struct vm_page *src_pg = PHYS_TO_VM_PAGE(src);
4345#endif
4346#ifdef DEBUG
4347	struct vm_page *dst_pg = PHYS_TO_VM_PAGE(dst);
4348
4349	if (dst_pg->md.pvh_list != NULL)
4350		panic("pmap_copy_page: dst page has mappings");
4351#endif
4352
4353
4354	/*
4355	 * Clean the source page.  Hold the source page's lock for
4356	 * the duration of the copy so that no other mappings can
4357	 * be created while we have a potentially aliased mapping.
4358	 */
4359#if 0
4360	/*
4361	 * XXX: Not needed while we call cpu_dcache_wbinv_all() in
4362	 * pmap_copy_page().
4363	 */
4364	(void) pmap_clean_page(TAILQ_FIRST(&src_pg->md.pv_list), TRUE);
4365#endif
4366	/*
4367	 * Map the pages into the page hook points, copy them, and purge
4368	 * the cache for the appropriate page. Invalidate the TLB
4369	 * as required.
4370	 */
4371	mtx_lock(&cmtx);
4372	*csrc_pte = L2_S_PROTO | src |
4373	    L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | pte_l2_s_cache_mode;
4374	PTE_SYNC(csrc_pte);
4375	*cdst_pte = L2_S_PROTO | dst |
4376	    L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode;
4377	PTE_SYNC(cdst_pte);
4378	cpu_tlb_flushD_SE(csrcp);
4379	cpu_tlb_flushD_SE(cdstp);
4380	cpu_cpwait();
4381	bcopy_page(csrcp, cdstp);
4382	mtx_unlock(&cmtx);
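	/*
	 * Invalidate the (clean) source lines and write the freshly copied
	 * destination data back to memory before the hook mappings are
	 * reused for another page.
	 */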
4383	cpu_dcache_inv_range(csrcp, PAGE_SIZE);
4384	cpu_dcache_wbinv_range(cdstp, PAGE_SIZE);
4385}
4386#endif /* (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 || CPU_XSCALE_CORE3 */
4387
4388#if ARM_MMU_XSCALE == 1
4389void
4390pmap_copy_page_xscale(vm_paddr_t src, vm_paddr_t dst)
4391{
4392#if 0
4393	/* XXX: Only needed for pmap_clean_page(), which is commented out. */
4394	struct vm_page *src_pg = PHYS_TO_VM_PAGE(src);
4395#endif
4396#ifdef DEBUG
4397	struct vm_page *dst_pg = PHYS_TO_VM_PAGE(dst);
4398
4399	if (dst_pg->md.pvh_list != NULL)
4400		panic("pmap_copy_page: dst page has mappings");
4401#endif
4402
4403
4404	/*
4405	 * Clean the source page.  Hold the source page's lock for
4406	 * the duration of the copy so that no other mappings can
4407	 * be created while we have a potentially aliased mapping.
4408	 */
4409#if 0
4410	/*
4411	 * XXX: Not needed while we call cpu_dcache_wbinv_all() in
4412	 * pmap_copy_page().
4413	 */
4414	(void) pmap_clean_page(TAILQ_FIRST(&src_pg->md.pv_list), TRUE);
4415#endif
4416	/*
4417	 * Map the pages into the page hook points, copy them, and purge
4418	 * the cache for the appropriate page. Invalidate the TLB
4419	 * as required.
4420	 */
4421	mtx_lock(&cmtx);
4422	*csrc_pte = L2_S_PROTO | src |
4423	    L2_S_PROT(PTE_KERNEL, VM_PROT_READ) |
4424	    L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);	/* mini-data */
4425	PTE_SYNC(csrc_pte);
4426	*cdst_pte = L2_S_PROTO | dst |
4427	    L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) |
4428	    L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);	/* mini-data */
4429	PTE_SYNC(cdst_pte);
4430	cpu_tlb_flushD_SE(csrcp);
4431	cpu_tlb_flushD_SE(cdstp);
4432	cpu_cpwait();
4433	bcopy_page(csrcp, cdstp);
4434	mtx_unlock(&cmtx);
4435	xscale_cache_clean_minidata();
4436}
4437#endif /* ARM_MMU_XSCALE == 1 */
4438
4439void
4440pmap_copy_page(vm_page_t src, vm_page_t dst)
4441{
4442#ifdef ARM_USE_SMALL_ALLOC
4443	vm_offset_t srcpg, dstpg;
4444#endif
4445
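	/*
	 * Write back and invalidate the entire data cache first so that no
	 * dirty lines for either page can alias the temporary mappings used
	 * below.
	 */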
4446	cpu_dcache_wbinv_all();
4447	if (_arm_memcpy && PAGE_SIZE >= _min_memcpy_size &&
4448	    _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst),
4449	    (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0)
4450		return;
4451#ifdef ARM_USE_SMALL_ALLOC
4452	srcpg = arm_ptovirt(VM_PAGE_TO_PHYS(src));
4453	dstpg = arm_ptovirt(VM_PAGE_TO_PHYS(dst));
4454	bcopy_page(srcpg, dstpg);
4455	cpu_dcache_wbinv_range(dstpg, PAGE_SIZE);
4456#else
4457	pmap_copy_page_func(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
4458#endif
4459}
4460
4461
4462
4463
4464/*
4465 * Return TRUE if the given physical page is mapped
4466 * in the given pmap.
4467 */
4468boolean_t
4469pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
4470{
4471	pv_entry_t pv;
4472	int loops = 0;
4473
4474	if (m->flags & PG_FICTITIOUS)
4475		return (FALSE);
4476
4477	/*
4478	 * Check the page's current mappings, returning immediately on a match.
4479	 */
4480	for (pv = TAILQ_FIRST(&m->md.pv_list);
4481	    pv;
4482	    pv = TAILQ_NEXT(pv, pv_list)) {
4483	    	if (pv->pv_pmap == pmap) {
4484	    		return (TRUE);
4485	    	}
4486		loops++;
4487		if (loops >= 16)
4488			break;
4489	}
4490	return (FALSE);
4491}
4492
4493/*
4494 *	pmap_page_wired_mappings:
4495 *
4496 *	Return the number of managed mappings to the given physical page
4497 *	that are wired.
4498 */
4499int
4500pmap_page_wired_mappings(vm_page_t m)
4501{
4502	pv_entry_t pv;
4503	int count;
4504
4505	count = 0;
4506	if ((m->flags & PG_FICTITIOUS) != 0)
4507		return (count);
4508	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
4509	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
4510		if ((pv->pv_flags & PVF_WIRED) != 0)
4511			count++;
4512	return (count);
4513}
4514
4515/*
4516 *	pmap_ts_referenced:
4517 *
4518 *	Return the count of reference bits for a page, clearing all of them.
4519 */
4520int
4521pmap_ts_referenced(vm_page_t m)
4522{
4523
4524	if (m->flags & PG_FICTITIOUS)
4525		return (0);
4526	return (pmap_clearbit(m, PVF_REF));
4527}
4528
4529
4530boolean_t
4531pmap_is_modified(vm_page_t m)
4532{
4533
4534	if (m->md.pvh_attrs & PVF_MOD)
4535		return (TRUE);
4536
4537	return(FALSE);
4538}
4539
4540
4541/*
4542 *	Clear the modify bits on the specified physical page.
4543 */
4544void
4545pmap_clear_modify(vm_page_t m)
4546{
4547
4548	if (m->md.pvh_attrs & PVF_MOD)
4549		pmap_clearbit(m, PVF_MOD);
4550}
4551
4552
4553/*
4554 *	pmap_clear_reference:
4555 *
4556 *	Clear the reference bit on the specified physical page.
4557 */
4558void
4559pmap_clear_reference(vm_page_t m)
4560{
4561
4562	if (m->md.pvh_attrs & PVF_REF)
4563		pmap_clearbit(m, PVF_REF);
4564}
4565
4566
4567/*
4568 * Clear the write and modified bits in each of the given page's mappings.
4569 */
4570void
4571pmap_remove_write(vm_page_t m)
4572{
4573
4574	if (m->flags & PG_WRITEABLE)
4575		pmap_clearbit(m, PVF_WRITE);
4576}
4577
4578
4579/*
4580 * perform the pmap work for mincore
4581 */
4582int
4583pmap_mincore(pmap_t pmap, vm_offset_t addr)
4584{
4585	printf("pmap_mincore()\n");
4586
4587	return (0);
4588}
4589
4590
4591vm_offset_t
4592pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
4593{
4594
4595	return(addr);
4596}
4597
4598
4599/*
4600 * Map a set of physical memory pages into the kernel virtual
4601 * address space. Return a pointer to where it is mapped. This
4602 * routine is intended to be used for mapping device memory,
4603 * NOT real memory.
4604 */
4605void *
4606pmap_mapdev(vm_offset_t pa, vm_size_t size)
4607{
4608	vm_offset_t va, tmpva, offset;
4609
4610	offset = pa & PAGE_MASK;
4611	size = roundup(size, PAGE_SIZE);
4612
4613	GIANT_REQUIRED;
4614
4615	va = kmem_alloc_nofault(kernel_map, size);
4616	if (!va)
4617		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
4618	for (tmpva = va; size > 0;) {
4619		pmap_kenter_internal(tmpva, pa, 0);
4620		size -= PAGE_SIZE;
4621		tmpva += PAGE_SIZE;
4622		pa += PAGE_SIZE;
4623	}
4624
4625	return ((void *)(va + offset));
4626}
4627
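/*
 * Illustrative sketch only: the device physical address below is
 * hypothetical.  A typical caller maps a page of device registers and
 * accesses them through the returned pointer.
 */
#if 0
static void
example_mapdev_usage(void)
{
	volatile uint32_t *regs;

	/* Map 4KB of (hypothetical) device registers at PA 0x80000000. */
	regs = pmap_mapdev(0x80000000, PAGE_SIZE);
	(void)regs[0];		/* read the first register */
}
#endif
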
4628#define BOOTSTRAP_DEBUG
4629
4630/*
4631 * pmap_map_section:
4632 *
4633 *	Create a single section mapping.
4634 */
4635void
4636pmap_map_section(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa,
4637    int prot, int cache)
4638{
4639	pd_entry_t *pde = (pd_entry_t *) l1pt;
4640	pd_entry_t fl;
4641
4642	KASSERT(((va | pa) & L1_S_OFFSET) == 0,
	    ("pmap_map_section: va/pa not section aligned"));
4643
4644	switch (cache) {
4645	case PTE_NOCACHE:
4646	default:
4647		fl = 0;
4648		break;
4649
4650	case PTE_CACHE:
4651		fl = pte_l1_s_cache_mode;
4652		break;
4653
4654	case PTE_PAGETABLE:
4655		fl = pte_l1_s_cache_mode_pt;
4656		break;
4657	}
4658
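	/*
	 * One L1 descriptor maps a 1MB section, so "va >> L1_S_SHIFT" selects
	 * the slot in the 4096-entry L1 table that covers this VA.
	 */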
4659	pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa |
4660	    L1_S_PROT(PTE_KERNEL, prot) | fl | L1_S_DOM(PMAP_DOMAIN_KERNEL);
4661	PTE_SYNC(&pde[va >> L1_S_SHIFT]);
4662
4663}
4664
4665/*
4666 * pmap_link_l2pt:
4667 *
4668 *	Link the L2 page table specified by l2pv.pv_pa into the L1
4669 *	page table at the slot for "va".
4670 */
4671void
4672pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv)
4673{
4674	pd_entry_t *pde = (pd_entry_t *) l1pt, proto;
4675	u_int slot = va >> L1_S_SHIFT;
4676
4677	proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO;
4678
4679#ifdef VERBOSE_INIT_ARM
4680	printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va);
4681#endif
4682
4683	pde[slot + 0] = proto | (l2pv->pv_pa + 0x000);
4684
4685	PTE_SYNC(&pde[slot]);
4686
4687	SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list);
4688
4689
4690}
4691
4692/*
4693 * pmap_map_entry
4694 *
4695 * 	Create a single page mapping.
4696 */
4697void
4698pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot,
4699    int cache)
4700{
4701	pd_entry_t *pde = (pd_entry_t *) l1pt;
4702	pt_entry_t fl;
4703	pt_entry_t *pte;
4704
4705	KASSERT(((va | pa) & PAGE_MASK) == 0,
	    ("pmap_map_entry: va/pa not page aligned"));
4706
4707	switch (cache) {
4708	case PTE_NOCACHE:
4709	default:
4710		fl = 0;
4711		break;
4712
4713	case PTE_CACHE:
4714		fl = pte_l2_s_cache_mode;
4715		break;
4716
4717	case PTE_PAGETABLE:
4718		fl = pte_l2_s_cache_mode_pt;
4719		break;
4720	}
4721
4722	if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
4723		panic("pmap_map_entry: no L2 table for VA 0x%08x", va);
4724
4725	pte = (pt_entry_t *) kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK);
4726
4727	if (pte == NULL)
4728		panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va);
4729
4730	pte[l2pte_index(va)] =
4731	    L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | fl;
4732	PTE_SYNC(&pte[l2pte_index(va)]);
4733}
4734
4735/*
4736 * pmap_map_chunk:
4737 *
4738 *	Map a chunk of memory using the most efficient mappings
4739 *	possible (section. large page, small page) into the
4740 *	possible (section, large page, small page) into the
4741 */
4742vm_size_t
4743pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa,
4744    vm_size_t size, int prot, int cache)
4745{
4746	pd_entry_t *pde = (pd_entry_t *) l1pt;
4747	pt_entry_t *pte, f1, f2s, f2l;
4748	vm_size_t resid;
4749	int i;
4750
4751	resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
4752
4753	if (l1pt == 0)
4754		panic("pmap_map_chunk: no L1 table provided");
4755
4756#ifdef VERBOSE_INIT_ARM
4757	printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x "
4758	    "prot=0x%x cache=%d\n", pa, va, size, resid, prot, cache);
4759#endif
4760
4761	switch (cache) {
4762	case PTE_NOCACHE:
4763	default:
4764		f1 = 0;
4765		f2l = 0;
4766		f2s = 0;
4767		break;
4768
4769	case PTE_CACHE:
4770		f1 = pte_l1_s_cache_mode;
4771		f2l = pte_l2_l_cache_mode;
4772		f2s = pte_l2_s_cache_mode;
4773		break;
4774
4775	case PTE_PAGETABLE:
4776		f1 = pte_l1_s_cache_mode_pt;
4777		f2l = pte_l2_l_cache_mode_pt;
4778		f2s = pte_l2_s_cache_mode_pt;
4779		break;
4780	}
4781
4782	size = resid;
4783
4784	while (resid > 0) {
4785		/* See if we can use a section mapping. */
4786		if (L1_S_MAPPABLE_P(va, pa, resid)) {
4787#ifdef VERBOSE_INIT_ARM
4788			printf("S");
4789#endif
4790			pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa |
4791			    L1_S_PROT(PTE_KERNEL, prot) | f1 |
4792			    L1_S_DOM(PMAP_DOMAIN_KERNEL);
4793			PTE_SYNC(&pde[va >> L1_S_SHIFT]);
4794			va += L1_S_SIZE;
4795			pa += L1_S_SIZE;
4796			resid -= L1_S_SIZE;
4797			continue;
4798		}
4799
4800		/*
4801		 * Ok, we're going to use an L2 table.  Make sure
4802		 * one is actually in the corresponding L1 slot
4803		 * for the current VA.
4804		 */
4805		if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
4806			panic("pmap_map_chunk: no L2 table for VA 0x%08x", va);
4807
4808		pte = (pt_entry_t *) kernel_pt_lookup(
4809		    pde[L1_IDX(va)] & L1_C_ADDR_MASK);
4810		if (pte == NULL)
4811			panic("pmap_map_chunk: can't find L2 table for VA "
4812			    "0x%08x", va);
4813		/* See if we can use a L2 large page mapping. */
4814		if (L2_L_MAPPABLE_P(va, pa, resid)) {
4815#ifdef VERBOSE_INIT_ARM
4816			printf("L");
4817#endif
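			/*
			 * A 64KB large page spans 16 consecutive L2 entries,
			 * and the descriptor must be replicated in every one
			 * of them.
			 */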
4818			for (i = 0; i < 16; i++) {
4819				pte[l2pte_index(va) + i] =
4820				    L2_L_PROTO | pa |
4821				    L2_L_PROT(PTE_KERNEL, prot) | f2l;
4822				PTE_SYNC(&pte[l2pte_index(va) + i]);
4823			}
4824			va += L2_L_SIZE;
4825			pa += L2_L_SIZE;
4826			resid -= L2_L_SIZE;
4827			continue;
4828		}
4829
4830		/* Use a small page mapping. */
4831#ifdef VERBOSE_INIT_ARM
4832		printf("P");
4833#endif
4834		pte[l2pte_index(va)] =
4835		    L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | f2s;
4836		PTE_SYNC(&pte[l2pte_index(va)]);
4837		va += PAGE_SIZE;
4838		pa += PAGE_SIZE;
4839		resid -= PAGE_SIZE;
4840	}
4841#ifdef VERBOSE_INIT_ARM
4842	printf("\n");
4843#endif
4844	return (size);
4845
4846}
4847
4848/********************** Static device map routines ***************************/
4849
4850static const struct pmap_devmap *pmap_devmap_table;
4851
4852/*
4853 * Register the devmap table.  This is provided in case early console
4854 * initialization needs to register mappings created by bootstrap code
4855 * before pmap_devmap_bootstrap() is called.
4856 */
4857void
4858pmap_devmap_register(const struct pmap_devmap *table)
4859{
4860
4861	pmap_devmap_table = table;
4862}
4863
4864/*
4865 * Map all of the static regions in the devmap table, and remember
4866 * the devmap table so other parts of the kernel can look up entries
4867 * later.
4868 */
4869void
4870pmap_devmap_bootstrap(vm_offset_t l1pt, const struct pmap_devmap *table)
4871{
4872	int i;
4873
4874	pmap_devmap_table = table;
4875
4876	for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) {
4877#ifdef VERBOSE_INIT_ARM
4878		printf("devmap: %08x -> %08x @ %08x\n",
4879		    pmap_devmap_table[i].pd_pa,
4880		    pmap_devmap_table[i].pd_pa +
4881			pmap_devmap_table[i].pd_size - 1,
4882		    pmap_devmap_table[i].pd_va);
4883#endif
4884		pmap_map_chunk(l1pt, pmap_devmap_table[i].pd_va,
4885		    pmap_devmap_table[i].pd_pa,
4886		    pmap_devmap_table[i].pd_size,
4887		    pmap_devmap_table[i].pd_prot,
4888		    pmap_devmap_table[i].pd_cache);
4889	}
4890}
4891
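/*
 * Illustrative example only: the region, addresses and sizes below are
 * hypothetical.  A board's early startup code provides a table like this
 * one, terminated by a zero-size entry, and hands it to
 * pmap_devmap_bootstrap() along with the bootstrap L1 table, e.g.
 * pmap_devmap_bootstrap(l1pagetable, example_devmap).
 */
#if 0
static const struct pmap_devmap example_devmap[] = {
	{
		.pd_va = 0xfe000000,		/* KVA for the region */
		.pd_pa = 0x80000000,		/* physical base of registers */
		.pd_size = 0x00100000,		/* 1MB, section-mappable */
		.pd_prot = VM_PROT_READ | VM_PROT_WRITE,
		.pd_cache = PTE_NOCACHE,	/* device memory */
	},
	{ 0, 0, 0, 0, 0 }			/* table terminator */
};
#endif
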
4892const struct pmap_devmap *
4893pmap_devmap_find_pa(vm_paddr_t pa, vm_size_t size)
4894{
4895	int i;
4896
4897	if (pmap_devmap_table == NULL)
4898		return (NULL);
4899
4900	for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) {
4901		if (pa >= pmap_devmap_table[i].pd_pa &&
4902		    pa + size <= pmap_devmap_table[i].pd_pa +
4903				 pmap_devmap_table[i].pd_size)
4904			return (&pmap_devmap_table[i]);
4905	}
4906
4907	return (NULL);
4908}
4909
4910const struct pmap_devmap *
4911pmap_devmap_find_va(vm_offset_t va, vm_size_t size)
4912{
4913	int i;
4914
4915	if (pmap_devmap_table == NULL)
4916		return (NULL);
4917
4918	for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) {
4919		if (va >= pmap_devmap_table[i].pd_va &&
4920		    va + size <= pmap_devmap_table[i].pd_va +
4921				 pmap_devmap_table[i].pd_size)
4922			return (&pmap_devmap_table[i]);
4923	}
4924
4925	return (NULL);
4926}
4927
4928