mmu_oea.c revision 209369
1/*-
2 * Copyright (c) 2001 The NetBSD Foundation, Inc.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Matt Thomas <matt@3am-software.com> of Allegro Networks, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *        This product includes software developed by the NetBSD
19 *        Foundation, Inc. and its contributors.
20 * 4. Neither the name of The NetBSD Foundation nor the names of its
21 *    contributors may be used to endorse or promote products derived
22 *    from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36/*-
37 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
38 * Copyright (C) 1995, 1996 TooLs GmbH.
39 * All rights reserved.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 * 3. All advertising materials mentioning features or use of this software
50 *    must display the following acknowledgement:
51 *	This product includes software developed by TooLs GmbH.
52 * 4. The name of TooLs GmbH may not be used to endorse or promote products
53 *    derived from this software without specific prior written permission.
54 *
55 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
58 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
61 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
62 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
63 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
64 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65 *
66 * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $
67 */
68/*-
69 * Copyright (C) 2001 Benno Rice.
70 * All rights reserved.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the above copyright
76 *    notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 *    notice, this list of conditions and the following disclaimer in the
79 *    documentation and/or other materials provided with the distribution.
80 *
81 * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
82 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
83 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
84 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
85 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
86 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
87 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
88 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
89 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
90 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
91 */
92
93#include <sys/cdefs.h>
94__FBSDID("$FreeBSD: head/sys/powerpc/aim/mmu_oea.c 209369 2010-06-20 16:56:48Z nwhitehorn $");
95
96/*
97 * Manages physical address maps.
98 *
99 * In addition to hardware address maps, this module is called upon to
100 * provide software-use-only maps which may or may not be stored in the
101 * same form as hardware maps.  These pseudo-maps are used to store
102 * intermediate results from copy operations to and from address spaces.
103 *
104 * Since the information managed by this module is also stored by the
105 * logical address mapping module, this module may throw away valid virtual
106 * to physical mappings at almost any time.  However, invalidations of
107 * mappings must be done as requested.
108 *
109 * In order to cope with hardware architectures which make virtual to
110 * physical map invalidates expensive, this module may delay invalidate
111 * reduced protection operations until such time as they are actually
112 * necessary.  This module is given full information as to which processors
113 * are currently using which maps, and to when physical maps must be made
114 * correct.
115 */
116
117#include "opt_kstack_pages.h"
118
119#include <sys/param.h>
120#include <sys/kernel.h>
121#include <sys/ktr.h>
122#include <sys/lock.h>
123#include <sys/msgbuf.h>
124#include <sys/mutex.h>
125#include <sys/proc.h>
126#include <sys/sysctl.h>
127#include <sys/systm.h>
128#include <sys/vmmeter.h>
129
130#include <dev/ofw/openfirm.h>
131
132#include <vm/vm.h>
133#include <vm/vm_param.h>
134#include <vm/vm_kern.h>
135#include <vm/vm_page.h>
136#include <vm/vm_map.h>
137#include <vm/vm_object.h>
138#include <vm/vm_extern.h>
139#include <vm/vm_pageout.h>
140#include <vm/vm_pager.h>
141#include <vm/uma.h>
142
143#include <machine/cpu.h>
144#include <machine/platform.h>
145#include <machine/bat.h>
146#include <machine/frame.h>
147#include <machine/md_var.h>
148#include <machine/psl.h>
149#include <machine/pte.h>
150#include <machine/smp.h>
151#include <machine/sr.h>
152#include <machine/mmuvar.h>
153
154#include "mmu_if.h"
155
156#define	MOEA_DEBUG
157
158#define TODO	panic("%s: not implemented", __func__);
159
160#define	VSID_MAKE(sr, hash)	((sr) | (((hash) & 0xfffff) << 4))
161#define	VSID_TO_SR(vsid)	((vsid) & 0xf)
162#define	VSID_TO_HASH(vsid)	(((vsid) >> 4) & 0xfffff)
163
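/*
 * Illustrative sketch (not compiled): a VSID packs a 4-bit segment register
 * number in its low bits and a 20-bit hash above it, so for any sr in 0..15
 * and any 20-bit hash the two accessor macros recover exactly what
 * VSID_MAKE() put in.
 */
#if 0
static void
vsid_macro_example(void)
{
	u_int vsid;

	vsid = VSID_MAKE(3, 0x12345);			/* 0x00123453 */
	KASSERT(VSID_TO_SR(vsid) == 3, ("sr"));
	KASSERT(VSID_TO_HASH(vsid) == 0x12345, ("hash"));
}
#endif
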
164#define	PVO_PTEGIDX_MASK	0x007		/* which PTEG slot */
165#define	PVO_PTEGIDX_VALID	0x008		/* slot is valid */
166#define	PVO_WIRED		0x010		/* PVO entry is wired */
167#define	PVO_MANAGED		0x020		/* PVO entry is managed */
168#define	PVO_EXECUTABLE		0x040		/* PVO entry is executable */
169#define	PVO_BOOTSTRAP		0x080		/* PVO entry allocated during
170						   bootstrap */
171#define PVO_FAKE		0x100		/* fictitious phys page */
172#define	PVO_VADDR(pvo)		((pvo)->pvo_vaddr & ~ADDR_POFF)
173#define	PVO_ISEXECUTABLE(pvo)	((pvo)->pvo_vaddr & PVO_EXECUTABLE)
174#define PVO_ISFAKE(pvo)		((pvo)->pvo_vaddr & PVO_FAKE)
175#define	PVO_PTEGIDX_GET(pvo)	((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK)
176#define	PVO_PTEGIDX_ISSET(pvo)	((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID)
177#define	PVO_PTEGIDX_CLR(pvo)	\
178	((void)((pvo)->pvo_vaddr &= ~(PVO_PTEGIDX_VALID|PVO_PTEGIDX_MASK)))
179#define	PVO_PTEGIDX_SET(pvo, i)	\
180	((void)((pvo)->pvo_vaddr |= (i)|PVO_PTEGIDX_VALID))
181
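/*
 * Illustrative sketch (not compiled): the macros above keep a 3-bit PTEG
 * slot index plus a valid bit in the low bits of pvo_vaddr, alongside the
 * other PVO_* flags.
 */
#if 0
static void
pvo_ptegidx_example(struct pvo_entry *pvo)
{

	PVO_PTEGIDX_SET(pvo, 5);		/* remember slot 5 */
	KASSERT(PVO_PTEGIDX_ISSET(pvo), ("valid bit not set"));
	KASSERT(PVO_PTEGIDX_GET(pvo) == 5, ("wrong slot"));
	PVO_PTEGIDX_CLR(pvo);			/* forget the slot again */
}
#endif
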
182#define	MOEA_PVO_CHECK(pvo)
183
184struct ofw_map {
185	vm_offset_t	om_va;
186	vm_size_t	om_len;
187	vm_offset_t	om_pa;
188	u_int		om_mode;
189};
190
191/*
192 * Map of physical memory regions.
193 */
194static struct	mem_region *regions;
195static struct	mem_region *pregions;
196u_int           phys_avail_count;
197int		regions_sz, pregions_sz;
198static struct	ofw_map *translations;
199
200extern struct pmap ofw_pmap;
201
202/*
203 * Lock for the pteg and pvo tables.
204 */
205struct mtx	moea_table_mutex;
206
207/* tlbie instruction synchronization */
208static struct mtx tlbie_mtx;
209
210/*
211 * PTEG data.
212 */
213static struct	pteg *moea_pteg_table;
214u_int		moea_pteg_count;
215u_int		moea_pteg_mask;
216
217/*
218 * PVO data.
219 */
220struct	pvo_head *moea_pvo_table;		/* pvo entries by pteg index */
221struct	pvo_head moea_pvo_kunmanaged =
222    LIST_HEAD_INITIALIZER(moea_pvo_kunmanaged);	/* list of unmanaged pages */
223struct	pvo_head moea_pvo_unmanaged =
224    LIST_HEAD_INITIALIZER(moea_pvo_unmanaged);	/* list of unmanaged pages */
225
226uma_zone_t	moea_upvo_zone;	/* zone for pvo entries for unmanaged pages */
227uma_zone_t	moea_mpvo_zone;	/* zone for pvo entries for managed pages */
228
229#define	BPVO_POOL_SIZE	32768
230static struct	pvo_entry *moea_bpvo_pool;
231static int	moea_bpvo_pool_index = 0;
232
233#define	VSID_NBPW	(sizeof(u_int32_t) * 8)
234static u_int	moea_vsid_bitmap[NPMAPS / VSID_NBPW];
235
236static boolean_t moea_initialized = FALSE;
237
238/*
239 * Statistics.
240 */
241u_int	moea_pte_valid = 0;
242u_int	moea_pte_overflow = 0;
243u_int	moea_pte_replacements = 0;
244u_int	moea_pvo_entries = 0;
245u_int	moea_pvo_enter_calls = 0;
246u_int	moea_pvo_remove_calls = 0;
247u_int	moea_pte_spills = 0;
248SYSCTL_INT(_machdep, OID_AUTO, moea_pte_valid, CTLFLAG_RD, &moea_pte_valid,
249    0, "");
250SYSCTL_INT(_machdep, OID_AUTO, moea_pte_overflow, CTLFLAG_RD,
251    &moea_pte_overflow, 0, "");
252SYSCTL_INT(_machdep, OID_AUTO, moea_pte_replacements, CTLFLAG_RD,
253    &moea_pte_replacements, 0, "");
254SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_entries, CTLFLAG_RD, &moea_pvo_entries,
255    0, "");
256SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_enter_calls, CTLFLAG_RD,
257    &moea_pvo_enter_calls, 0, "");
258SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_remove_calls, CTLFLAG_RD,
259    &moea_pvo_remove_calls, 0, "");
260SYSCTL_INT(_machdep, OID_AUTO, moea_pte_spills, CTLFLAG_RD,
261    &moea_pte_spills, 0, "");
262
263/*
264 * Allocate physical memory for use in moea_bootstrap.
265 */
266static vm_offset_t	moea_bootstrap_alloc(vm_size_t, u_int);
267
268/*
269 * PTE calls.
270 */
271static int		moea_pte_insert(u_int, struct pte *);
272
273/*
274 * PVO calls.
275 */
276static int	moea_pvo_enter(pmap_t, uma_zone_t, struct pvo_head *,
277		    vm_offset_t, vm_offset_t, u_int, int);
278static void	moea_pvo_remove(struct pvo_entry *, int);
279static struct	pvo_entry *moea_pvo_find_va(pmap_t, vm_offset_t, int *);
280static struct	pte *moea_pvo_to_pte(const struct pvo_entry *, int);
281
282/*
283 * Utility routines.
284 */
285static void		moea_enter_locked(pmap_t, vm_offset_t, vm_page_t,
286			    vm_prot_t, boolean_t);
287static void		moea_syncicache(vm_offset_t, vm_size_t);
288static boolean_t	moea_query_bit(vm_page_t, int);
289static u_int		moea_clear_bit(vm_page_t, int);
290static void		moea_kremove(mmu_t, vm_offset_t);
291int		moea_pte_spill(vm_offset_t);
292
293/*
294 * Kernel MMU interface
295 */
296void moea_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t);
297void moea_clear_modify(mmu_t, vm_page_t);
298void moea_clear_reference(mmu_t, vm_page_t);
299void moea_copy_page(mmu_t, vm_page_t, vm_page_t);
300void moea_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t);
301void moea_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t,
302    vm_prot_t);
303void moea_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t);
304vm_paddr_t moea_extract(mmu_t, pmap_t, vm_offset_t);
305vm_page_t moea_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t);
306void moea_init(mmu_t);
307boolean_t moea_is_modified(mmu_t, vm_page_t);
308boolean_t moea_is_referenced(mmu_t, vm_page_t);
309boolean_t moea_ts_referenced(mmu_t, vm_page_t);
310vm_offset_t moea_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int);
311boolean_t moea_page_exists_quick(mmu_t, pmap_t, vm_page_t);
312int moea_page_wired_mappings(mmu_t, vm_page_t);
313void moea_pinit(mmu_t, pmap_t);
314void moea_pinit0(mmu_t, pmap_t);
315void moea_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
316void moea_qenter(mmu_t, vm_offset_t, vm_page_t *, int);
317void moea_qremove(mmu_t, vm_offset_t, int);
318void moea_release(mmu_t, pmap_t);
319void moea_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
320void moea_remove_all(mmu_t, vm_page_t);
321void moea_remove_write(mmu_t, vm_page_t);
322void moea_zero_page(mmu_t, vm_page_t);
323void moea_zero_page_area(mmu_t, vm_page_t, int, int);
324void moea_zero_page_idle(mmu_t, vm_page_t);
325void moea_activate(mmu_t, struct thread *);
326void moea_deactivate(mmu_t, struct thread *);
327void moea_cpu_bootstrap(mmu_t, int);
328void moea_bootstrap(mmu_t, vm_offset_t, vm_offset_t);
329void *moea_mapdev(mmu_t, vm_offset_t, vm_size_t);
330void moea_unmapdev(mmu_t, vm_offset_t, vm_size_t);
331vm_offset_t moea_kextract(mmu_t, vm_offset_t);
332void moea_kenter(mmu_t, vm_offset_t, vm_offset_t);
333boolean_t moea_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t);
334static void moea_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t);
335
336static mmu_method_t moea_methods[] = {
337	MMUMETHOD(mmu_change_wiring,	moea_change_wiring),
338	MMUMETHOD(mmu_clear_modify,	moea_clear_modify),
339	MMUMETHOD(mmu_clear_reference,	moea_clear_reference),
340	MMUMETHOD(mmu_copy_page,	moea_copy_page),
341	MMUMETHOD(mmu_enter,		moea_enter),
342	MMUMETHOD(mmu_enter_object,	moea_enter_object),
343	MMUMETHOD(mmu_enter_quick,	moea_enter_quick),
344	MMUMETHOD(mmu_extract,		moea_extract),
345	MMUMETHOD(mmu_extract_and_hold,	moea_extract_and_hold),
346	MMUMETHOD(mmu_init,		moea_init),
347	MMUMETHOD(mmu_is_modified,	moea_is_modified),
348	MMUMETHOD(mmu_is_referenced,	moea_is_referenced),
349	MMUMETHOD(mmu_ts_referenced,	moea_ts_referenced),
350	MMUMETHOD(mmu_map,     		moea_map),
351	MMUMETHOD(mmu_page_exists_quick,moea_page_exists_quick),
352	MMUMETHOD(mmu_page_wired_mappings,moea_page_wired_mappings),
353	MMUMETHOD(mmu_pinit,		moea_pinit),
354	MMUMETHOD(mmu_pinit0,		moea_pinit0),
355	MMUMETHOD(mmu_protect,		moea_protect),
356	MMUMETHOD(mmu_qenter,		moea_qenter),
357	MMUMETHOD(mmu_qremove,		moea_qremove),
358	MMUMETHOD(mmu_release,		moea_release),
359	MMUMETHOD(mmu_remove,		moea_remove),
360	MMUMETHOD(mmu_remove_all,      	moea_remove_all),
361	MMUMETHOD(mmu_remove_write,	moea_remove_write),
362	MMUMETHOD(mmu_sync_icache,	moea_sync_icache),
363	MMUMETHOD(mmu_zero_page,       	moea_zero_page),
364	MMUMETHOD(mmu_zero_page_area,	moea_zero_page_area),
365	MMUMETHOD(mmu_zero_page_idle,	moea_zero_page_idle),
366	MMUMETHOD(mmu_activate,		moea_activate),
367	MMUMETHOD(mmu_deactivate,      	moea_deactivate),
368
369	/* Internal interfaces */
370	MMUMETHOD(mmu_bootstrap,       	moea_bootstrap),
371	MMUMETHOD(mmu_cpu_bootstrap,   	moea_cpu_bootstrap),
372	MMUMETHOD(mmu_mapdev,		moea_mapdev),
373	MMUMETHOD(mmu_unmapdev,		moea_unmapdev),
374	MMUMETHOD(mmu_kextract,		moea_kextract),
375	MMUMETHOD(mmu_kenter,		moea_kenter),
376	MMUMETHOD(mmu_dev_direct_mapped,moea_dev_direct_mapped),
377
378	{ 0, 0 }
379};
380
381static mmu_def_t oea_mmu = {
382	MMU_TYPE_OEA,
383	moea_methods,
384	0
385};
386MMU_DEF(oea_mmu);
387
388static void
389tlbie(vm_offset_t va)
390{
391
392	mtx_lock_spin(&tlbie_mtx);
393	__asm __volatile("tlbie %0" :: "r"(va));
394	__asm __volatile("tlbsync");
395	powerpc_sync();
396	mtx_unlock_spin(&tlbie_mtx);
397}
398
399static void
400tlbia(void)
401{
402	vm_offset_t va;
403
404	for (va = 0; va < 0x00040000; va += 0x00001000) {
405		__asm __volatile("tlbie %0" :: "r"(va));
406		powerpc_sync();
407	}
408	__asm __volatile("tlbsync");
409	powerpc_sync();
410}
411
412static __inline int
413va_to_sr(u_int *sr, vm_offset_t va)
414{
415	return (sr[(uintptr_t)va >> ADDR_SR_SHFT]);
416}
417
418static __inline u_int
419va_to_pteg(u_int sr, vm_offset_t addr)
420{
421	u_int hash;
422
423	hash = (sr & SR_VSID_MASK) ^ (((u_int)addr & ADDR_PIDX) >>
424	    ADDR_PIDX_SHFT);
425	return (hash & moea_pteg_mask);
426}
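
/*
 * Illustrative sketch (not compiled): the two helpers above combine to give
 * the primary PTEG index for a virtual address in a given pmap, mirroring
 * what moea_pvo_enter() and moea_pvo_find_va() do.
 */
#if 0
static u_int
example_primary_pteg(pmap_t pm, vm_offset_t va)
{
	u_int sr;

	va &= ~ADDR_POFF;
	sr = va_to_sr(pm->pm_sr, va);
	return (va_to_pteg(sr, va));
}
#endif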
427
428static __inline struct pvo_head *
429pa_to_pvoh(vm_offset_t pa, vm_page_t *pg_p)
430{
431	struct	vm_page *pg;
432
433	pg = PHYS_TO_VM_PAGE(pa);
434
435	if (pg_p != NULL)
436		*pg_p = pg;
437
438	if (pg == NULL)
439		return (&moea_pvo_unmanaged);
440
441	return (&pg->md.mdpg_pvoh);
442}
443
444static __inline struct pvo_head *
445vm_page_to_pvoh(vm_page_t m)
446{
447
448	return (&m->md.mdpg_pvoh);
449}
450
451static __inline void
452moea_attr_clear(vm_page_t m, int ptebit)
453{
454
455	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
456	m->md.mdpg_attrs &= ~ptebit;
457}
458
459static __inline int
460moea_attr_fetch(vm_page_t m)
461{
462
463	return (m->md.mdpg_attrs);
464}
465
466static __inline void
467moea_attr_save(vm_page_t m, int ptebit)
468{
469
470	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
471	m->md.mdpg_attrs |= ptebit;
472}
473
474static __inline int
475moea_pte_compare(const struct pte *pt, const struct pte *pvo_pt)
476{
477	if (pt->pte_hi == pvo_pt->pte_hi)
478		return (1);
479
480	return (0);
481}
482
483static __inline int
484moea_pte_match(struct pte *pt, u_int sr, vm_offset_t va, int which)
485{
486	return (pt->pte_hi & ~PTE_VALID) ==
487	    (((sr & SR_VSID_MASK) << PTE_VSID_SHFT) |
488	    ((va >> ADDR_API_SHFT) & PTE_API) | which);
489}
490
491static __inline void
492moea_pte_create(struct pte *pt, u_int sr, vm_offset_t va, u_int pte_lo)
493{
494
495	mtx_assert(&moea_table_mutex, MA_OWNED);
496
497	/*
498	 * Construct a PTE.  Default to IMB initially.  Valid bit only gets
499	 * set when the real pte is set in memory.
500	 *
501	 * Note: Don't set the valid bit for correct operation of tlb update.
502	 */
503	pt->pte_hi = ((sr & SR_VSID_MASK) << PTE_VSID_SHFT) |
504	    (((va & ADDR_PIDX) >> ADDR_API_SHFT) & PTE_API);
505	pt->pte_lo = pte_lo;
506}
507
508static __inline void
509moea_pte_synch(struct pte *pt, struct pte *pvo_pt)
510{
511
512	mtx_assert(&moea_table_mutex, MA_OWNED);
513	pvo_pt->pte_lo |= pt->pte_lo & (PTE_REF | PTE_CHG);
514}
515
516static __inline void
517moea_pte_clear(struct pte *pt, vm_offset_t va, int ptebit)
518{
519
520	mtx_assert(&moea_table_mutex, MA_OWNED);
521
522	/*
523	 * As shown in Section 7.6.3.2.3
524	 */
525	pt->pte_lo &= ~ptebit;
526	tlbie(va);
527}
528
529static __inline void
530moea_pte_set(struct pte *pt, struct pte *pvo_pt)
531{
532
533	mtx_assert(&moea_table_mutex, MA_OWNED);
534	pvo_pt->pte_hi |= PTE_VALID;
535
536	/*
537	 * Update the PTE as defined in section 7.6.3.1.
538	 * Note that the REF/CHG bits are from pvo_pt and thus should have
539	 * been saved so this routine can restore them (if desired).
540	 */
541	pt->pte_lo = pvo_pt->pte_lo;
542	powerpc_sync();
543	pt->pte_hi = pvo_pt->pte_hi;
544	powerpc_sync();
545	moea_pte_valid++;
546}
547
548static __inline void
549moea_pte_unset(struct pte *pt, struct pte *pvo_pt, vm_offset_t va)
550{
551
552	mtx_assert(&moea_table_mutex, MA_OWNED);
553	pvo_pt->pte_hi &= ~PTE_VALID;
554
555	/*
556	 * Force the ref & chg bits back into the PTEs.
557	 */
558	powerpc_sync();
559
560	/*
561	 * Invalidate the pte.
562	 */
563	pt->pte_hi &= ~PTE_VALID;
564
565	tlbie(va);
566
567	/*
568	 * Save the ref & chg bits.
569	 */
570	moea_pte_synch(pt, pvo_pt);
571	moea_pte_valid--;
572}
573
574static __inline void
575moea_pte_change(struct pte *pt, struct pte *pvo_pt, vm_offset_t va)
576{
577
578	/*
579	 * Invalidate the PTE
580	 */
581	moea_pte_unset(pt, pvo_pt, va);
582	moea_pte_set(pt, pvo_pt);
583}
584
585/*
586 * Quick sort callout for comparing memory regions.
587 */
588static int	mr_cmp(const void *a, const void *b);
589static int	om_cmp(const void *a, const void *b);
590
591static int
592mr_cmp(const void *a, const void *b)
593{
594	const struct	mem_region *regiona;
595	const struct	mem_region *regionb;
596
597	regiona = a;
598	regionb = b;
599	if (regiona->mr_start < regionb->mr_start)
600		return (-1);
601	else if (regiona->mr_start > regionb->mr_start)
602		return (1);
603	else
604		return (0);
605}
606
607static int
608om_cmp(const void *a, const void *b)
609{
610	const struct	ofw_map *mapa;
611	const struct	ofw_map *mapb;
612
613	mapa = a;
614	mapb = b;
615	if (mapa->om_pa < mapb->om_pa)
616		return (-1);
617	else if (mapa->om_pa > mapb->om_pa)
618		return (1);
619	else
620		return (0);
621}
622
623void
624moea_cpu_bootstrap(mmu_t mmup, int ap)
625{
626	u_int sdr;
627	int i;
628
629	if (ap) {
630		powerpc_sync();
631		__asm __volatile("mtdbatu 0,%0" :: "r"(battable[0].batu));
632		__asm __volatile("mtdbatl 0,%0" :: "r"(battable[0].batl));
633		isync();
634		__asm __volatile("mtibatu 0,%0" :: "r"(battable[0].batu));
635		__asm __volatile("mtibatl 0,%0" :: "r"(battable[0].batl));
636		isync();
637	}
638
639	__asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu));
640	__asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl));
641	isync();
642
643	__asm __volatile("mtibatu 1,%0" :: "r"(0));
644	__asm __volatile("mtdbatu 2,%0" :: "r"(0));
645	__asm __volatile("mtibatu 2,%0" :: "r"(0));
646	__asm __volatile("mtdbatu 3,%0" :: "r"(0));
647	__asm __volatile("mtibatu 3,%0" :: "r"(0));
648	isync();
649
650	for (i = 0; i < 16; i++)
651		mtsrin(i << ADDR_SR_SHFT, EMPTY_SEGMENT);
652
653	__asm __volatile("mtsr %0,%1" :: "n"(KERNEL_SR), "r"(KERNEL_SEGMENT));
654	__asm __volatile("mtsr %0,%1" :: "n"(KERNEL2_SR), "r"(KERNEL2_SEGMENT));
655	powerpc_sync();
656
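	/*
	 * SDR1 takes the physical base of the PTEG table in its upper bits
	 * and the hash mask in its low bits.  moea_bootstrap() allocates the
	 * table with alignment equal to its size, so the base's low bits are
	 * zero and a plain OR is enough.  For example, with moea_pteg_count
	 * of 0x8000 the table occupies 0x8000 * 64 bytes (2 MB),
	 * moea_pteg_mask is 0x7fff, and the field ORed in below is
	 * 0x7fff >> 10 == 0x1f.
	 */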
657	sdr = (u_int)moea_pteg_table | (moea_pteg_mask >> 10);
658	__asm __volatile("mtsdr1 %0" :: "r"(sdr));
659	isync();
660
661	tlbia();
662}
663
664void
665moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
666{
667	ihandle_t	mmui;
668	phandle_t	chosen, mmu;
669	int		sz;
670	int		i, j;
671	int		ofw_mappings;
672	vm_size_t	size, physsz, hwphyssz;
673	vm_offset_t	pa, va, off;
674	void		*dpcpu;
675	register_t	msr;
676
677        /*
678         * Set up BAT0 to map the lowest 256 MB area
679         */
680        battable[0x0].batl = BATL(0x00000000, BAT_M, BAT_PP_RW);
681        battable[0x0].batu = BATU(0x00000000, BAT_BL_256M, BAT_Vs);
682
683        /*
684         * Map PCI memory space.
685         */
686        battable[0x8].batl = BATL(0x80000000, BAT_I|BAT_G, BAT_PP_RW);
687        battable[0x8].batu = BATU(0x80000000, BAT_BL_256M, BAT_Vs);
688
689        battable[0x9].batl = BATL(0x90000000, BAT_I|BAT_G, BAT_PP_RW);
690        battable[0x9].batu = BATU(0x90000000, BAT_BL_256M, BAT_Vs);
691
692        battable[0xa].batl = BATL(0xa0000000, BAT_I|BAT_G, BAT_PP_RW);
693        battable[0xa].batu = BATU(0xa0000000, BAT_BL_256M, BAT_Vs);
694
695        battable[0xb].batl = BATL(0xb0000000, BAT_I|BAT_G, BAT_PP_RW);
696        battable[0xb].batu = BATU(0xb0000000, BAT_BL_256M, BAT_Vs);
697
698        /*
699         * Map obio devices.
700         */
701        battable[0xf].batl = BATL(0xf0000000, BAT_I|BAT_G, BAT_PP_RW);
702        battable[0xf].batu = BATU(0xf0000000, BAT_BL_256M, BAT_Vs);
703
704	/*
705	 * Use an IBAT and a DBAT to map the bottom segment of memory
706	 * where we are. Turn off instruction relocation temporarily
707	 * to prevent faults while reprogramming the IBAT.
708	 */
709	msr = mfmsr();
710	mtmsr(msr & ~PSL_IR);
711	__asm (".balign 32; \n"
712	       "mtibatu 0,%0; mtibatl 0,%1; isync; \n"
713	       "mtdbatu 0,%0; mtdbatl 0,%1; isync"
714	    :: "r"(battable[0].batu), "r"(battable[0].batl));
715	mtmsr(msr);
716
717	/* map pci space */
718	__asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu));
719	__asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl));
720	isync();
721
722	/* set global direct map flag */
723	hw_direct_map = 1;
724
725	mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
726	CTR0(KTR_PMAP, "moea_bootstrap: physical memory");
727
728	qsort(pregions, pregions_sz, sizeof(*pregions), mr_cmp);
729	for (i = 0; i < pregions_sz; i++) {
730		vm_offset_t pa;
731		vm_offset_t end;
732
733		CTR3(KTR_PMAP, "physregion: %#x - %#x (%#x)",
734			pregions[i].mr_start,
735			pregions[i].mr_start + pregions[i].mr_size,
736			pregions[i].mr_size);
737		/*
738		 * Install entries into the BAT table to allow all
739		 * of physmem to be covered by on-demand BAT entries.
740		 * The loop will sometimes set the same battable element
741		 * twice, but that's fine since they won't be used for
742		 * a while yet.
743		 */
744		pa = pregions[i].mr_start & 0xf0000000;
745		end = pregions[i].mr_start + pregions[i].mr_size;
746		do {
747                        u_int n = pa >> ADDR_SR_SHFT;
748
749			battable[n].batl = BATL(pa, BAT_M, BAT_PP_RW);
750			battable[n].batu = BATU(pa, BAT_BL_256M, BAT_Vs);
751			pa += SEGMENT_LENGTH;
752		} while (pa < end);
753	}
754
755	if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz)
756		panic("moea_bootstrap: phys_avail too small");
757	qsort(regions, regions_sz, sizeof(*regions), mr_cmp);
758	phys_avail_count = 0;
759	physsz = 0;
760	hwphyssz = 0;
761	TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
762	for (i = 0, j = 0; i < regions_sz; i++, j += 2) {
763		CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start,
764		    regions[i].mr_start + regions[i].mr_size,
765		    regions[i].mr_size);
766		if (hwphyssz != 0 &&
767		    (physsz + regions[i].mr_size) >= hwphyssz) {
768			if (physsz < hwphyssz) {
769				phys_avail[j] = regions[i].mr_start;
770				phys_avail[j + 1] = regions[i].mr_start +
771				    hwphyssz - physsz;
772				physsz = hwphyssz;
773				phys_avail_count++;
774			}
775			break;
776		}
777		phys_avail[j] = regions[i].mr_start;
778		phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size;
779		phys_avail_count++;
780		physsz += regions[i].mr_size;
781	}
782	physmem = btoc(physsz);
783
784	/*
785	 * Allocate PTEG table.
786	 */
787#ifdef PTEGCOUNT
788	moea_pteg_count = PTEGCOUNT;
789#else
790	moea_pteg_count = 0x1000;
791
792	while (moea_pteg_count < physmem)
793		moea_pteg_count <<= 1;
794
795	moea_pteg_count >>= 1;
796#endif /* PTEGCOUNT */
797
798	size = moea_pteg_count * sizeof(struct pteg);
799	CTR2(KTR_PMAP, "moea_bootstrap: %d PTEGs, %d bytes", moea_pteg_count,
800	    size);
801	moea_pteg_table = (struct pteg *)moea_bootstrap_alloc(size, size);
802	CTR1(KTR_PMAP, "moea_bootstrap: PTEG table at %p", moea_pteg_table);
803	bzero((void *)moea_pteg_table, moea_pteg_count * sizeof(struct pteg));
804	moea_pteg_mask = moea_pteg_count - 1;
805
806	/*
807	 * Allocate pv/overflow lists.
808	 */
809	size = sizeof(struct pvo_head) * moea_pteg_count;
810	moea_pvo_table = (struct pvo_head *)moea_bootstrap_alloc(size,
811	    PAGE_SIZE);
812	CTR1(KTR_PMAP, "moea_bootstrap: PVO table at %p", moea_pvo_table);
813	for (i = 0; i < moea_pteg_count; i++)
814		LIST_INIT(&moea_pvo_table[i]);
815
816	/*
817	 * Initialize the lock that synchronizes access to the pteg and pvo
818	 * tables.
819	 */
820	mtx_init(&moea_table_mutex, "pmap table", NULL, MTX_DEF |
821	    MTX_RECURSE);
822
823	mtx_init(&tlbie_mtx, "tlbie", NULL, MTX_SPIN);
824
825	/*
826	 * Initialise the unmanaged pvo pool.
827	 */
828	moea_bpvo_pool = (struct pvo_entry *)moea_bootstrap_alloc(
829		BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0);
830	moea_bpvo_pool_index = 0;
831
832	/*
833	 * Make sure kernel vsid is allocated as well as VSID 0.
834	 */
835	moea_vsid_bitmap[(KERNEL_VSIDBITS & (NPMAPS - 1)) / VSID_NBPW]
836		|= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
837	moea_vsid_bitmap[0] |= 1;
838
839	/*
840	 * Set up the Open Firmware pmap and add its mappings.
841	 */
842	moea_pinit(mmup, &ofw_pmap);
843	ofw_pmap.pm_sr[KERNEL_SR] = KERNEL_SEGMENT;
844	ofw_pmap.pm_sr[KERNEL2_SR] = KERNEL2_SEGMENT;
845	if ((chosen = OF_finddevice("/chosen")) == -1)
846		panic("moea_bootstrap: can't find /chosen");
847	OF_getprop(chosen, "mmu", &mmui, 4);
848	if ((mmu = OF_instance_to_package(mmui)) == -1)
849		panic("moea_bootstrap: can't get mmu package");
850	if ((sz = OF_getproplen(mmu, "translations")) == -1)
851		panic("moea_bootstrap: can't get ofw translation count");
852	translations = NULL;
853	for (i = 0; phys_avail[i] != 0; i += 2) {
854		if (phys_avail[i + 1] >= sz) {
855			translations = (struct ofw_map *)phys_avail[i];
856			break;
857		}
858	}
859	if (translations == NULL)
860		panic("moea_bootstrap: no space to copy translations");
861	bzero(translations, sz);
862	if (OF_getprop(mmu, "translations", translations, sz) == -1)
863		panic("moea_bootstrap: can't get ofw translations");
864	CTR0(KTR_PMAP, "moea_bootstrap: translations");
865	sz /= sizeof(*translations);
866	qsort(translations, sz, sizeof (*translations), om_cmp);
867	for (i = 0, ofw_mappings = 0; i < sz; i++) {
868		CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x",
869		    translations[i].om_pa, translations[i].om_va,
870		    translations[i].om_len);
871
872		/*
873		 * If the mapping is 1:1, let the RAM and device on-demand
874		 * BAT tables take care of the translation.
875		 */
876		if (translations[i].om_va == translations[i].om_pa)
877			continue;
878
879		/* Enter the pages */
880		for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) {
881			struct	vm_page m;
882
883			m.phys_addr = translations[i].om_pa + off;
884			m.oflags = VPO_BUSY;
885			PMAP_LOCK(&ofw_pmap);
886			moea_enter_locked(&ofw_pmap,
887				   translations[i].om_va + off, &m,
888				   VM_PROT_ALL, 1);
889			PMAP_UNLOCK(&ofw_pmap);
890			ofw_mappings++;
891		}
892	}
893
894	/*
895	 * Calculate the last available physical address.
896	 */
897	for (i = 0; phys_avail[i + 2] != 0; i += 2)
898		;
899	Maxmem = powerpc_btop(phys_avail[i + 1]);
900
901	/*
902	 * Initialize the kernel pmap (which is statically allocated).
903	 */
904	PMAP_LOCK_INIT(kernel_pmap);
905	for (i = 0; i < 16; i++) {
906		kernel_pmap->pm_sr[i] = EMPTY_SEGMENT;
907	}
908	kernel_pmap->pm_sr[KERNEL_SR] = KERNEL_SEGMENT;
909	kernel_pmap->pm_sr[KERNEL2_SR] = KERNEL2_SEGMENT;
910	kernel_pmap->pm_active = ~0;
911
912	moea_cpu_bootstrap(mmup,0);
913
914	pmap_bootstrapped++;
915
916	/*
917	 * Set the start and end of kva.
918	 */
919	virtual_avail = VM_MIN_KERNEL_ADDRESS;
920	virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS;
921
922	/*
923	 * Allocate a kernel stack with a guard page for thread0 and map it
924	 * into the kernel page map.
925	 */
926	pa = moea_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE);
927	va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
928	virtual_avail = va + KSTACK_PAGES * PAGE_SIZE;
929	CTR2(KTR_PMAP, "moea_bootstrap: kstack0 at %#x (%#x)", pa, va);
930	thread0.td_kstack = va;
931	thread0.td_kstack_pages = KSTACK_PAGES;
932	for (i = 0; i < KSTACK_PAGES; i++) {
933		moea_kenter(mmup, va, pa);
934		pa += PAGE_SIZE;
935		va += PAGE_SIZE;
936	}
937
938	/*
939	 * Allocate virtual address space for the message buffer.
940	 */
941	pa = msgbuf_phys = moea_bootstrap_alloc(MSGBUF_SIZE, PAGE_SIZE);
942	msgbufp = (struct msgbuf *)virtual_avail;
943	va = virtual_avail;
944	virtual_avail += round_page(MSGBUF_SIZE);
945	while (va < virtual_avail) {
946		moea_kenter(mmup, va, pa);
947		pa += PAGE_SIZE;
948		va += PAGE_SIZE;
949	}
950
951	/*
952	 * Allocate virtual address space for the dynamic percpu area.
953	 */
954	pa = moea_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE);
955	dpcpu = (void *)virtual_avail;
956	va = virtual_avail;
957	virtual_avail += DPCPU_SIZE;
958	while (va < virtual_avail) {
959		moea_kenter(mmup, va, pa);
960		pa += PAGE_SIZE;
961		va += PAGE_SIZE;
962	}
963	dpcpu_init(dpcpu, 0);
964}
965
966/*
967 * Activate a user pmap.  The pmap must be activated before its address
968 * space can be accessed in any way.
969 */
970void
971moea_activate(mmu_t mmu, struct thread *td)
972{
973	pmap_t	pm, pmr;
974
975	/*
976	 * Load all the data we need up front to encourage the compiler to
977	 * not issue any loads while we have interrupts disabled below.
978	 */
979	pm = &td->td_proc->p_vmspace->vm_pmap;
980	pmr = pm->pmap_phys;
981
982	pm->pm_active |= PCPU_GET(cpumask);
983	PCPU_SET(curpmap, pmr);
984}
985
986void
987moea_deactivate(mmu_t mmu, struct thread *td)
988{
989	pmap_t	pm;
990
991	pm = &td->td_proc->p_vmspace->vm_pmap;
992	pm->pm_active &= ~PCPU_GET(cpumask);
993	PCPU_SET(curpmap, NULL);
994}
995
996void
997moea_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired)
998{
999	struct	pvo_entry *pvo;
1000
1001	PMAP_LOCK(pm);
1002	pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL);
1003
1004	if (pvo != NULL) {
1005		if (wired) {
1006			if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
1007				pm->pm_stats.wired_count++;
1008			pvo->pvo_vaddr |= PVO_WIRED;
1009		} else {
1010			if ((pvo->pvo_vaddr & PVO_WIRED) != 0)
1011				pm->pm_stats.wired_count--;
1012			pvo->pvo_vaddr &= ~PVO_WIRED;
1013		}
1014	}
1015	PMAP_UNLOCK(pm);
1016}
1017
1018void
1019moea_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst)
1020{
1021	vm_offset_t	dst;
1022	vm_offset_t	src;
1023
1024	dst = VM_PAGE_TO_PHYS(mdst);
1025	src = VM_PAGE_TO_PHYS(msrc);
1026
1027	kcopy((void *)src, (void *)dst, PAGE_SIZE);
1028}
1029
1030/*
1031 * Zero a page of physical memory by temporarily mapping it into the tlb.
1032 */
1033void
1034moea_zero_page(mmu_t mmu, vm_page_t m)
1035{
1036	vm_offset_t pa = VM_PAGE_TO_PHYS(m);
1037	void *va = (void *)pa;
1038
1039	bzero(va, PAGE_SIZE);
1040}
1041
1042void
1043moea_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size)
1044{
1045	vm_offset_t pa = VM_PAGE_TO_PHYS(m);
1046	void *va = (void *)(pa + off);
1047
1048	bzero(va, size);
1049}
1050
1051void
1052moea_zero_page_idle(mmu_t mmu, vm_page_t m)
1053{
1054	vm_offset_t pa = VM_PAGE_TO_PHYS(m);
1055	void *va = (void *)pa;
1056
1057	bzero(va, PAGE_SIZE);
1058}
1059
1060/*
1061 * Map the given physical page at the specified virtual address in the
1062 * target pmap with the protection requested.  If specified the page
1063 * will be wired down.
1064 */
1065void
1066moea_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1067	   boolean_t wired)
1068{
1069
1070	vm_page_lock_queues();
1071	PMAP_LOCK(pmap);
1072	moea_enter_locked(pmap, va, m, prot, wired);
1073	vm_page_unlock_queues();
1074	PMAP_UNLOCK(pmap);
1075}
1076
1077/*
1078 * Map the given physical page at the specified virtual address in the
1079 * target pmap with the protection requested.  If specified the page
1080 * will be wired down.
1081 *
1082 * The page queues and pmap must be locked.
1083 */
1084static void
1085moea_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1086    boolean_t wired)
1087{
1088	struct		pvo_head *pvo_head;
1089	uma_zone_t	zone;
1090	vm_page_t	pg;
1091	u_int		pte_lo, pvo_flags, was_exec, i;
1092	int		error;
1093
1094	if (!moea_initialized) {
1095		pvo_head = &moea_pvo_kunmanaged;
1096		zone = moea_upvo_zone;
1097		pvo_flags = 0;
1098		pg = NULL;
1099		was_exec = PTE_EXEC;
1100	} else {
1101		pvo_head = vm_page_to_pvoh(m);
1102		pg = m;
1103		zone = moea_mpvo_zone;
1104		pvo_flags = PVO_MANAGED;
1105		was_exec = 0;
1106	}
1107	if (pmap_bootstrapped)
1108		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1109	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1110	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
1111	    (m->oflags & VPO_BUSY) != 0 || VM_OBJECT_LOCKED(m->object),
1112	    ("moea_enter_locked: page %p is not busy", m));
1113
1114	/* XXX change the pvo head for fake pages */
1115	if ((m->flags & PG_FICTITIOUS) == PG_FICTITIOUS) {
1116		pvo_flags &= ~PVO_MANAGED;
1117		pvo_head = &moea_pvo_kunmanaged;
1118		zone = moea_upvo_zone;
1119	}
1120
1121	/*
1122	 * If this is a managed page, and it's the first reference to the page,
1123	 * clear the page's executable attribute.  Otherwise fetch it.
1124	 */
1125	if ((pg != NULL) && ((m->flags & PG_FICTITIOUS) == 0)) {
1126		if (LIST_EMPTY(pvo_head)) {
1127			moea_attr_clear(pg, PTE_EXEC);
1128		} else {
1129			was_exec = moea_attr_fetch(pg) & PTE_EXEC;
1130		}
1131	}
1132
1133	/*
1134	 * Assume the page is cache inhibited and access is guarded unless
1135	 * it's in our available memory array.
1136	 */
1137	pte_lo = PTE_I | PTE_G;
1138	for (i = 0; i < pregions_sz; i++) {
1139		if ((VM_PAGE_TO_PHYS(m) >= pregions[i].mr_start) &&
1140		    (VM_PAGE_TO_PHYS(m) <
1141			(pregions[i].mr_start + pregions[i].mr_size))) {
1142			pte_lo = PTE_M;
1143			break;
1144		}
1145	}
1146
1147	if (prot & VM_PROT_WRITE) {
1148		pte_lo |= PTE_BW;
1149		if (pmap_bootstrapped &&
1150		    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
1151			vm_page_flag_set(m, PG_WRITEABLE);
1152	} else
1153		pte_lo |= PTE_BR;
1154
1155	if (prot & VM_PROT_EXECUTE)
1156		pvo_flags |= PVO_EXECUTABLE;
1157
1158	if (wired)
1159		pvo_flags |= PVO_WIRED;
1160
1161	if ((m->flags & PG_FICTITIOUS) != 0)
1162		pvo_flags |= PVO_FAKE;
1163
1164	error = moea_pvo_enter(pmap, zone, pvo_head, va, VM_PAGE_TO_PHYS(m),
1165	    pte_lo, pvo_flags);
1166
1167	/*
1168	 * Flush the real page from the instruction cache if this page is
1169	 * mapped executable and cacheable and was not previously mapped (or
1170	 * was not mapped executable).
1171	 */
1172	if (error == 0 && (pvo_flags & PVO_EXECUTABLE) &&
1173	    (pte_lo & PTE_I) == 0 && was_exec == 0) {
1174		/*
1175		 * Flush the real memory from the cache.
1176		 */
1177		moea_syncicache(VM_PAGE_TO_PHYS(m), PAGE_SIZE);
1178		if (pg != NULL)
1179			moea_attr_save(pg, PTE_EXEC);
1180	}
1181
1182	/* XXX syncicache always until problems are sorted */
1183	moea_syncicache(VM_PAGE_TO_PHYS(m), PAGE_SIZE);
1184}
1185
1186/*
1187 * Maps a sequence of resident pages belonging to the same object.
1188 * The sequence begins with the given page m_start.  This page is
1189 * mapped at the given virtual address start.  Each subsequent page is
1190 * mapped at a virtual address that is offset from start by the same
1191 * amount as the page is offset from m_start within the object.  The
1192 * last page in the sequence is the page with the largest offset from
1193 * m_start that can be mapped at a virtual address less than the given
1194 * virtual address end.  Not every virtual page between start and end
1195 * is mapped; only those for which a resident page exists with the
1196 * corresponding offset from m_start are mapped.
1197 */
1198void
1199moea_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end,
1200    vm_page_t m_start, vm_prot_t prot)
1201{
1202	vm_page_t m;
1203	vm_pindex_t diff, psize;
1204
1205	psize = atop(end - start);
1206	m = m_start;
1207	vm_page_lock_queues();
1208	PMAP_LOCK(pm);
1209	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1210		moea_enter_locked(pm, start + ptoa(diff), m, prot &
1211		    (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1212		m = TAILQ_NEXT(m, listq);
1213	}
1214	vm_page_unlock_queues();
1215	PMAP_UNLOCK(pm);
1216}
1217
1218void
1219moea_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m,
1220    vm_prot_t prot)
1221{
1222
1223	vm_page_lock_queues();
1224	PMAP_LOCK(pm);
1225	moea_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1226	    FALSE);
1227	vm_page_unlock_queues();
1228	PMAP_UNLOCK(pm);
1229}
1230
1231vm_paddr_t
1232moea_extract(mmu_t mmu, pmap_t pm, vm_offset_t va)
1233{
1234	struct	pvo_entry *pvo;
1235	vm_paddr_t pa;
1236
1237	PMAP_LOCK(pm);
1238	pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL);
1239	if (pvo == NULL)
1240		pa = 0;
1241	else
1242		pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF);
1243	PMAP_UNLOCK(pm);
1244	return (pa);
1245}
1246
1247/*
1248 * Atomically extract and hold the physical page with the given
1249 * pmap and virtual address pair if that mapping permits the given
1250 * protection.
1251 */
1252vm_page_t
1253moea_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1254{
1255	struct	pvo_entry *pvo;
1256	vm_page_t m;
1257        vm_paddr_t pa;
1258
1259	m = NULL;
1260	pa = 0;
1261	PMAP_LOCK(pmap);
1262retry:
1263	pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL);
1264	if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) &&
1265	    ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW ||
1266	     (prot & VM_PROT_WRITE) == 0)) {
1267		if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa))
1268			goto retry;
1269		m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN);
1270		vm_page_hold(m);
1271	}
1272	PA_UNLOCK_COND(pa);
1273	PMAP_UNLOCK(pmap);
1274	return (m);
1275}
1276
1277void
1278moea_init(mmu_t mmu)
1279{
1280
1281	moea_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry),
1282	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1283	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
1284	moea_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry),
1285	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1286	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
1287	moea_initialized = TRUE;
1288}
1289
1290boolean_t
1291moea_is_referenced(mmu_t mmu, vm_page_t m)
1292{
1293
1294	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1295	    ("moea_is_referenced: page %p is not managed", m));
1296	return (moea_query_bit(m, PTE_REF));
1297}
1298
1299boolean_t
1300moea_is_modified(mmu_t mmu, vm_page_t m)
1301{
1302
1303	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1304	    ("moea_is_modified: page %p is not managed", m));
1305
1306	/*
1307	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
1308	 * concurrently set while the object is locked.  Thus, if PG_WRITEABLE
1309	 * is clear, no PTEs can have PTE_CHG set.
1310	 */
1311	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1312	if ((m->oflags & VPO_BUSY) == 0 &&
1313	    (m->flags & PG_WRITEABLE) == 0)
1314		return (FALSE);
1315	return (moea_query_bit(m, PTE_CHG));
1316}
1317
1318void
1319moea_clear_reference(mmu_t mmu, vm_page_t m)
1320{
1321
1322	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1323	    ("moea_clear_reference: page %p is not managed", m));
1324	moea_clear_bit(m, PTE_REF);
1325}
1326
1327void
1328moea_clear_modify(mmu_t mmu, vm_page_t m)
1329{
1330
1331	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1332	    ("moea_clear_modify: page %p is not managed", m));
1333	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1334	KASSERT((m->oflags & VPO_BUSY) == 0,
1335	    ("moea_clear_modify: page %p is busy", m));
1336
1337	/*
1338	 * If the page is not PG_WRITEABLE, then no PTEs can have PTE_CHG
1339	 * set.  If the object containing the page is locked and the page is
1340	 * not VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
1341	 */
1342	if ((m->flags & PG_WRITEABLE) == 0)
1343		return;
1344	moea_clear_bit(m, PTE_CHG);
1345}
1346
1347/*
1348 * Clear the write and modified bits in each of the given page's mappings.
1349 */
1350void
1351moea_remove_write(mmu_t mmu, vm_page_t m)
1352{
1353	struct	pvo_entry *pvo;
1354	struct	pte *pt;
1355	pmap_t	pmap;
1356	u_int	lo;
1357
1358	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1359	    ("moea_remove_write: page %p is not managed", m));
1360
1361	/*
1362	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
1363	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
1364	 * is clear, no page table entries need updating.
1365	 */
1366	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1367	if ((m->oflags & VPO_BUSY) == 0 &&
1368	    (m->flags & PG_WRITEABLE) == 0)
1369		return;
1370	vm_page_lock_queues();
1371	lo = moea_attr_fetch(m);
1372	powerpc_sync();
1373	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
1374		pmap = pvo->pvo_pmap;
1375		PMAP_LOCK(pmap);
1376		if ((pvo->pvo_pte.pte.pte_lo & PTE_PP) != PTE_BR) {
1377			pt = moea_pvo_to_pte(pvo, -1);
1378			pvo->pvo_pte.pte.pte_lo &= ~PTE_PP;
1379			pvo->pvo_pte.pte.pte_lo |= PTE_BR;
1380			if (pt != NULL) {
1381				moea_pte_synch(pt, &pvo->pvo_pte.pte);
1382				lo |= pvo->pvo_pte.pte.pte_lo;
1383				pvo->pvo_pte.pte.pte_lo &= ~PTE_CHG;
1384				moea_pte_change(pt, &pvo->pvo_pte.pte,
1385				    pvo->pvo_vaddr);
1386				mtx_unlock(&moea_table_mutex);
1387			}
1388		}
1389		PMAP_UNLOCK(pmap);
1390	}
1391	if ((lo & PTE_CHG) != 0) {
1392		moea_attr_clear(m, PTE_CHG);
1393		vm_page_dirty(m);
1394	}
1395	vm_page_flag_clear(m, PG_WRITEABLE);
1396	vm_page_unlock_queues();
1397}
1398
1399/*
1400 *	moea_ts_referenced:
1401 *
1402 *	Return a count of reference bits for a page, clearing those bits.
1403 *	It is not necessary for every reference bit to be cleared, but it
1404 *	is necessary that 0 only be returned when there are truly no
1405 *	reference bits set.
1406 *
1407 *	XXX: The exact number of bits to check and clear is a matter that
1408 *	should be tested and standardized at some point in the future for
1409 *	optimal aging of shared pages.
1410 */
1411boolean_t
1412moea_ts_referenced(mmu_t mmu, vm_page_t m)
1413{
1414
1415	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1416	    ("moea_ts_referenced: page %p is not managed", m));
1417	return (moea_clear_bit(m, PTE_REF));
1418}
1419
1420/*
1421 * Map a wired page into kernel virtual address space.
1422 */
1423void
1424moea_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa)
1425{
1426	u_int		pte_lo;
1427	int		error;
1428	int		i;
1429
1430#if 0
1431	if (va < VM_MIN_KERNEL_ADDRESS)
1432		panic("moea_kenter: attempt to enter non-kernel address %#x",
1433		    va);
1434#endif
1435
1436	pte_lo = PTE_I | PTE_G;
1437	for (i = 0; i < pregions_sz; i++) {
1438		if ((pa >= pregions[i].mr_start) &&
1439		    (pa < (pregions[i].mr_start + pregions[i].mr_size))) {
1440			pte_lo = PTE_M;
1441			break;
1442		}
1443	}
1444
1445	PMAP_LOCK(kernel_pmap);
1446	error = moea_pvo_enter(kernel_pmap, moea_upvo_zone,
1447	    &moea_pvo_kunmanaged, va, pa, pte_lo, PVO_WIRED);
1448
1449	if (error != 0 && error != ENOENT)
1450		panic("moea_kenter: failed to enter va %#x pa %#x: %d", va,
1451		    pa, error);
1452
1453	/*
1454	 * Flush the real memory from the instruction cache.
1455	 */
1456	if ((pte_lo & (PTE_I | PTE_G)) == 0) {
1457		moea_syncicache(pa, PAGE_SIZE);
1458	}
1459	PMAP_UNLOCK(kernel_pmap);
1460}
1461
1462/*
1463 * Extract the physical page address associated with the given kernel virtual
1464 * address.
1465 */
1466vm_offset_t
1467moea_kextract(mmu_t mmu, vm_offset_t va)
1468{
1469	struct		pvo_entry *pvo;
1470	vm_paddr_t pa;
1471
1472	/*
1473	 * Allow direct mappings on 32-bit OEA
1474	 */
1475	if (va < VM_MIN_KERNEL_ADDRESS) {
1476		return (va);
1477	}
1478
1479	PMAP_LOCK(kernel_pmap);
1480	pvo = moea_pvo_find_va(kernel_pmap, va & ~ADDR_POFF, NULL);
1481	KASSERT(pvo != NULL, ("moea_kextract: no addr found"));
1482	pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF);
1483	PMAP_UNLOCK(kernel_pmap);
1484	return (pa);
1485}
1486
1487/*
1488 * Remove a wired page from kernel virtual address space.
1489 */
1490void
1491moea_kremove(mmu_t mmu, vm_offset_t va)
1492{
1493
1494	moea_remove(mmu, kernel_pmap, va, va + PAGE_SIZE);
1495}
1496
1497/*
1498 * Map a range of physical addresses into kernel virtual address space.
1499 *
1500 * The value passed in *virt is a suggested virtual address for the mapping.
1501 * Architectures which can support a direct-mapped physical to virtual region
1502 * can return the appropriate address within that region, leaving '*virt'
1503 * unchanged.  We cannot and therefore do not; *virt is updated with the
1504 * first usable address after the mapped region.
1505 */
1506vm_offset_t
1507moea_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start,
1508    vm_offset_t pa_end, int prot)
1509{
1510	vm_offset_t	sva, va;
1511
1512	sva = *virt;
1513	va = sva;
1514	for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE)
1515		moea_kenter(mmu, va, pa_start);
1516	*virt = va;
1517	return (sva);
1518}
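
/*
 * Illustrative sketch (not compiled): a caller of moea_map() passes a
 * cursor holding a suggested kernel virtual address; the mapping starts at
 * the returned address and the cursor comes back advanced past the mapped
 * range.
 */
#if 0
static void
example_map_range(mmu_t mmu, vm_offset_t pa_start, vm_offset_t pa_end)
{
	vm_offset_t cursor, sva;

	cursor = virtual_avail;
	sva = moea_map(mmu, &cursor, pa_start, pa_end, VM_PROT_ALL);
	/* sva is the old cursor; cursor now points past the new mapping. */
}
#endif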
1519
1520/*
1521 * Returns true if the pmap's pv is one of the first
1522 * 16 pvs linked to from this page.  This count may
1523 * be changed upwards or downwards in the future; it
1524 * is only necessary that true be returned for a small
1525 * subset of pmaps for proper page aging.
1526 */
1527boolean_t
1528moea_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m)
1529{
1530        int loops;
1531	struct pvo_entry *pvo;
1532	boolean_t rv;
1533
1534	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1535	    ("moea_page_exists_quick: page %p is not managed", m));
1536	loops = 0;
1537	rv = FALSE;
1538	vm_page_lock_queues();
1539	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
1540		if (pvo->pvo_pmap == pmap) {
1541			rv = TRUE;
1542			break;
1543		}
1544		if (++loops >= 16)
1545			break;
1546	}
1547	vm_page_unlock_queues();
1548	return (rv);
1549}
1550
1551/*
1552 * Return the number of managed mappings to the given physical page
1553 * that are wired.
1554 */
1555int
1556moea_page_wired_mappings(mmu_t mmu, vm_page_t m)
1557{
1558	struct pvo_entry *pvo;
1559	int count;
1560
1561	count = 0;
1562	if ((m->flags & PG_FICTITIOUS) != 0)
1563		return (count);
1564	vm_page_lock_queues();
1565	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
1566		if ((pvo->pvo_vaddr & PVO_WIRED) != 0)
1567			count++;
1568	vm_page_unlock_queues();
1569	return (count);
1570}
1571
1572static u_int	moea_vsidcontext;
1573
1574void
1575moea_pinit(mmu_t mmu, pmap_t pmap)
1576{
1577	int	i, mask;
1578	u_int	entropy;
1579
1580	KASSERT((int)pmap < VM_MIN_KERNEL_ADDRESS, ("moea_pinit: virt pmap"));
1581	PMAP_LOCK_INIT(pmap);
1582
1583	entropy = 0;
1584	__asm __volatile("mftb %0" : "=r"(entropy));
1585
1586	if ((pmap->pmap_phys = (pmap_t)moea_kextract(mmu, (vm_offset_t)pmap))
1587	    == NULL) {
1588		pmap->pmap_phys = pmap;
1589	}
1590
1591
1592	/*
1593	 * Allocate some segment registers for this pmap.
1594	 */
1595	for (i = 0; i < NPMAPS; i += VSID_NBPW) {
1596		u_int	hash, n;
1597
1598		/*
1599		 * Create a new value by multiplying by a prime and adding in
1600		 * entropy from the timebase register.  This is to make the
1601		 * VSID more random so that the PT hash function collides
1602		 * less often.  (Note that the prime causes gcc to do shifts
1603		 * instead of a multiply.)
1604		 */
1605		moea_vsidcontext = (moea_vsidcontext * 0x1105) + entropy;
1606		hash = moea_vsidcontext & (NPMAPS - 1);
1607		if (hash == 0)		/* 0 is special, avoid it */
1608			continue;
1609		n = hash >> 5;
1610		mask = 1 << (hash & (VSID_NBPW - 1));
1611		hash = (moea_vsidcontext & 0xfffff);
1612		if (moea_vsid_bitmap[n] & mask) {	/* collision? */
1613			/* anything free in this bucket? */
1614			if (moea_vsid_bitmap[n] == 0xffffffff) {
1615				entropy = (moea_vsidcontext >> 20);
1616				continue;
1617			}
1618			i = ffs(~moea_vsid_bitmap[n]) - 1;
1619			mask = 1 << i;
1620			hash &= 0xfffff & ~(VSID_NBPW - 1);
1621			hash |= i;
1622		}
1623		moea_vsid_bitmap[n] |= mask;
1624		for (i = 0; i < 16; i++)
1625			pmap->pm_sr[i] = VSID_MAKE(i, hash);
1626		return;
1627	}
1628
1629	panic("moea_pinit: out of segments");
1630}
1631
1632/*
1633 * Initialize the pmap associated with process 0.
1634 */
1635void
1636moea_pinit0(mmu_t mmu, pmap_t pm)
1637{
1638
1639	moea_pinit(mmu, pm);
1640	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1641}
1642
1643/*
1644 * Set the physical protection on the specified range of this map as requested.
1645 */
1646void
1647moea_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva,
1648    vm_prot_t prot)
1649{
1650	struct	pvo_entry *pvo;
1651	struct	pte *pt;
1652	int	pteidx;
1653
1654	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
1655	    ("moea_protect: non current pmap"));
1656
1657	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1658		moea_remove(mmu, pm, sva, eva);
1659		return;
1660	}
1661
1662	vm_page_lock_queues();
1663	PMAP_LOCK(pm);
1664	for (; sva < eva; sva += PAGE_SIZE) {
1665		pvo = moea_pvo_find_va(pm, sva, &pteidx);
1666		if (pvo == NULL)
1667			continue;
1668
1669		if ((prot & VM_PROT_EXECUTE) == 0)
1670			pvo->pvo_vaddr &= ~PVO_EXECUTABLE;
1671
1672		/*
1673		 * Grab the PTE pointer before we diddle with the cached PTE
1674		 * copy.
1675		 */
1676		pt = moea_pvo_to_pte(pvo, pteidx);
1677		/*
1678		 * Change the protection of the page.
1679		 */
1680		pvo->pvo_pte.pte.pte_lo &= ~PTE_PP;
1681		pvo->pvo_pte.pte.pte_lo |= PTE_BR;
1682
1683		/*
1684		 * If the PVO is in the page table, update that pte as well.
1685		 */
1686		if (pt != NULL) {
1687			moea_pte_change(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr);
1688			mtx_unlock(&moea_table_mutex);
1689		}
1690	}
1691	vm_page_unlock_queues();
1692	PMAP_UNLOCK(pm);
1693}
1694
1695/*
1696 * Map a list of wired pages into kernel virtual address space.  This is
1697 * intended for temporary mappings which do not need page modification or
1698 * references recorded.  Existing mappings in the region are overwritten.
1699 */
1700void
1701moea_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count)
1702{
1703	vm_offset_t va;
1704
1705	va = sva;
1706	while (count-- > 0) {
1707		moea_kenter(mmu, va, VM_PAGE_TO_PHYS(*m));
1708		va += PAGE_SIZE;
1709		m++;
1710	}
1711}
1712
1713/*
1714 * Remove page mappings from kernel virtual address space.  Intended for
1715 * temporary mappings entered by moea_qenter.
1716 */
1717void
1718moea_qremove(mmu_t mmu, vm_offset_t sva, int count)
1719{
1720	vm_offset_t va;
1721
1722	va = sva;
1723	while (count-- > 0) {
1724		moea_kremove(mmu, va);
1725		va += PAGE_SIZE;
1726	}
1727}
1728
1729void
1730moea_release(mmu_t mmu, pmap_t pmap)
1731{
1732        int idx, mask;
1733
1734	/*
1735	 * Free segment register's VSID
1736	 */
1737        if (pmap->pm_sr[0] == 0)
1738                panic("moea_release");
1739
1740        idx = VSID_TO_HASH(pmap->pm_sr[0]) & (NPMAPS-1);
1741        mask = 1 << (idx % VSID_NBPW);
1742        idx /= VSID_NBPW;
1743        moea_vsid_bitmap[idx] &= ~mask;
1744	PMAP_LOCK_DESTROY(pmap);
1745}
1746
1747/*
1748 * Remove the given range of addresses from the specified map.
1749 */
1750void
1751moea_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1752{
1753	struct	pvo_entry *pvo;
1754	int	pteidx;
1755
1756	vm_page_lock_queues();
1757	PMAP_LOCK(pm);
1758	for (; sva < eva; sva += PAGE_SIZE) {
1759		pvo = moea_pvo_find_va(pm, sva, &pteidx);
1760		if (pvo != NULL) {
1761			moea_pvo_remove(pvo, pteidx);
1762		}
1763	}
1764	PMAP_UNLOCK(pm);
1765	vm_page_unlock_queues();
1766}
1767
1768/*
1769 * Remove physical page from all pmaps in which it resides. moea_pvo_remove()
1770 * will reflect changes in the PTEs back to the vm_page.
1771 */
1772void
1773moea_remove_all(mmu_t mmu, vm_page_t m)
1774{
1775	struct  pvo_head *pvo_head;
1776	struct	pvo_entry *pvo, *next_pvo;
1777	pmap_t	pmap;
1778
1779	vm_page_lock_queues();
1780	pvo_head = vm_page_to_pvoh(m);
1781	for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) {
1782		next_pvo = LIST_NEXT(pvo, pvo_vlink);
1783
1784		MOEA_PVO_CHECK(pvo);	/* sanity check */
1785		pmap = pvo->pvo_pmap;
1786		PMAP_LOCK(pmap);
1787		moea_pvo_remove(pvo, -1);
1788		PMAP_UNLOCK(pmap);
1789	}
1790	if ((m->flags & PG_WRITEABLE) && moea_is_modified(mmu, m)) {
1791		moea_attr_clear(m, PTE_CHG);
1792		vm_page_dirty(m);
1793	}
1794	vm_page_flag_clear(m, PG_WRITEABLE);
1795	vm_page_unlock_queues();
1796}
1797
1798/*
1799 * Allocate a physical page of memory directly from the phys_avail map.
1800 * Can only be called from moea_bootstrap before avail start and end are
1801 * calculated.
1802 */
1803static vm_offset_t
1804moea_bootstrap_alloc(vm_size_t size, u_int align)
1805{
1806	vm_offset_t	s, e;
1807	int		i, j;
1808
1809	size = round_page(size);
1810	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1811		if (align != 0)
1812			s = (phys_avail[i] + align - 1) & ~(align - 1);
1813		else
1814			s = phys_avail[i];
1815		e = s + size;
1816
1817		if (s < phys_avail[i] || e > phys_avail[i + 1])
1818			continue;
1819
1820		if (s == phys_avail[i]) {
1821			phys_avail[i] += size;
1822		} else if (e == phys_avail[i + 1]) {
1823			phys_avail[i + 1] -= size;
1824		} else {
1825			for (j = phys_avail_count * 2; j > i; j -= 2) {
1826				phys_avail[j] = phys_avail[j - 2];
1827				phys_avail[j + 1] = phys_avail[j - 1];
1828			}
1829
1830			phys_avail[i + 3] = phys_avail[i + 1];
1831			phys_avail[i + 1] = s;
1832			phys_avail[i + 2] = e;
1833			phys_avail_count++;
1834		}
1835
1836		return (s);
1837	}
1838	panic("moea_bootstrap_alloc: could not allocate memory");
1839}
1840
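/*
 * pa is handed straight to __syncicache(), i.e. it is used as a virtual
 * address; on OEA this is expected to be covered by the 1:1 BAT mappings
 * established in moea_bootstrap(), so no temporary mapping is needed.
 */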
1841static void
1842moea_syncicache(vm_offset_t pa, vm_size_t len)
1843{
1844	__syncicache((void *)pa, len);
1845}
1846
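/*
 * Insert a mapping into the PVO table.  Returns 0 when an identical mapping
 * already exists or when the new mapping joins a non-empty PV list, ENOENT
 * when the new mapping is the first on the page's PV list (callers can use
 * this to, e.g., limit instruction-cache flushes to a page's first mapping),
 * and ENOMEM if no pvo entry could be allocated.
 */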
1847static int
1848moea_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head,
1849    vm_offset_t va, vm_offset_t pa, u_int pte_lo, int flags)
1850{
1851	struct	pvo_entry *pvo;
1852	u_int	sr;
1853	int	first;
1854	u_int	ptegidx;
1855	int	i;
1856	int     bootstrap;
1857
1858	moea_pvo_enter_calls++;
1859	first = 0;
1860	bootstrap = 0;
1861
1862	/*
1863	 * Compute the PTE Group index.
1864	 */
1865	va &= ~ADDR_POFF;
1866	sr = va_to_sr(pm->pm_sr, va);
1867	ptegidx = va_to_pteg(sr, va);
1868
1869	/*
1870	 * Remove any existing mapping for this page.  Reuse the pvo entry if
1871	 * there is a mapping.
1872	 */
1873	mtx_lock(&moea_table_mutex);
1874	LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) {
1875		if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
1876			if ((pvo->pvo_pte.pte.pte_lo & PTE_RPGN) == pa &&
1877			    (pvo->pvo_pte.pte.pte_lo & PTE_PP) ==
1878			    (pte_lo & PTE_PP)) {
1879				mtx_unlock(&moea_table_mutex);
1880				return (0);
1881			}
1882			moea_pvo_remove(pvo, -1);
1883			break;
1884		}
1885	}
1886
1887	/*
1888	 * Not overwriting an identical mapping, so allocate a new pvo entry.
1889	 */
1890	if (moea_initialized) {
1891		pvo = uma_zalloc(zone, M_NOWAIT);
1892	} else {
1893		if (moea_bpvo_pool_index >= BPVO_POOL_SIZE) {
1894			panic("moea_pvo_enter: bpvo pool exhausted, %d, %d, %zu",
1895			      moea_bpvo_pool_index, BPVO_POOL_SIZE,
1896			      BPVO_POOL_SIZE * sizeof(struct pvo_entry));
1897		}
1898		pvo = &moea_bpvo_pool[moea_bpvo_pool_index];
1899		moea_bpvo_pool_index++;
1900		bootstrap = 1;
1901	}
1902
1903	if (pvo == NULL) {
1904		mtx_unlock(&moea_table_mutex);
1905		return (ENOMEM);
1906	}
1907
1908	moea_pvo_entries++;
1909	pvo->pvo_vaddr = va;
1910	pvo->pvo_pmap = pm;
1911	LIST_INSERT_HEAD(&moea_pvo_table[ptegidx], pvo, pvo_olink);
1912	pvo->pvo_vaddr &= ~ADDR_POFF;
1913	if (flags & VM_PROT_EXECUTE)
1914		pvo->pvo_vaddr |= PVO_EXECUTABLE;
1915	if (flags & PVO_WIRED)
1916		pvo->pvo_vaddr |= PVO_WIRED;
1917	if (pvo_head != &moea_pvo_kunmanaged)
1918		pvo->pvo_vaddr |= PVO_MANAGED;
1919	if (bootstrap)
1920		pvo->pvo_vaddr |= PVO_BOOTSTRAP;
1921	if (flags & PVO_FAKE)
1922		pvo->pvo_vaddr |= PVO_FAKE;
1923
1924	moea_pte_create(&pvo->pvo_pte.pte, sr, va, pa | pte_lo);
1925
1926	/*
1927	 * Remember if the list was empty and therefore will be the first
1928	 * item.
1929	 */
1930	if (LIST_FIRST(pvo_head) == NULL)
1931		first = 1;
1932	LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
1933
1934	if (pvo->pvo_vaddr & PVO_WIRED)
1935		pm->pm_stats.wired_count++;
1936	pm->pm_stats.resident_count++;
1937
1938	/*
1939	 * We hope this succeeds but it isn't required.
1940	 */
1941	i = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte);
1942	if (i >= 0) {
1943		PVO_PTEGIDX_SET(pvo, i);
1944	} else {
1945		panic("moea_pvo_enter: overflow");
1946		moea_pte_overflow++;
1947	}
1948	mtx_unlock(&moea_table_mutex);
1949
1950	return (first ? ENOENT : 0);
1951}
1952
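/*
 * Tear down a single pvo entry.  pteidx is a hint: callers that already
 * know the PTE index pass it in, while -1 lets moea_pvo_to_pte()
 * recompute it.
 */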
1953static void
1954moea_pvo_remove(struct pvo_entry *pvo, int pteidx)
1955{
1956	struct	pte *pt;
1957
1958	/*
1959	 * If there is an active pte entry, we need to deactivate it (and
1960	 * save the ref & chg bits).
1961	 */
1962	pt = moea_pvo_to_pte(pvo, pteidx);
1963	if (pt != NULL) {
1964		moea_pte_unset(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr);
1965		mtx_unlock(&moea_table_mutex);
1966		PVO_PTEGIDX_CLR(pvo);
1967	} else {
1968		moea_pte_overflow--;
1969	}
1970
1971	/*
1972	 * Update our statistics.
1973	 */
1974	pvo->pvo_pmap->pm_stats.resident_count--;
1975	if (pvo->pvo_vaddr & PVO_WIRED)
1976		pvo->pvo_pmap->pm_stats.wired_count--;
1977
1978	/*
1979	 * Save the REF/CHG bits into their cache if the page is managed.
1980	 */
1981	if ((pvo->pvo_vaddr & (PVO_MANAGED|PVO_FAKE)) == PVO_MANAGED) {
1982		struct	vm_page *pg;
1983
1984		pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN);
1985		if (pg != NULL) {
1986			moea_attr_save(pg, pvo->pvo_pte.pte.pte_lo &
1987			    (PTE_REF | PTE_CHG));
1988		}
1989	}
1990
1991	/*
1992	 * Remove this PVO from the PV list.
1993	 */
1994	LIST_REMOVE(pvo, pvo_vlink);
1995
1996	/*
1997	 * Remove this from the overflow list and return it to the pool
1998	 * if we aren't going to reuse it.
1999	 */
2000	LIST_REMOVE(pvo, pvo_olink);
2001	if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP))
2002		uma_zfree(pvo->pvo_vaddr & PVO_MANAGED ? moea_mpvo_zone :
2003		    moea_upvo_zone, pvo);
2004	moea_pvo_entries--;
2005	moea_pvo_remove_calls++;
2006}
2007
2008static __inline int
2009moea_pvo_pte_index(const struct pvo_entry *pvo, int ptegidx)
2010{
2011	int	pteidx;
2012
2013	/*
2014	 * We can find the actual pte entry without searching by grabbing
2015	 * the slot within the PTEG from 3 unused bits in pvo_vaddr and by
2016	 * noticing the HID bit.
2017	 */
2018	pteidx = ptegidx * 8 + PVO_PTEGIDX_GET(pvo);
2019	if (pvo->pvo_pte.pte.pte_hi & PTE_HID)
2020		pteidx ^= moea_pteg_mask * 8;
2021
2022	return (pteidx);
2023}
2024
2025static struct pvo_entry *
2026moea_pvo_find_va(pmap_t pm, vm_offset_t va, int *pteidx_p)
2027{
2028	struct	pvo_entry *pvo;
2029	int	ptegidx;
2030	u_int	sr;
2031
2032	va &= ~ADDR_POFF;
2033	sr = va_to_sr(pm->pm_sr, va);
2034	ptegidx = va_to_pteg(sr, va);
2035
2036	mtx_lock(&moea_table_mutex);
2037	LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) {
2038		if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
2039			if (pteidx_p)
2040				*pteidx_p = moea_pvo_pte_index(pvo, ptegidx);
2041			break;
2042		}
2043	}
2044	mtx_unlock(&moea_table_mutex);
2045
2046	return (pvo);
2047}
2048
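/*
 * Translate a pvo entry into its resident hardware PTE, if any.  On a
 * non-NULL return the PTE pointer is valid and moea_table_mutex is held;
 * the caller is responsible for releasing it.  Returns NULL, with the
 * mutex released, if the pvo has no valid PTE slot.
 */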
2049static struct pte *
2050moea_pvo_to_pte(const struct pvo_entry *pvo, int pteidx)
2051{
2052	struct	pte *pt;
2053
2054	/*
2055	 * If we haven't been supplied the pteidx, calculate it.
2056	 */
2057	if (pteidx == -1) {
2058		int	ptegidx;
2059		u_int	sr;
2060
2061		sr = va_to_sr(pvo->pvo_pmap->pm_sr, pvo->pvo_vaddr);
2062		ptegidx = va_to_pteg(sr, pvo->pvo_vaddr);
2063		pteidx = moea_pvo_pte_index(pvo, ptegidx);
2064	}
2065
2066	pt = &moea_pteg_table[pteidx >> 3].pt[pteidx & 7];
2067	mtx_lock(&moea_table_mutex);
2068
2069	if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) {
2070		panic("moea_pvo_to_pte: pvo %p has valid pte in pvo but no "
2071		    "valid pte index", pvo);
2072	}
2073
2074	if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) {
2075		panic("moea_pvo_to_pte: pvo %p has valid pte index in pvo "
2076		    "but no valid pte", pvo);
2077	}
2078
2079	if ((pt->pte_hi ^ (pvo->pvo_pte.pte.pte_hi & ~PTE_VALID)) == PTE_VALID) {
2080		if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0) {
2081			panic("moea_pvo_to_pte: pvo %p has valid pte in "
2082			    "moea_pteg_table %p but invalid in pvo", pvo, pt);
2083		}
2084
2085		if (((pt->pte_lo ^ pvo->pvo_pte.pte.pte_lo) & ~(PTE_CHG|PTE_REF))
2086		    != 0) {
2087			panic("moea_pvo_to_pte: pvo %p pte does not match "
2088			    "pte %p in moea_pteg_table", pvo, pt);
2089		}
2090
2091		mtx_assert(&moea_table_mutex, MA_OWNED);
2092		return (pt);
2093	}
2094
2095	if (pvo->pvo_pte.pte.pte_hi & PTE_VALID) {
2096		panic("moea_pvo_to_pte: pvo %p has invalid pte %p in "
2097		    "moea_pteg_table but valid in pvo", pvo, pt);
2098	}
2099
2100	mtx_unlock(&moea_table_mutex);
2101	return (NULL);
2102}
2103
2104/*
2105 * XXX: THIS STUFF SHOULD BE IN pte.c?
2106 */
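/*
 * Handle an access that faulted because its mapping has a pvo entry but no
 * resident PTE.  The mapping's PTE is inserted if a slot is free; otherwise
 * a pseudo-randomly chosen victim in the primary PTEG is evicted (its
 * REF/CHG state is saved in its pvo) and the faulting mapping takes its
 * place.  Returns 1 if a PTE was installed and the access can be retried,
 * 0 if no pvo matches the address and the fault must be treated as a
 * genuine fault.
 */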
2107int
2108moea_pte_spill(vm_offset_t addr)
2109{
2110	struct	pvo_entry *source_pvo, *victim_pvo;
2111	struct	pvo_entry *pvo;
2112	int	ptegidx, i, j;
2113	u_int	sr;
2114	struct	pteg *pteg;
2115	struct	pte *pt;
2116
2117	moea_pte_spills++;
2118
2119	sr = mfsrin(addr);
2120	ptegidx = va_to_pteg(sr, addr);
2121
2122	/*
2123	 * Have to substitute some entry.  Use the primary hash for this.
2124	 * Use low bits of timebase as random generator.
2125	 */
2126	pteg = &moea_pteg_table[ptegidx];
2127	mtx_lock(&moea_table_mutex);
2128	__asm __volatile("mftb %0" : "=r"(i));
2129	i &= 7;
2130	pt = &pteg->pt[i];
2131
2132	source_pvo = NULL;
2133	victim_pvo = NULL;
2134	LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) {
2135		/*
2136		 * We need to find a pvo entry for this address.
2137		 */
2138		MOEA_PVO_CHECK(pvo);
2139		if (source_pvo == NULL &&
2140		    moea_pte_match(&pvo->pvo_pte.pte, sr, addr,
2141		    pvo->pvo_pte.pte.pte_hi & PTE_HID)) {
2142			/*
2143			 * Found the entry to be spilled into the pteg.  Try a
2144			 * plain insert first; if a slot is free we are done.
2145			 */
2146			j = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte);
2147
2148			if (j >= 0) {
2149				PVO_PTEGIDX_SET(pvo, j);
2150				moea_pte_overflow--;
2151				MOEA_PVO_CHECK(pvo);
2152				mtx_unlock(&moea_table_mutex);
2153				return (1);
2154			}
2155
2156			source_pvo = pvo;
2157
2158			if (victim_pvo != NULL)
2159				break;
2160		}
2161
2162		/*
2163		 * We also need the pvo entry of the victim we are replacing
2164		 * so that the R & C bits of its PTE can be saved.
2165		 */
2166		if ((pt->pte_hi & PTE_HID) == 0 && victim_pvo == NULL &&
2167		    moea_pte_compare(pt, &pvo->pvo_pte.pte)) {
2168			victim_pvo = pvo;
2169			if (source_pvo != NULL)
2170				break;
2171		}
2172	}
2173
2174	if (source_pvo == NULL) {
2175		mtx_unlock(&moea_table_mutex);
2176		return (0);
2177	}
2178
2179	if (victim_pvo == NULL) {
2180		if ((pt->pte_hi & PTE_HID) == 0)
2181			panic("moea_pte_spill: victim p-pte (%p) has no pvo "
2182			    "entry", pt);
2183
2184		/*
2185		 * If this is a secondary PTE, we need to search its primary
2186		 * pvo bucket for the matching PVO.
2187		 */
2188		LIST_FOREACH(pvo, &moea_pvo_table[ptegidx ^ moea_pteg_mask],
2189		    pvo_olink) {
2190			MOEA_PVO_CHECK(pvo);
2191			/*
2192			 * We also need the pvo entry of the victim we are
2193			 * replacing so that the R & C bits of its PTE can be saved.
2194			 */
2195			if (moea_pte_compare(pt, &pvo->pvo_pte.pte)) {
2196				victim_pvo = pvo;
2197				break;
2198			}
2199		}
2200
2201		if (victim_pvo == NULL)
2202			panic("moea_pte_spill: victim s-pte (%p) has no pvo "
2203			    "entry", pt);
2204	}
2205
2206	/*
2207	 * We are invalidating the TLB entry for the EA we are replacing even
2208	 * though it's valid.  If we don't, we lose any ref/chg bit changes
2209	 * contained in the TLB entry.
2210	 */
2211	source_pvo->pvo_pte.pte.pte_hi &= ~PTE_HID;
2212
2213	moea_pte_unset(pt, &victim_pvo->pvo_pte.pte, victim_pvo->pvo_vaddr);
2214	moea_pte_set(pt, &source_pvo->pvo_pte.pte);
2215
2216	PVO_PTEGIDX_CLR(victim_pvo);
2217	PVO_PTEGIDX_SET(source_pvo, i);
2218	moea_pte_replacements++;
2219
2220	MOEA_PVO_CHECK(victim_pvo);
2221	MOEA_PVO_CHECK(source_pvo);
2222
2223	mtx_unlock(&moea_table_mutex);
2224	return (1);
2225}
2226
2227static int
2228moea_pte_insert(u_int ptegidx, struct pte *pvo_pt)
2229{
2230	struct	pte *pt;
2231	int	i;
2232
2233	mtx_assert(&moea_table_mutex, MA_OWNED);
2234
2235	/*
2236	 * First try primary hash.
2237	 */
2238	for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) {
2239		if ((pt->pte_hi & PTE_VALID) == 0) {
2240			pvo_pt->pte_hi &= ~PTE_HID;
2241			moea_pte_set(pt, pvo_pt);
2242			return (i);
2243		}
2244	}
2245
2246	/*
2247	 * Now try secondary hash.
2248	 */
2249	ptegidx ^= moea_pteg_mask;
2250
2251	for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) {
2252		if ((pt->pte_hi & PTE_VALID) == 0) {
2253			pvo_pt->pte_hi |= PTE_HID;
2254			moea_pte_set(pt, pvo_pt);
2255			return (i);
2256		}
2257	}
2258
2259	panic("moea_pte_insert: overflow");
2260	return (-1);
2261}
2262
2263static boolean_t
2264moea_query_bit(vm_page_t m, int ptebit)
2265{
2266	struct	pvo_entry *pvo;
2267	struct	pte *pt;
2268
2269	if (moea_attr_fetch(m) & ptebit)
2270		return (TRUE);
2271
2272	vm_page_lock_queues();
2273	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2274		MOEA_PVO_CHECK(pvo);	/* sanity check */
2275
2276		/*
2277		 * See if we saved the bit off.  If so, cache it and return
2278		 * success.
2279		 */
2280		if (pvo->pvo_pte.pte.pte_lo & ptebit) {
2281			moea_attr_save(m, ptebit);
2282			MOEA_PVO_CHECK(pvo);	/* sanity check */
2283			vm_page_unlock_queues();
2284			return (TRUE);
2285		}
2286	}
2287
2288	/*
2289	 * No luck, now go through the hard part of looking at the PTEs
2290	 * themselves.  Sync so that any pending REF/CHG bits are flushed to
2291	 * the PTEs.
2292	 */
2293	powerpc_sync();
2294	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2295		MOEA_PVO_CHECK(pvo);	/* sanity check */
2296
2297		/*
2298		 * See if this pvo has a valid PTE.  if so, fetch the
2299		 * REF/CHG bits from the valid PTE.  If the appropriate
2300		 * ptebit is set, cache it and return success.
2301		 */
2302		pt = moea_pvo_to_pte(pvo, -1);
2303		if (pt != NULL) {
2304			moea_pte_synch(pt, &pvo->pvo_pte.pte);
2305			mtx_unlock(&moea_table_mutex);
2306			if (pvo->pvo_pte.pte.pte_lo & ptebit) {
2307				moea_attr_save(m, ptebit);
2308				MOEA_PVO_CHECK(pvo);	/* sanity check */
2309				vm_page_unlock_queues();
2310				return (TRUE);
2311			}
2312		}
2313	}
2314
2315	vm_page_unlock_queues();
2316	return (FALSE);
2317}
2318
2319static u_int
2320moea_clear_bit(vm_page_t m, int ptebit)
2321{
2322	u_int	count;
2323	struct	pvo_entry *pvo;
2324	struct	pte *pt;
2325
2326	vm_page_lock_queues();
2327
2328	/*
2329	 * Clear the cached value.
2330	 */
2331	moea_attr_clear(m, ptebit);
2332
2333	/*
2334	 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so
2335	 * we can reset the right ones).  Note that since the pvo entries and
2336	 * list heads are accessed via BAT0 and are never placed in the page
2337	 * table, we don't have to worry about further accesses setting the
2338	 * REF/CHG bits.
2339	 */
2340	powerpc_sync();
2341
2342	/*
2343	 * For each pvo entry, clear the pvo's ptebit.  If this pvo has a
2344	 * valid pte clear the ptebit from the valid pte.
2345	 */
2346	count = 0;
2347	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2348		MOEA_PVO_CHECK(pvo);	/* sanity check */
2349		pt = moea_pvo_to_pte(pvo, -1);
2350		if (pt != NULL) {
2351			moea_pte_synch(pt, &pvo->pvo_pte.pte);
2352			if (pvo->pvo_pte.pte.pte_lo & ptebit) {
2353				count++;
2354				moea_pte_clear(pt, PVO_VADDR(pvo), ptebit);
2355			}
2356			mtx_unlock(&moea_table_mutex);
2357		}
2358		pvo->pvo_pte.pte.pte_lo &= ~ptebit;
2359		MOEA_PVO_CHECK(pvo);	/* sanity check */
2360	}
2361
2362	vm_page_unlock_queues();
2363	return (count);
2364}
2365
2366/*
2367 * Return 0 if the physical range is covered by battable[idx], else an errno.
2368 */
2369static int
2370moea_bat_mapped(int idx, vm_offset_t pa, vm_size_t size)
2371{
2372	u_int prot;
2373	u_int32_t start;
2374	u_int32_t end;
2375	u_int32_t bat_ble;
2376
2377	/*
2378	 * Return immediately if not a valid mapping
2379	 */
2380	if ((battable[idx].batu & BAT_Vs) == 0)
2381		return (EINVAL);
2382
2383	/*
2384	 * The BAT entry must be cache-inhibited, guarded, and r/w
2385	 * so it can function as an i/o page
2386	 */
2387	prot = battable[idx].batl & (BAT_I|BAT_G|BAT_PP_RW);
2388	if (prot != (BAT_I|BAT_G|BAT_PP_RW))
2389		return (EPERM);
2390
2391	/*
2392	 * The address should be within the BAT range. Assume that the
2393	 * start address in the BAT has the correct alignment (thus
2394	 * not requiring masking)
2395	 */
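	/*
	 * The masked batu value retains the BL (block length) field in bits
	 * 2-12; with the low two bits forced on, shifting it left by 15 and
	 * OR-ing in 0x7fff yields a "size - 1" mask.  For example, a 256MB
	 * BAT has BL == 0x7ff, giving end == start + 0x0fffffff.
	 */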
2396	start = battable[idx].batl & BAT_PBS;
2397	bat_ble = (battable[idx].batu & ~(BAT_EBS)) | 0x03;
2398	end = start | (bat_ble << 15) | 0x7fff;
2399
2400	if ((pa < start) || ((pa + size) > end))
2401		return (ERANGE);
2402
2403	return (0);
2404}
2405
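/*
 * Return 0 if the range [pa, pa + size) is covered by a single battable[]
 * entry (and is therefore addressable directly at its physical address),
 * EFAULT otherwise.
 */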
2406boolean_t
2407moea_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size)
2408{
2409	int i;
2410
2411	/*
2412	 * This currently does not work for entries that
2413	 * overlap 256M BAT segments.
2414	 */
2415
2416	for (i = 0; i < 16; i++)
2417		if (moea_bat_mapped(i, pa, size) == 0)
2418			return (0);
2419
2420	return (EFAULT);
2421}
2422
2423/*
2424 * Map a set of physical memory pages into the kernel virtual
2425 * address space. Return a pointer to where it is mapped. This
2426 * routine is intended to be used for mapping device memory,
2427 * NOT real memory.
2428 */
2429void *
2430moea_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size)
2431{
2432	vm_offset_t va, tmpva, ppa, offset;
2433	int i;
2434
2435	ppa = trunc_page(pa);
2436	offset = pa & PAGE_MASK;
2437	size = roundup(offset + size, PAGE_SIZE);
2438
2439	GIANT_REQUIRED;
2440
2441	/*
2442	 * If the physical address lies within a valid BAT table entry,
2443	 * return the 1:1 mapping. This currently doesn't work
2444	 * for regions that overlap 256M BAT segments.
2445	 */
2446	for (i = 0; i < 16; i++) {
2447		if (moea_bat_mapped(i, pa, size) == 0)
2448			return ((void *) pa);
2449	}
2450
2451	va = kmem_alloc_nofault(kernel_map, size);
2452	if (!va)
2453		panic("moea_mapdev: Couldn't alloc kernel virtual memory");
2454
2455	for (tmpva = va; size > 0;) {
2456		moea_kenter(mmu, tmpva, ppa);
2457		tlbie(tmpva);
2458		size -= PAGE_SIZE;
2459		tmpva += PAGE_SIZE;
2460		ppa += PAGE_SIZE;
2461	}
2462
2463	return ((void *)(va + offset));
2464}
2465
2466void
2467moea_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size)
2468{
2469	vm_offset_t base, offset;
2470
2471	/*
2472	 * If this is outside kernel virtual space, then it's a
2473	 * battable entry and doesn't require unmapping
2474	 */
2475	if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= virtual_end)) {
2476		base = trunc_page(va);
2477		offset = va & PAGE_MASK;
2478		size = roundup(offset + size, PAGE_SIZE);
2479		kmem_free(kernel_map, base, size);
2480	}
2481}
2482
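/*
 * Synchronize the instruction cache for a range in an arbitrary pmap.  The
 * range is walked one page (or partial page) at a time and each piece is
 * translated through its pvo; pieces with no mapping are simply skipped.
 */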
2483static void
2484moea_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz)
2485{
2486	struct pvo_entry *pvo;
2487	vm_offset_t lim;
2488	vm_paddr_t pa;
2489	vm_size_t len;
2490
2491	PMAP_LOCK(pm);
2492	while (sz > 0) {
2493		lim = round_page(va + 1);
2494		len = MIN(lim - va, sz);
2495		pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL);
2496		if (pvo != NULL) {
2497			pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) |
2498			    (va & ADDR_POFF);
2499			moea_syncicache(pa, len);
2500		}
2501		va += len;
2502		sz -= len;
2503	}
2504	PMAP_UNLOCK(pm);
2505}
2506