1/*-
2 * Copyright (C) 2007-2008 Semihalf, Rafal Jaworowski <raj@semihalf.com>
3 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
20 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Some hw specific parts of this pmap were derived or influenced
29 * by NetBSD's ibm4xx pmap module. More generic code is shared with
30 * a few other pmap modules from the FreeBSD tree.
31 */
32
33 /*
34  * VM layout notes:
35  *
36  * Kernel and user threads run within one common virtual address space
37  * defined by AS=0.
38  *
39  * Virtual address space layout:
40  * -----------------------------
41  * 0x0000_0000 - 0xbfff_efff	: user process
42  * 0xc000_0000 - 0xc1ff_ffff	: kernel reserved
43 *   0xc000_0000 - kernelend	: kernel code & data
44  *   0xc1ff_c000 - 0xc200_0000	: kstack0
45  * 0xc200_0000 - 0xffef_ffff	: KVA
46  *   0xc200_0000 - 0xc200_3fff : reserved for page zero/copy
47  *   0xc200_4000 - ptbl buf end: reserved for ptbl bufs
48 *   ptbl buf end - 0xffef_ffff	: actual free KVA space
49  * 0xfff0_0000 - 0xffff_ffff	: I/O devices region
50  */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD: head/sys/powerpc/booke/pmap.c 187149 2009-01-13 15:41:58Z raj $");
54
55#include <sys/types.h>
56#include <sys/param.h>
57#include <sys/malloc.h>
58#include <sys/ktr.h>
59#include <sys/proc.h>
60#include <sys/user.h>
61#include <sys/queue.h>
62#include <sys/systm.h>
63#include <sys/kernel.h>
64#include <sys/msgbuf.h>
65#include <sys/lock.h>
66#include <sys/mutex.h>
67#include <sys/vmmeter.h>
68
69#include <vm/vm.h>
70#include <vm/vm_page.h>
71#include <vm/vm_kern.h>
72#include <vm/vm_pageout.h>
73#include <vm/vm_extern.h>
74#include <vm/vm_object.h>
75#include <vm/vm_param.h>
76#include <vm/vm_map.h>
77#include <vm/vm_pager.h>
78#include <vm/uma.h>
79
80#include <machine/cpu.h>
81#include <machine/pcb.h>
82#include <machine/powerpc.h>
83
84#include <machine/tlb.h>
85#include <machine/spr.h>
86#include <machine/vmparam.h>
87#include <machine/md_var.h>
88#include <machine/mmuvar.h>
89#include <machine/pmap.h>
90#include <machine/pte.h>
91
92#include "mmu_if.h"
93
94#define DEBUG
95#undef DEBUG
96
97#ifdef  DEBUG
98#define debugf(fmt, args...) printf(fmt, ##args)
99#else
100#define debugf(fmt, args...)
101#endif
102
103#define TODO			panic("%s: not implemented", __func__);
104#define memmove(d, s, l)	bcopy(s, d, l)
105
106#include "opt_sched.h"
107#ifndef SCHED_4BSD
108#error "e500 only works with SCHED_4BSD which uses a global scheduler lock."
109#endif
110extern struct mtx sched_lock;
111
112/* Kernel physical load address. */
113extern uint32_t kernload;
114
115struct mem_region availmem_regions[MEM_REGIONS];
116int availmem_regions_sz;
117
118/* Reserved KVA space and mutex for mmu_booke_zero_page. */
119static vm_offset_t zero_page_va;
120static struct mtx zero_page_mutex;
121
122static struct mtx tlbivax_mutex;
123
124/*
125 * Reserved KVA space for mmu_booke_zero_page_idle. This is used
126 * by idle thred only, no lock required.
127 */
128static vm_offset_t zero_page_idle_va;
129
130/* Reserved KVA space and mutex for mmu_booke_copy_page. */
131static vm_offset_t copy_page_src_va;
132static vm_offset_t copy_page_dst_va;
133static struct mtx copy_page_mutex;
134
135/**************************************************************************/
136/* PMAP */
137/**************************************************************************/
138
139static void mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t,
140    vm_prot_t, boolean_t);
141
142unsigned int kptbl_min;		/* Index of the first kernel ptbl. */
143unsigned int kernel_ptbls;	/* Number of KVA ptbls. */
144
145static int pagedaemon_waken;
146
147/*
148 * If a user pmap is processed with mmu_booke_remove and the resident count
149 * drops to 0, there are no more pages to remove, so we need not continue.
150 */
151#define PMAP_REMOVE_DONE(pmap) \
152	((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0)
153
154extern void tlb_lock(uint32_t *);
155extern void tlb_unlock(uint32_t *);
156extern void tid_flush(tlbtid_t);
157
158/**************************************************************************/
159/* TLB and TID handling */
160/**************************************************************************/
161
162/* Translation ID busy table */
163static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1];
164
165/*
166 * TLB0 capabilities (entry and way numbers, etc.). These can vary between e500
167 * core revisions and should be read from h/w registers during early config.
168 */
169uint32_t tlb0_entries;
170uint32_t tlb0_ways;
171uint32_t tlb0_entries_per_way;
172
173#define TLB0_ENTRIES		(tlb0_entries)
174#define TLB0_WAYS		(tlb0_ways)
175#define TLB0_ENTRIES_PER_WAY	(tlb0_entries_per_way)
176
177#define TLB1_ENTRIES 16
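/*
 * Unlike TLB0, the TLB1 geometry is not probed from h/w here; the fixed
 * value of 16 entries presumably matches all e500 cores this pmap supports.
 */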
178
179/* In-ram copy of the TLB1 */
180static tlb_entry_t tlb1[TLB1_ENTRIES];
181
182/* Next free entry in the TLB1 */
183static unsigned int tlb1_idx;
184
185static tlbtid_t tid_alloc(struct pmap *);
186
187static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t);
188
189static int tlb1_set_entry(vm_offset_t, vm_offset_t, vm_size_t, uint32_t);
190static void tlb1_write_entry(unsigned int);
191static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *);
192static vm_size_t tlb1_mapin_region(vm_offset_t, vm_offset_t, vm_size_t);
193
194static vm_size_t tsize2size(unsigned int);
195static unsigned int size2tsize(vm_size_t);
196static unsigned int ilog2(unsigned int);
197
198static void set_mas4_defaults(void);
199
200static inline void tlb0_flush_entry(vm_offset_t);
201static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int);
202
203/**************************************************************************/
204/* Page table management */
205/**************************************************************************/
206
207/* Data for the pv entry allocation mechanism */
208static uma_zone_t pvzone;
209static struct vm_object pvzone_obj;
210static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
211
212#define PV_ENTRY_ZONE_MIN	2048	/* min pv entries in uma zone */
213
214#ifndef PMAP_SHPGPERPROC
215#define PMAP_SHPGPERPROC	200
216#endif
217
218static void ptbl_init(void);
219static struct ptbl_buf *ptbl_buf_alloc(void);
220static void ptbl_buf_free(struct ptbl_buf *);
221static void ptbl_free_pmap_ptbl(pmap_t, pte_t *);
222
223static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int);
224static void ptbl_free(mmu_t, pmap_t, unsigned int);
225static void ptbl_hold(mmu_t, pmap_t, unsigned int);
226static int ptbl_unhold(mmu_t, pmap_t, unsigned int);
227
228static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t);
229static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t);
230static void pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t);
231static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t);
232
233static pv_entry_t pv_alloc(void);
234static void pv_free(pv_entry_t);
235static void pv_insert(pmap_t, vm_offset_t, vm_page_t);
236static void pv_remove(pmap_t, vm_offset_t, vm_page_t);
237
238/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */
239#define PTBL_BUFS		(128 * 16)
240
241struct ptbl_buf {
242	TAILQ_ENTRY(ptbl_buf) link;	/* list link */
243	vm_offset_t kva;		/* va of mapping */
244};
245
246/* ptbl free list and a lock used for access synchronization. */
247static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist;
248static struct mtx ptbl_buf_freelist_lock;
249
250/* Base address of kva space allocated for ptbl bufs. */
251static vm_offset_t ptbl_buf_pool_vabase;
252
253/* Pointer to ptbl_buf structures. */
254static struct ptbl_buf *ptbl_bufs;
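/*
 * Pool layout sketch (see ptbl_init() below): ptbl_bufs[i] owns the KVA
 * slice starting at ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE, so
 * the whole pool spans PTBL_BUFS * PTBL_PAGES * PAGE_SIZE bytes of KVA,
 * the same amount reserved in mmu_booke_bootstrap().
 */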
255
256/*
257 * Kernel MMU interface
258 */
259static void		mmu_booke_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t);
260static void		mmu_booke_clear_modify(mmu_t, vm_page_t);
261static void		mmu_booke_clear_reference(mmu_t, vm_page_t);
262static void		mmu_booke_copy(pmap_t, pmap_t, vm_offset_t, vm_size_t,
263    vm_offset_t);
264static void		mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t);
265static void		mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t,
266    vm_prot_t, boolean_t);
267static void		mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t,
268    vm_page_t, vm_prot_t);
269static void		mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t,
270    vm_prot_t);
271static vm_paddr_t	mmu_booke_extract(mmu_t, pmap_t, vm_offset_t);
272static vm_page_t	mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t,
273    vm_prot_t);
274static void		mmu_booke_init(mmu_t);
275static boolean_t	mmu_booke_is_modified(mmu_t, vm_page_t);
276static boolean_t	mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
277static boolean_t	mmu_booke_ts_referenced(mmu_t, vm_page_t);
278static vm_offset_t	mmu_booke_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t,
279    int);
280static int		mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t);
281static void		mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t,
282    vm_object_t, vm_pindex_t, vm_size_t);
283static boolean_t	mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t);
284static void		mmu_booke_page_init(mmu_t, vm_page_t);
285static int		mmu_booke_page_wired_mappings(mmu_t, vm_page_t);
286static void		mmu_booke_pinit(mmu_t, pmap_t);
287static void		mmu_booke_pinit0(mmu_t, pmap_t);
288static void		mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t,
289    vm_prot_t);
290static void		mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int);
291static void		mmu_booke_qremove(mmu_t, vm_offset_t, int);
292static void		mmu_booke_release(mmu_t, pmap_t);
293static void		mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
294static void		mmu_booke_remove_all(mmu_t, vm_page_t);
295static void		mmu_booke_remove_write(mmu_t, vm_page_t);
296static void		mmu_booke_zero_page(mmu_t, vm_page_t);
297static void		mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int);
298static void		mmu_booke_zero_page_idle(mmu_t, vm_page_t);
299static void		mmu_booke_activate(mmu_t, struct thread *);
300static void		mmu_booke_deactivate(mmu_t, struct thread *);
301static void		mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t);
302static void		*mmu_booke_mapdev(mmu_t, vm_offset_t, vm_size_t);
303static void		mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t);
304static vm_offset_t	mmu_booke_kextract(mmu_t, vm_offset_t);
305static void		mmu_booke_kenter(mmu_t, vm_offset_t, vm_offset_t);
306static void		mmu_booke_kremove(mmu_t, vm_offset_t);
307static boolean_t	mmu_booke_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t);
308static boolean_t	mmu_booke_page_executable(mmu_t, vm_page_t);
309
310static mmu_method_t mmu_booke_methods[] = {
311	/* pmap dispatcher interface */
312	MMUMETHOD(mmu_change_wiring,	mmu_booke_change_wiring),
313	MMUMETHOD(mmu_clear_modify,	mmu_booke_clear_modify),
314	MMUMETHOD(mmu_clear_reference,	mmu_booke_clear_reference),
315	MMUMETHOD(mmu_copy,		mmu_booke_copy),
316	MMUMETHOD(mmu_copy_page,	mmu_booke_copy_page),
317	MMUMETHOD(mmu_enter,		mmu_booke_enter),
318	MMUMETHOD(mmu_enter_object,	mmu_booke_enter_object),
319	MMUMETHOD(mmu_enter_quick,	mmu_booke_enter_quick),
320	MMUMETHOD(mmu_extract,		mmu_booke_extract),
321	MMUMETHOD(mmu_extract_and_hold,	mmu_booke_extract_and_hold),
322	MMUMETHOD(mmu_init,		mmu_booke_init),
323	MMUMETHOD(mmu_is_modified,	mmu_booke_is_modified),
324	MMUMETHOD(mmu_is_prefaultable,	mmu_booke_is_prefaultable),
325	MMUMETHOD(mmu_ts_referenced,	mmu_booke_ts_referenced),
326	MMUMETHOD(mmu_map,		mmu_booke_map),
327	MMUMETHOD(mmu_mincore,		mmu_booke_mincore),
328	MMUMETHOD(mmu_object_init_pt,	mmu_booke_object_init_pt),
329	MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick),
330	MMUMETHOD(mmu_page_init,	mmu_booke_page_init),
331	MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings),
332	MMUMETHOD(mmu_pinit,		mmu_booke_pinit),
333	MMUMETHOD(mmu_pinit0,		mmu_booke_pinit0),
334	MMUMETHOD(mmu_protect,		mmu_booke_protect),
335	MMUMETHOD(mmu_qenter,		mmu_booke_qenter),
336	MMUMETHOD(mmu_qremove,		mmu_booke_qremove),
337	MMUMETHOD(mmu_release,		mmu_booke_release),
338	MMUMETHOD(mmu_remove,		mmu_booke_remove),
339	MMUMETHOD(mmu_remove_all,	mmu_booke_remove_all),
340	MMUMETHOD(mmu_remove_write,	mmu_booke_remove_write),
341	MMUMETHOD(mmu_zero_page,	mmu_booke_zero_page),
342	MMUMETHOD(mmu_zero_page_area,	mmu_booke_zero_page_area),
343	MMUMETHOD(mmu_zero_page_idle,	mmu_booke_zero_page_idle),
344	MMUMETHOD(mmu_activate,		mmu_booke_activate),
345	MMUMETHOD(mmu_deactivate,	mmu_booke_deactivate),
346
347	/* Internal interfaces */
348	MMUMETHOD(mmu_bootstrap,	mmu_booke_bootstrap),
349	MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped),
350	MMUMETHOD(mmu_mapdev,		mmu_booke_mapdev),
351	MMUMETHOD(mmu_kenter,		mmu_booke_kenter),
352	MMUMETHOD(mmu_kextract,		mmu_booke_kextract),
353/*	MMUMETHOD(mmu_kremove,		mmu_booke_kremove),	*/
354	MMUMETHOD(mmu_page_executable,	mmu_booke_page_executable),
355	MMUMETHOD(mmu_unmapdev,		mmu_booke_unmapdev),
356
357	{ 0, 0 }
358};
359
360static mmu_def_t booke_mmu = {
361	MMU_TYPE_BOOKE,
362	mmu_booke_methods,
363	0
364};
365MMU_DEF(booke_mmu);
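/*
 * Note: the mmu_booke_* routines are not called directly; they are
 * dispatched through the mmu_if interface ("mmu_if.h" above). MMU_DEF()
 * registers this method table under the MMU_TYPE_BOOKE name so the
 * platform code can select it during early boot.
 */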
366
367/* Read TLB0 entry count and associativity from TLB0CFG. */
368static __inline void
369tlb0_get_tlbconf(void)
370{
371	uint32_t tlb0_cfg;
372
373	tlb0_cfg = mfspr(SPR_TLB0CFG);
374	tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK;
375	tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT;
376	tlb0_entries_per_way = tlb0_entries / tlb0_ways;
377}
378
379/* Initialize pool of kva ptbl buffers. */
380static void
381ptbl_init(void)
382{
383	int i;
384
385	//debugf("ptbl_init: s (ptbl_bufs = 0x%08x size 0x%08x)\n",
386	//		(u_int32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS);
387	//debugf("ptbl_init: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)\n",
388	//		ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE);
389
390	mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF);
391	TAILQ_INIT(&ptbl_buf_freelist);
392
393	for (i = 0; i < PTBL_BUFS; i++) {
394		ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE;
395		TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link);
396	}
397
398	//debugf("ptbl_init: e\n");
399}
400
401/* Get a ptbl_buf from the freelist. */
402static struct ptbl_buf *
403ptbl_buf_alloc(void)
404{
405	struct ptbl_buf *buf;
406
407	//debugf("ptbl_buf_alloc: s\n");
408
409	mtx_lock(&ptbl_buf_freelist_lock);
410	buf = TAILQ_FIRST(&ptbl_buf_freelist);
411	if (buf != NULL)
412		TAILQ_REMOVE(&ptbl_buf_freelist, buf, link);
413	mtx_unlock(&ptbl_buf_freelist_lock);
414
415	//debugf("ptbl_buf_alloc: e (buf = 0x%08x)\n", (u_int32_t)buf);
416	return (buf);
417}
418
419/* Return ptbl buf to the free pool. */
420static void
421ptbl_buf_free(struct ptbl_buf *buf)
422{
423
424	CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf);
425
426	mtx_lock(&ptbl_buf_freelist_lock);
427	TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link);
428	mtx_unlock(&ptbl_buf_freelist_lock);
429}
430
431/*
432 * Search the pmap's list of allocated ptbl bufs for the buf that maps the given ptbl and return it to the free pool.
433 */
434static void
435ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl)
436{
437	struct ptbl_buf *pbuf;
438
439	CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);
440
441	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
442
443	TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link)
444		if (pbuf->kva == (vm_offset_t)ptbl) {
445			/* Remove from pmap ptbl buf list. */
446			TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link);
447
448			/* Free corresponding ptbl buf. */
449			ptbl_buf_free(pbuf);
450			break;
451		}
452}
453
454/* Allocate page table. */
455static pte_t *
456ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
457{
458	vm_page_t mtbl[PTBL_PAGES];
459	vm_page_t m;
460	struct ptbl_buf *pbuf;
461	unsigned int pidx;
462	pte_t *ptbl;
463	int i;
464
465	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
466	    (pmap == kernel_pmap), pdir_idx);
467
468	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
469	    ("ptbl_alloc: invalid pdir_idx"));
470	KASSERT((pmap->pm_pdir[pdir_idx] == NULL),
471	    ("pte_alloc: valid ptbl entry exists!"));
472
473	pbuf = ptbl_buf_alloc();
474	if (pbuf == NULL)
475		panic("pte_alloc: couldn't alloc kernel virtual memory");
476
477	ptbl = (pte_t *)pbuf->kva;
478
479	CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl);
480
481	/* Allocate ptbl pages; this will sleep! */
482	for (i = 0; i < PTBL_PAGES; i++) {
483		pidx = (PTBL_PAGES * pdir_idx) + i;
484		while ((m = vm_page_alloc(NULL, pidx,
485		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
486
487			PMAP_UNLOCK(pmap);
488			vm_page_unlock_queues();
489			VM_WAIT;
490			vm_page_lock_queues();
491			PMAP_LOCK(pmap);
492		}
493		mtbl[i] = m;
494	}
495
496	/* Map allocated pages into kernel_pmap. */
497	mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES);
498
499	/* Zero whole ptbl. */
500	bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE);
501
502	/* Add pbuf to the pmap ptbl bufs list. */
503	TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link);
504
505	return (ptbl);
506}
507
508/* Free ptbl pages and invalidate pdir entry. */
509static void
510ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
511{
512	pte_t *ptbl;
513	vm_paddr_t pa;
514	vm_offset_t va;
515	vm_page_t m;
516	int i;
517
518	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
519	    (pmap == kernel_pmap), pdir_idx);
520
521	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
522	    ("ptbl_free: invalid pdir_idx"));
523
524	ptbl = pmap->pm_pdir[pdir_idx];
525
526	CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);
527
528	KASSERT((ptbl != NULL), ("ptbl_free: null ptbl"));
529
530	/*
531	 * Invalidate the pdir entry as soon as possible, so that other CPUs
532	 * don't attempt to look up the page tables we are releasing.
533	 */
534	mtx_lock_spin(&tlbivax_mutex);
535
536	pmap->pm_pdir[pdir_idx] = NULL;
537
538	mtx_unlock_spin(&tlbivax_mutex);
539
540	for (i = 0; i < PTBL_PAGES; i++) {
541		va = ((vm_offset_t)ptbl + (i * PAGE_SIZE));
542		pa = pte_vatopa(mmu, kernel_pmap, va);
543		m = PHYS_TO_VM_PAGE(pa);
544		vm_page_free_zero(m);
545		atomic_subtract_int(&cnt.v_wire_count, 1);
546		mmu_booke_kremove(mmu, va);
547	}
548
549	ptbl_free_pmap_ptbl(pmap, ptbl);
550}
551
552/*
553 * Decrement ptbl pages hold count and attempt to free ptbl pages.
554 * Called when removing pte entry from ptbl.
555 *
556 * Return 1 if ptbl pages were freed.
557 */
558static int
559ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
560{
561	pte_t *ptbl;
562	vm_paddr_t pa;
563	vm_page_t m;
564	int i;
565
566	//int su = (pmap == kernel_pmap);
567	//debugf("ptbl_unhold: s (pmap = %08x su = %d pdir_idx = %d)\n",
568	//		(u_int32_t)pmap, su, pdir_idx);
569
570	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
571	    ("ptbl_unhold: invalid pdir_idx"));
572	KASSERT((pmap != kernel_pmap),
573	    ("ptbl_unhold: unholding kernel ptbl!"));
574
575	ptbl = pmap->pm_pdir[pdir_idx];
576
577	//debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl);
578	KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS),
579	    ("ptbl_unhold: non kva ptbl"));
580
581	/* decrement hold count */
582	for (i = 0; i < PTBL_PAGES; i++) {
583		pa = pte_vatopa(mmu, kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE));
584		m = PHYS_TO_VM_PAGE(pa);
585		m->wire_count--;
586	}
587
588	/*
589	 * Free ptbl pages if there are no pte entries in this ptbl.
590	 * wire_count has the same value for all ptbl pages, so check
591	 * the last page.
592	 */
593	if (m->wire_count == 0) {
594		ptbl_free(mmu, pmap, pdir_idx);
595
596		//debugf("ptbl_unhold: e (freed ptbl)\n");
597		return (1);
598	}
599
600	//debugf("ptbl_unhold: e\n");
601	return (0);
602}
603
604/*
605 * Increment hold count for ptbl pages. This routine is used when
606 * a new pte entry is being inserted into the ptbl.
607 */
608static void
609ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
610{
611	vm_paddr_t pa;
612	pte_t *ptbl;
613	vm_page_t m;
614	int i;
615
616	//debugf("ptbl_hold: s (pmap = 0x%08x pdir_idx = %d)\n", (u_int32_t)pmap, pdir_idx);
617
618	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
619	    ("ptbl_hold: invalid pdir_idx"));
620	KASSERT((pmap != kernel_pmap),
621	    ("ptbl_hold: holding kernel ptbl!"));
622
623	ptbl = pmap->pm_pdir[pdir_idx];
624
625	KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl"));
626
627	for (i = 0; i < PTBL_PAGES; i++) {
628		pa = pte_vatopa(mmu, kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE));
629		m = PHYS_TO_VM_PAGE(pa);
630		m->wire_count++;
631	}
632
633	//debugf("ptbl_hold: e\n");
634}
635
636/* Allocate pv_entry structure. */
637pv_entry_t
638pv_alloc(void)
639{
640	pv_entry_t pv;
641
642	debugf("pv_alloc: s\n");
643
644	pv_entry_count++;
645	if ((pv_entry_count > pv_entry_high_water) && (pagedaemon_waken == 0)) {
646		pagedaemon_waken = 1;
647		wakeup (&vm_pages_needed);
648	}
649	pv = uma_zalloc(pvzone, M_NOWAIT);
650
651	debugf("pv_alloc: e\n");
652	return (pv);
653}
654
655/* Free pv_entry structure. */
656static __inline void
657pv_free(pv_entry_t pve)
658{
659	//debugf("pv_free: s\n");
660
661	pv_entry_count--;
662	uma_zfree(pvzone, pve);
663
664	//debugf("pv_free: e\n");
665}
666
667
668/* Allocate and initialize pv_entry structure. */
669static void
670pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m)
671{
672	pv_entry_t pve;
673
674	//int su = (pmap == kernel_pmap);
675	//debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su,
676	//	(u_int32_t)pmap, va, (u_int32_t)m);
677
678	pve = pv_alloc();
679	if (pve == NULL)
680		panic("pv_insert: no pv entries!");
681
682	pve->pv_pmap = pmap;
683	pve->pv_va = va;
684
685	/* add to pv_list */
686	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
687	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
688
689	TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link);
690
691	//debugf("pv_insert: e\n");
692}
693
694/* Destroy pv entry. */
695static void
696pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m)
697{
698	pv_entry_t pve;
699
700	//int su = (pmap == kernel_pmap);
701	//debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va);
702
703	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
704	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
705
706	/* find pv entry */
707	TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) {
708		if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
709			/* remove from pv_list */
710			TAILQ_REMOVE(&m->md.pv_list, pve, pv_link);
711			if (TAILQ_EMPTY(&m->md.pv_list))
712				vm_page_flag_clear(m, PG_WRITEABLE);
713
714			/* free pv entry struct */
715			pv_free(pve);
716
717			break;
718		}
719	}
720
721	//debugf("pv_remove: e\n");
722}
723
724/*
725 * Clean the pte entry and try to free the page table page if requested.
726 *
727 * Return 1 if ptbl pages were freed, otherwise return 0.
728 */
729static int
730pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags)
731{
732	unsigned int pdir_idx = PDIR_IDX(va);
733	unsigned int ptbl_idx = PTBL_IDX(va);
734	vm_page_t m;
735	pte_t *ptbl;
736	pte_t *pte;
737
738	//int su = (pmap == kernel_pmap);
739	//debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n",
740	//		su, (u_int32_t)pmap, va, flags);
741
742	ptbl = pmap->pm_pdir[pdir_idx];
743	KASSERT(ptbl, ("pte_remove: null ptbl"));
744
745	pte = &ptbl[ptbl_idx];
746
747	if (pte == NULL || !PTE_ISVALID(pte))
748		return (0);
749
750	/* Get vm_page_t for mapped pte. */
751	m = PHYS_TO_VM_PAGE(PTE_PA(pte));
752
753	if (PTE_ISWIRED(pte))
754		pmap->pm_stats.wired_count--;
755
756	if (!PTE_ISFAKE(pte)) {
757		/* Handle managed entry. */
758		if (PTE_ISMANAGED(pte)) {
759
760			/* Handle modified pages. */
761			if (PTE_ISMODIFIED(pte))
762				vm_page_dirty(m);
763
764			/* Referenced pages. */
765			if (PTE_ISREFERENCED(pte))
766				vm_page_flag_set(m, PG_REFERENCED);
767
768			/* Remove pv_entry from pv_list. */
769			pv_remove(pmap, va, m);
770		}
771	}
772
773	mtx_lock_spin(&tlbivax_mutex);
774
775	tlb0_flush_entry(va);
776	pte->flags = 0;
777	pte->rpn = 0;
778
779	mtx_unlock_spin(&tlbivax_mutex);
780
781	pmap->pm_stats.resident_count--;
782
783	if (flags & PTBL_UNHOLD) {
784		//debugf("pte_remove: e (unhold)\n");
785		return (ptbl_unhold(mmu, pmap, pdir_idx));
786	}
787
788	//debugf("pte_remove: e\n");
789	return (0);
790}
791
792/*
793 * Insert PTE for a given page and virtual address.
794 */
795static void
796pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags)
797{
798	unsigned int pdir_idx = PDIR_IDX(va);
799	unsigned int ptbl_idx = PTBL_IDX(va);
800	pte_t *ptbl, *pte;
801
802	CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__,
803	    pmap == kernel_pmap, pmap, va);
804
805	/* Get the page table pointer. */
806	ptbl = pmap->pm_pdir[pdir_idx];
807
808	if (ptbl == NULL) {
809		/* Allocate page table pages. */
810		ptbl = ptbl_alloc(mmu, pmap, pdir_idx);
811	} else {
812		/*
813		 * Check if there is a valid mapping for the requested
814		 * va; if there is, remove it.
815		 */
816		pte = &pmap->pm_pdir[pdir_idx][ptbl_idx];
817		if (PTE_ISVALID(pte)) {
818			pte_remove(mmu, pmap, va, PTBL_HOLD);
819		} else {
820			/*
821			 * The pte is not used; increment the hold count
822			 * for ptbl pages.
823			 */
824			if (pmap != kernel_pmap)
825				ptbl_hold(mmu, pmap, pdir_idx);
826		}
827	}
828
829	/*
830	 * Insert pv_entry into pv_list for mapped page if part of managed
831	 * memory.
832	 */
833        if ((m->flags & PG_FICTITIOUS) == 0) {
834		if ((m->flags & PG_UNMANAGED) == 0) {
835			flags |= PTE_MANAGED;
836
837			/* Create and insert pv entry. */
838			pv_insert(pmap, va, m);
839		}
840        } else {
841		flags |= PTE_FAKE;
842	}
843
844	pmap->pm_stats.resident_count++;
845
846	mtx_lock_spin(&tlbivax_mutex);
847
848	tlb0_flush_entry(va);
849	if (pmap->pm_pdir[pdir_idx] == NULL) {
850		/*
851		 * If we just allocated a new page table, hook it in
852		 * the pdir.
853		 */
854		pmap->pm_pdir[pdir_idx] = ptbl;
855	}
856	pte = &(pmap->pm_pdir[pdir_idx][ptbl_idx]);
857	pte->rpn = VM_PAGE_TO_PHYS(m) & ~PTE_PA_MASK;
858	pte->flags |= (PTE_VALID | flags);
859
860	mtx_unlock_spin(&tlbivax_mutex);
861}
862
863/* Return the pa for the given pmap/va. */
864static vm_paddr_t
865pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va)
866{
867	vm_paddr_t pa = 0;
868	pte_t *pte;
869
870	pte = pte_find(mmu, pmap, va);
871	if ((pte != NULL) && PTE_ISVALID(pte))
872		pa = (PTE_PA(pte) | (va & PTE_PA_MASK));
873	return (pa);
874}
875
876/* Get a pointer to a PTE in a page table. */
877static pte_t *
878pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va)
879{
880	unsigned int pdir_idx = PDIR_IDX(va);
881	unsigned int ptbl_idx = PTBL_IDX(va);
882
883	KASSERT((pmap != NULL), ("pte_find: invalid pmap"));
884
885	if (pmap->pm_pdir[pdir_idx])
886		return (&(pmap->pm_pdir[pdir_idx][ptbl_idx]));
887
888	return (NULL);
889}
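/*
 * Note: the PTE lookup above is a two-level walk: PDIR_IDX(va) selects the
 * page table pointer within pm_pdir[] and PTBL_IDX(va) selects the PTE
 * within that table, i.e. pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)].
 */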
890
891/**************************************************************************/
892/* PMAP related */
893/**************************************************************************/
894
895/*
896 * This is called during e500_init, before the system is really initialized.
897 */
898static void
899mmu_booke_bootstrap(mmu_t mmu, vm_offset_t kernelstart, vm_offset_t kernelend)
900{
901	vm_offset_t phys_kernelend;
902	struct mem_region *mp, *mp1;
903	int cnt, i, j;
904	u_int s, e, sz;
905	u_int phys_avail_count;
906	vm_size_t physsz, hwphyssz, kstack0_sz;
907	vm_offset_t kernel_pdir, kstack0;
908	vm_paddr_t kstack0_phys;
909
910	debugf("mmu_booke_bootstrap: entered\n");
911
912	/* Initialize invalidation mutex */
913	mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN);
914
915	/* Read TLB0 size and associativity. */
916	tlb0_get_tlbconf();
917
918	/* Align kernel start and end address (kernel image). */
919	kernelstart = trunc_page(kernelstart);
920	kernelend = round_page(kernelend);
921
922	/* Allocate space for the message buffer. */
923	msgbufp = (struct msgbuf *)kernelend;
924	kernelend += MSGBUF_SIZE;
925	debugf(" msgbufp at 0x%08x end = 0x%08x\n", (uint32_t)msgbufp,
926	    kernelend);
927
928	kernelend = round_page(kernelend);
929
930	/* Allocate space for ptbl_bufs. */
931	ptbl_bufs = (struct ptbl_buf *)kernelend;
932	kernelend += sizeof(struct ptbl_buf) * PTBL_BUFS;
933	debugf(" ptbl_bufs at 0x%08x end = 0x%08x\n", (uint32_t)ptbl_bufs,
934	    kernelend);
935
936	kernelend = round_page(kernelend);
937
938	/* Allocate PTE tables for kernel KVA. */
939	kernel_pdir = kernelend;
940	kernel_ptbls = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS +
941	    PDIR_SIZE - 1) / PDIR_SIZE;
942	kernelend += kernel_ptbls * PTBL_PAGES * PAGE_SIZE;
943	debugf(" kernel ptbls: %d\n", kernel_ptbls);
944	debugf(" kernel pdir at 0x%08x end = 0x%08x\n", kernel_pdir, kernelend);
945
946	debugf(" kernelend: 0x%08x\n", kernelend);
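	/*
	 * The constants below appear to reflect the initial TLB1 setup: the
	 * kernel is assumed to be covered by a 16 MB (0x1000000) mapping.  If
	 * the image plus bootstrap allocations exceed 16 MB, the excess is
	 * mapped with extra TLB1 entries at 4 MB granularity; otherwise
	 * kernelend is simply rounded up to the 16 MB boundary.
	 */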
947	if (kernelend - kernelstart > 0x1000000) {
948		kernelend = (kernelend + 0x3fffff) & ~0x3fffff;
949		tlb1_mapin_region(kernelstart + 0x1000000,
950		    kernload + 0x1000000, kernelend - kernelstart - 0x1000000);
951	} else
952		kernelend = (kernelend + 0xffffff) & ~0xffffff;
953
954	debugf(" updated kernelend: 0x%08x\n", kernelend);
955
956	/*
957	 * Clear the structures. Note we can only do this safely after the
958	 * possible additional TLB1 translations are in place (above), so that
959	 * the whole range up to the currently calculated 'kernelend' is covered.
960	 */
961	memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE);
962	memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE);
963
964	/*******************************************************/
965	/* Set the start and end of kva. */
966	/*******************************************************/
967	virtual_avail = kernelend;
968	virtual_end = VM_MAX_KERNEL_ADDRESS;
969
970	/* Allocate KVA space for page zero/copy operations. */
971	zero_page_va = virtual_avail;
972	virtual_avail += PAGE_SIZE;
973	zero_page_idle_va = virtual_avail;
974	virtual_avail += PAGE_SIZE;
975	copy_page_src_va = virtual_avail;
976	virtual_avail += PAGE_SIZE;
977	copy_page_dst_va = virtual_avail;
978	virtual_avail += PAGE_SIZE;
979	debugf("zero_page_va = 0x%08x\n", zero_page_va);
980	debugf("zero_page_idle_va = 0x%08x\n", zero_page_idle_va);
981	debugf("copy_page_src_va = 0x%08x\n", copy_page_src_va);
982	debugf("copy_page_dst_va = 0x%08x\n", copy_page_dst_va);
983
984	/* Initialize page zero/copy mutexes. */
985	mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF);
986	mtx_init(&copy_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF);
987
988	/* Allocate KVA space for ptbl bufs. */
989	ptbl_buf_pool_vabase = virtual_avail;
990	virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE;
991	debugf("ptbl_buf_pool_vabase = 0x%08x end = 0x%08x\n",
992	    ptbl_buf_pool_vabase, virtual_avail);
993
994	/* Calculate corresponding physical addresses for the kernel region. */
995	phys_kernelend = kernload + (kernelend - kernelstart);
996	debugf("kernel image and allocated data:\n");
997	debugf(" kernload    = 0x%08x\n", kernload);
998	debugf(" kernelstart = 0x%08x\n", kernelstart);
999	debugf(" kernelend   = 0x%08x\n", kernelend);
1000	debugf(" kernel size = 0x%08x\n", kernelend - kernelstart);
1001
1002	if (sizeof(phys_avail) / sizeof(phys_avail[0]) < availmem_regions_sz)
1003		panic("mmu_booke_bootstrap: phys_avail too small");
1004
1005	/*
1006	 * Remove the kernel physical address range from the avail
1007	 * regions list. Page align all regions.
1008	 * Non-page aligned memory isn't very interesting to us.
1009	 * Also, sort the entries for ascending addresses.
1010	 */
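	/*
	 * For example, a region that fully contains the kernel image is split
	 * in two below: its tail [phys_kernelend, end) is appended as a new
	 * entry and its head is truncated to end at kernload.
	 */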
1011	sz = 0;
1012	cnt = availmem_regions_sz;
1013	debugf("processing avail regions:\n");
1014	for (mp = availmem_regions; mp->mr_size; mp++) {
1015		s = mp->mr_start;
1016		e = mp->mr_start + mp->mr_size;
1017		debugf(" %08x-%08x -> ", s, e);
1018		/* Check whether this region holds all of the kernel. */
1019		if (s < kernload && e > phys_kernelend) {
1020			availmem_regions[cnt].mr_start = phys_kernelend;
1021			availmem_regions[cnt++].mr_size = e - phys_kernelend;
1022			e = kernload;
1023		}
1024		/* Look whether this region starts within the kernel. */
1025		if (s >= kernload && s < phys_kernelend) {
1026			if (e <= phys_kernelend)
1027				goto empty;
1028			s = phys_kernelend;
1029		}
1030		/* Now look whether this region ends within the kernel. */
1031		if (e > kernload && e <= phys_kernelend) {
1032			if (s >= kernload)
1033				goto empty;
1034			e = kernload;
1035		}
1036		/* Now page align the start and size of the region. */
1037		s = round_page(s);
1038		e = trunc_page(e);
1039		if (e < s)
1040			e = s;
1041		sz = e - s;
1042		debugf("%08x-%08x = %x\n", s, e, sz);
1043
1044		/* Check whether some memory is left here. */
1045		if (sz == 0) {
1046		empty:
1047			memmove(mp, mp + 1,
1048			    (cnt - (mp - availmem_regions)) * sizeof(*mp));
1049			cnt--;
1050			mp--;
1051			continue;
1052		}
1053
1054		/* Do an insertion sort. */
1055		for (mp1 = availmem_regions; mp1 < mp; mp1++)
1056			if (s < mp1->mr_start)
1057				break;
1058		if (mp1 < mp) {
1059			memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1);
1060			mp1->mr_start = s;
1061			mp1->mr_size = sz;
1062		} else {
1063			mp->mr_start = s;
1064			mp->mr_size = sz;
1065		}
1066	}
1067	availmem_regions_sz = cnt;
1068
1069	/*******************************************************/
1070	/* Steal physical memory for kernel stack from the end */
1071	/* of the first avail region                           */
1072	/*******************************************************/
1073	kstack0_sz = KSTACK_PAGES * PAGE_SIZE;
1074	kstack0_phys = availmem_regions[0].mr_start +
1075	    availmem_regions[0].mr_size;
1076	kstack0_phys -= kstack0_sz;
1077	availmem_regions[0].mr_size -= kstack0_sz;
1078
1079	/*******************************************************/
1080	/* Fill in phys_avail table, based on availmem_regions */
1081	/*******************************************************/
1082	phys_avail_count = 0;
1083	physsz = 0;
1084	hwphyssz = 0;
1085	TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
1086
1087	debugf("fill in phys_avail:\n");
1088	for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) {
1089
1090		debugf(" region: 0x%08x - 0x%08x (0x%08x)\n",
1091		    availmem_regions[i].mr_start,
1092		    availmem_regions[i].mr_start + availmem_regions[i].mr_size,
1093		    availmem_regions[i].mr_size);
1094
1095		if (hwphyssz != 0 &&
1096		    (physsz + availmem_regions[i].mr_size) >= hwphyssz) {
1097			debugf(" hw.physmem adjust\n");
1098			if (physsz < hwphyssz) {
1099				phys_avail[j] = availmem_regions[i].mr_start;
1100				phys_avail[j + 1] =
1101				    availmem_regions[i].mr_start +
1102				    hwphyssz - physsz;
1103				physsz = hwphyssz;
1104				phys_avail_count++;
1105			}
1106			break;
1107		}
1108
1109		phys_avail[j] = availmem_regions[i].mr_start;
1110		phys_avail[j + 1] = availmem_regions[i].mr_start +
1111		    availmem_regions[i].mr_size;
1112		phys_avail_count++;
1113		physsz += availmem_regions[i].mr_size;
1114	}
1115	physmem = btoc(physsz);
1116
1117	/* Calculate the last available physical address. */
1118	for (i = 0; phys_avail[i + 2] != 0; i += 2)
1119		;
1120	Maxmem = powerpc_btop(phys_avail[i + 1]);
1121
1122	debugf("Maxmem = 0x%08lx\n", Maxmem);
1123	debugf("phys_avail_count = %d\n", phys_avail_count);
1124	debugf("physsz = 0x%08x physmem = %ld (0x%08lx)\n", physsz, physmem, physmem);
1125
1126	/*******************************************************/
1127	/* Initialize (statically allocated) kernel pmap. */
1128	/*******************************************************/
1129	PMAP_LOCK_INIT(kernel_pmap);
1130	kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE;
1131
1132	debugf("kernel_pmap = 0x%08x\n", (uint32_t)kernel_pmap);
1133	debugf("kptbl_min = %d, kernel_ptbls = %d\n", kptbl_min, kernel_ptbls);
1134	debugf("kernel pdir range: 0x%08x - 0x%08x\n",
1135	    kptbl_min * PDIR_SIZE, (kptbl_min + kernel_ptbls) * PDIR_SIZE - 1);
1136
1137	/* Initialize kernel pdir */
1138	for (i = 0; i < kernel_ptbls; i++)
1139		kernel_pmap->pm_pdir[kptbl_min + i] =
1140		    (pte_t *)(kernel_pdir + (i * PAGE_SIZE * PTBL_PAGES));
1141
1142	for (i = 0; i < MAXCPU; i++) {
1143		kernel_pmap->pm_tid[i] = TID_KERNEL;
1144
1145		/* Initialize each CPU's tidbusy entry 0 with kernel_pmap */
1146		tidbusy[i][0] = kernel_pmap;
1147	}
1148	/* Mark kernel_pmap active on all CPUs */
1149	kernel_pmap->pm_active = ~0;
1150
1151	/*******************************************************/
1152	/* Final setup */
1153	/*******************************************************/
1154
1155	/* Enter kstack0 into kernel map, provide guard page */
1156	kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
1157	thread0.td_kstack = kstack0;
1158	thread0.td_kstack_pages = KSTACK_PAGES;
1159
1160	debugf("kstack_sz = 0x%08x\n", kstack0_sz);
1161	debugf("kstack0_phys at 0x%08x - 0x%08x\n",
1162	    kstack0_phys, kstack0_phys + kstack0_sz);
1163	debugf("kstack0 at 0x%08x - 0x%08x\n", kstack0, kstack0 + kstack0_sz);
1164
1165	virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz;
1166	for (i = 0; i < KSTACK_PAGES; i++) {
1167		mmu_booke_kenter(mmu, kstack0, kstack0_phys);
1168		kstack0 += PAGE_SIZE;
1169		kstack0_phys += PAGE_SIZE;
1170	}
1171
1172	debugf("virtual_avail = %08x\n", virtual_avail);
1173	debugf("virtual_end   = %08x\n", virtual_end);
1174
1175	debugf("mmu_booke_bootstrap: exit\n");
1176}
1177
1178/*
1179 * Get the physical page address for the given pmap/virtual address.
1180 */
1181static vm_paddr_t
1182mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va)
1183{
1184	vm_paddr_t pa;
1185
1186	PMAP_LOCK(pmap);
1187	pa = pte_vatopa(mmu, pmap, va);
1188	PMAP_UNLOCK(pmap);
1189
1190	return (pa);
1191}
1192
1193/*
1194 * Extract the physical page address associated with the given
1195 * kernel virtual address.
1196 */
1197static vm_paddr_t
1198mmu_booke_kextract(mmu_t mmu, vm_offset_t va)
1199{
1200
1201	return (pte_vatopa(mmu, kernel_pmap, va));
1202}
1203
1204/*
1205 * Initialize the pmap module.
1206 * Called by vm_init to initialize any structures that the pmap
1207 * system needs to map virtual memory.
1208 */
1209static void
1210mmu_booke_init(mmu_t mmu)
1211{
1212	int shpgperproc = PMAP_SHPGPERPROC;
1213
1214	//debugf("mmu_booke_init: s\n");
1215
1216	/*
1217	 * Initialize the address space (zone) for the pv entries.  Set a
1218	 * high water mark so that the system can recover from excessive
1219	 * numbers of pv entries.
1220	 */
1221	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
1222	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1223
1224	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
1225	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
1226
1227	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
1228	pv_entry_high_water = 9 * (pv_entry_max / 10);
1229
1230	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
1231
1232	/* Pre-fill pvzone with initial number of pv entries. */
1233	uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN);
1234
1235	/* Initialize ptbl allocation. */
1236	ptbl_init();
1237
1238	//debugf("mmu_booke_init: e\n");
1239}
1240
1241/*
1242 * Map a list of wired pages into kernel virtual address space.  This is
1243 * intended for temporary mappings which do not need page modification or
1244 * references recorded.  Existing mappings in the region are overwritten.
1245 */
1246static void
1247mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count)
1248{
1249	vm_offset_t va;
1250
1251	//debugf("mmu_booke_qenter: s (sva = 0x%08x count = %d)\n", sva, count);
1252
1253	va = sva;
1254	while (count-- > 0) {
1255		mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m));
1256		va += PAGE_SIZE;
1257		m++;
1258	}
1259
1260	//debugf("mmu_booke_qenter: e\n");
1261}
1262
1263/*
1264 * Remove page mappings from kernel virtual address space.  Intended for
1265 * temporary mappings entered by mmu_booke_qenter.
1266 */
1267static void
1268mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count)
1269{
1270	vm_offset_t va;
1271
1272	//debugf("mmu_booke_qremove: s (sva = 0x%08x count = %d)\n", sva, count);
1273
1274	va = sva;
1275	while (count-- > 0) {
1276		mmu_booke_kremove(mmu, va);
1277		va += PAGE_SIZE;
1278	}
1279
1280	//debugf("mmu_booke_qremove: e\n");
1281}
1282
1283/*
1284 * Map a wired page into kernel virtual address space.
1285 */
1286static void
1287mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa)
1288{
1289	unsigned int pdir_idx = PDIR_IDX(va);
1290	unsigned int ptbl_idx = PTBL_IDX(va);
1291	u_int32_t flags;
1292	pte_t *pte;
1293
1294	//debugf("mmu_booke_kenter: s (pdir_idx = %d ptbl_idx = %d va=0x%08x pa=0x%08x)\n",
1295	//		pdir_idx, ptbl_idx, va, pa);
1296
1297	KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)),
1298			("mmu_booke_kenter: invalid va"));
1299
1300#if 0
1301	/* assume IO mapping, set I, G bits */
1302	flags = (PTE_G | PTE_I | PTE_FAKE);
1303
1304	/* if mapping is within system memory, do not set I, G bits */
1305	for (i = 0; i < totalmem_regions_sz; i++) {
1306		if ((pa >= totalmem_regions[i].mr_start) &&
1307				(pa < (totalmem_regions[i].mr_start +
1308				       totalmem_regions[i].mr_size))) {
1309			flags &= ~(PTE_I | PTE_G | PTE_FAKE);
1310			break;
1311		}
1312	}
1313#else
1314	flags = 0;
1315#endif
1316
1317	flags |= (PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID);
1318	flags |= PTE_M;
1319
1320	pte = &(kernel_pmap->pm_pdir[pdir_idx][ptbl_idx]);
1321
1322	mtx_lock_spin(&tlbivax_mutex);
1323
1324	if (PTE_ISVALID(pte)) {
1325
1326		CTR1(KTR_PMAP, "%s: replacing entry!", __func__);
1327
1328		/* Flush entry from TLB0 */
1329		tlb0_flush_entry(va);
1330	}
1331
1332	pte->rpn = pa & ~PTE_PA_MASK;
1333	pte->flags = flags;
1334
1335	//debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x "
1336	//		"pa=0x%08x rpn=0x%08x flags=0x%08x\n",
1337	//		pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags);
1338
1339	/* Flush the real memory from the instruction cache. */
1340	if ((flags & (PTE_I | PTE_G)) == 0) {
1341		__syncicache((void *)va, PAGE_SIZE);
1342	}
1343
1344	mtx_unlock_spin(&tlbivax_mutex);
1345}
1346
1347/*
1348 * Remove a page from kernel page table.
1349 */
1350static void
1351mmu_booke_kremove(mmu_t mmu, vm_offset_t va)
1352{
1353	unsigned int pdir_idx = PDIR_IDX(va);
1354	unsigned int ptbl_idx = PTBL_IDX(va);
1355	pte_t *pte;
1356
1357//	CTR2(KTR_PMAP,("%s: s (va = 0x%08x)\n", __func__, va));
1358
1359	KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
1360	    (va <= VM_MAX_KERNEL_ADDRESS)),
1361	    ("mmu_booke_kremove: invalid va"));
1362
1363	pte = &(kernel_pmap->pm_pdir[pdir_idx][ptbl_idx]);
1364
1365	if (!PTE_ISVALID(pte)) {
1366
1367		CTR1(KTR_PMAP, "%s: invalid pte", __func__);
1368
1369		return;
1370	}
1371
1372	mtx_lock_spin(&tlbivax_mutex);
1373
1374	/* Invalidate entry in TLB0, update PTE. */
1375	tlb0_flush_entry(va);
1376	pte->flags = 0;
1377	pte->rpn = 0;
1378
1379	mtx_unlock_spin(&tlbivax_mutex);
1380}
1381
1382/*
1383 * Initialize pmap associated with process 0.
1384 */
1385static void
1386mmu_booke_pinit0(mmu_t mmu, pmap_t pmap)
1387{
1388	//debugf("mmu_booke_pinit0: s (pmap = 0x%08x)\n", (u_int32_t)pmap);
1389	mmu_booke_pinit(mmu, pmap);
1390	PCPU_SET(curpmap, pmap);
1391	//debugf("mmu_booke_pinit0: e\n");
1392}
1393
1394/*
1395 * Initialize a preallocated and zeroed pmap structure,
1396 * such as one in a vmspace structure.
1397 */
1398static void
1399mmu_booke_pinit(mmu_t mmu, pmap_t pmap)
1400{
1401	int i;
1402
1403	CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap,
1404	    curthread->td_proc->p_pid, curthread->td_proc->p_comm);
1405
1406	KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap"));
1407
1408	PMAP_LOCK_INIT(pmap);
1409	for (i = 0; i < MAXCPU; i++)
1410		pmap->pm_tid[i] = TID_NONE;
1411	pmap->pm_active = 0;
1412	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1413	bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES);
1414	TAILQ_INIT(&pmap->pm_ptbl_list);
1415}
1416
1417/*
1418 * Release any resources held by the given physical map.
1419 * Called when a pmap initialized by mmu_booke_pinit is being released.
1420 * Should only be called if the map contains no valid mappings.
1421 */
1422static void
1423mmu_booke_release(mmu_t mmu, pmap_t pmap)
1424{
1425
1426	//debugf("mmu_booke_release: s\n");
1427
1428	PMAP_LOCK_DESTROY(pmap);
1429
1430	//debugf("mmu_booke_release: e\n");
1431}
1432
1433#if 0
1434/* Not needed, kernel page tables are statically allocated. */
1435void
1436mmu_booke_growkernel(vm_offset_t maxkvaddr)
1437{
1438}
1439#endif
1440
1441/*
1442 * Insert the given physical page at the specified virtual address in the
1443 * target physical map with the protection requested. If specified, the page
1444 * will be wired down.
1445 */
1446static void
1447mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
1448    vm_prot_t prot, boolean_t wired)
1449{
1450	vm_page_lock_queues();
1451	PMAP_LOCK(pmap);
1452	mmu_booke_enter_locked(mmu, pmap, va, m, prot, wired);
1453	vm_page_unlock_queues();
1454	PMAP_UNLOCK(pmap);
1455}
1456
1457static void
1458mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
1459    vm_prot_t prot, boolean_t wired)
1460{
1461	pte_t *pte;
1462	vm_paddr_t pa;
1463	u_int32_t flags;
1464	int su, sync;
1465
1466	pa = VM_PAGE_TO_PHYS(m);
1467	su = (pmap == kernel_pmap);
1468	sync = 0;
1469
1470	//debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x "
1471	//		"pa=0x%08x prot=0x%08x wired=%d)\n",
1472	//		(u_int32_t)pmap, su, pmap->pm_tid,
1473	//		(u_int32_t)m, va, pa, prot, wired);
1474
1475	if (su) {
1476		KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)),
1477				("mmu_booke_enter_locked: kernel pmap, non kernel va"));
1478	} else {
1479		KASSERT((va <= VM_MAXUSER_ADDRESS),
1480				("mmu_booke_enter_locked: user pmap, non user va"));
1481	}
1482
1483	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1484
1485	/*
1486	 * If there is an existing mapping, and the physical address has not
1487	 * changed, this must be a protection or wiring change.
1488	 */
1489	if (((pte = pte_find(mmu, pmap, va)) != NULL) &&
1490	    (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) {
1491
1492		/*
1493		 * Before actually updating pte->flags we calculate and
1494		 * prepare its new value in a helper var.
1495		 */
1496		flags = pte->flags;
1497		flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED);
1498
1499		/* Wiring change, just update stats. */
1500		if (wired) {
1501			if (!PTE_ISWIRED(pte)) {
1502				flags |= PTE_WIRED;
1503				pmap->pm_stats.wired_count++;
1504			}
1505		} else {
1506			if (PTE_ISWIRED(pte)) {
1507				flags &= ~PTE_WIRED;
1508				pmap->pm_stats.wired_count--;
1509			}
1510		}
1511
1512		if (prot & VM_PROT_WRITE) {
1513			/* Add write permissions. */
1514			flags |= PTE_SW;
1515			if (!su)
1516				flags |= PTE_UW;
1517		} else {
1518			/* Handle modified pages, sense modify status. */
1519
1520			/*
1521			 * The PTE_MODIFIED flag could have been set by underlying
1522			 * TLB misses since we last read it (above), and other
1523			 * CPUs could have updated it, so we check the PTE
1524			 * directly rather than rely on that saved local flags
1525			 * copy.
1526			 */
1527			if (PTE_ISMODIFIED(pte))
1528				vm_page_dirty(m);
1529		}
1530
1531		if (prot & VM_PROT_EXECUTE) {
1532			flags |= PTE_SX;
1533			if (!su)
1534				flags |= PTE_UX;
1535
1536			/*
1537			 * Check existing flags for execute permissions: if we
1538			 * are turning execute permissions on, icache should
1539			 * be flushed.
1540			 */
1541			if ((flags & (PTE_UX | PTE_SX)) == 0)
1542				sync++;
1543		}
1544
1545		flags &= ~PTE_REFERENCED;
1546
1547		/*
1548		 * The new flags value is fully calculated; only now actually
1549		 * update the PTE.
1550		 */
1551		mtx_lock_spin(&tlbivax_mutex);
1552
1553		tlb0_flush_entry(va);
1554		pte->flags = flags;
1555
1556		mtx_unlock_spin(&tlbivax_mutex);
1557
1558	} else {
1559		/*
1560		 * If there is an existing mapping, but it's for a different
1561		 * physical address, pte_enter() will delete the old mapping.
1562		 */
1563		//if ((pte != NULL) && PTE_ISVALID(pte))
1564		//	debugf("mmu_booke_enter_locked: replace\n");
1565		//else
1566		//	debugf("mmu_booke_enter_locked: new\n");
1567
1568		/* Now set up the flags and install the new mapping. */
1569		flags = (PTE_SR | PTE_VALID);
1570		flags |= PTE_M;
1571
1572		if (!su)
1573			flags |= PTE_UR;
1574
1575		if (prot & VM_PROT_WRITE) {
1576			flags |= PTE_SW;
1577			if (!su)
1578				flags |= PTE_UW;
1579		}
1580
1581		if (prot & VM_PROT_EXECUTE) {
1582			flags |= PTE_SX;
1583			if (!su)
1584				flags |= PTE_UX;
1585		}
1586
1587		/* If it's wired, update stats. */
1588		if (wired) {
1589			pmap->pm_stats.wired_count++;
1590			flags |= PTE_WIRED;
1591		}
1592
1593		pte_enter(mmu, pmap, m, va, flags);
1594
1595		/* Flush the real memory from the instruction cache. */
1596		if (prot & VM_PROT_EXECUTE)
1597			sync++;
1598	}
1599
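	/*
	 * If an icache flush is needed but the new mapping is not addressable
	 * from the current context (a user pmap other than the active one),
	 * a temporary mapping at va 0 of the current pmap is set up below,
	 * flushed and torn down again.
	 */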
1600	if (sync && (su || pmap == PCPU_GET(curpmap))) {
1601		__syncicache((void *)va, PAGE_SIZE);
1602		sync = 0;
1603	}
1604
1605	if (sync) {
1606		/* Create a temporary mapping. */
1607		pmap = PCPU_GET(curpmap);
1608
1609		va = 0;
1610		pte = pte_find(mmu, pmap, va);
1611		KASSERT(pte == NULL, ("%s:%d", __func__, __LINE__));
1612
1613		flags = PTE_SR | PTE_VALID | PTE_UR | PTE_M;
1614
1615		pte_enter(mmu, pmap, m, va, flags);
1616		__syncicache((void *)va, PAGE_SIZE);
1617		pte_remove(mmu, pmap, va, PTBL_UNHOLD);
1618	}
1619
1620	//debugf("mmu_booke_enter_locked: e\n");
1621}
1622
1623/*
1624 * Maps a sequence of resident pages belonging to the same object.
1625 * The sequence begins with the given page m_start.  This page is
1626 * mapped at the given virtual address start.  Each subsequent page is
1627 * mapped at a virtual address that is offset from start by the same
1628 * amount as the page is offset from m_start within the object.  The
1629 * last page in the sequence is the page with the largest offset from
1630 * m_start that can be mapped at a virtual address less than the given
1631 * virtual address end.  Not every virtual page between start and end
1632 * is mapped; only those for which a resident page exists with the
1633 * corresponding offset from m_start are mapped.
1634 */
1635static void
1636mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start,
1637    vm_offset_t end, vm_page_t m_start, vm_prot_t prot)
1638{
1639	vm_page_t m;
1640	vm_pindex_t diff, psize;
1641
1642	psize = atop(end - start);
1643	m = m_start;
1644	PMAP_LOCK(pmap);
1645	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1646		mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, prot &
1647		    (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1648		m = TAILQ_NEXT(m, listq);
1649	}
1650	PMAP_UNLOCK(pmap);
1651}
1652
1653static void
1654mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
1655    vm_prot_t prot)
1656{
1657
1658	//debugf("mmu_booke_enter_quick: s\n");
1659
1660	PMAP_LOCK(pmap);
1661	mmu_booke_enter_locked(mmu, pmap, va, m,
1662	    prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1663	PMAP_UNLOCK(pmap);
1664
1665	//debugf("mmu_booke_enter_quick e\n");
1666}
1667
1668/*
1669 * Remove the given range of addresses from the specified map.
1670 *
1671 * It is assumed that the start and end are properly rounded to the page size.
1672 */
1673static void
1674mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva)
1675{
1676	pte_t *pte;
1677	u_int8_t hold_flag;
1678
1679	int su = (pmap == kernel_pmap);
1680
1681	//debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n",
1682	//		su, (u_int32_t)pmap, pmap->pm_tid, va, endva);
1683
1684	if (su) {
1685		KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)),
1686		    ("mmu_booke_enter: kernel pmap, non kernel va"));
1687	} else {
1688		KASSERT((va <= VM_MAXUSER_ADDRESS),
1689		    ("mmu_booke_enter: user pmap, non user va"));
1690	}
1691
1692	if (PMAP_REMOVE_DONE(pmap)) {
1693		//debugf("mmu_booke_remove: e (empty)\n");
1694		return;
1695	}
1696
1697	hold_flag = PTBL_HOLD_FLAG(pmap);
1698	//debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag);
1699
1700	vm_page_lock_queues();
1701	PMAP_LOCK(pmap);
1702	for (; va < endva; va += PAGE_SIZE) {
1703		pte = pte_find(mmu, pmap, va);
1704		if ((pte != NULL) && PTE_ISVALID(pte))
1705			pte_remove(mmu, pmap, va, hold_flag);
1706	}
1707	PMAP_UNLOCK(pmap);
1708	vm_page_unlock_queues();
1709
1710	//debugf("mmu_booke_remove: e\n");
1711}
1712
1713/*
1714 * Remove physical page from all pmaps in which it resides.
1715 */
1716static void
1717mmu_booke_remove_all(mmu_t mmu, vm_page_t m)
1718{
1719	pv_entry_t pv, pvn;
1720	u_int8_t hold_flag;
1721
1722	//debugf("mmu_booke_remove_all: s\n");
1723
1724	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1725
1726	for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) {
1727		pvn = TAILQ_NEXT(pv, pv_link);
1728
1729		PMAP_LOCK(pv->pv_pmap);
1730		hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap);
1731		pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag);
1732		PMAP_UNLOCK(pv->pv_pmap);
1733	}
1734	vm_page_flag_clear(m, PG_WRITEABLE);
1735
1736	//debugf("mmu_booke_remove_all: e\n");
1737}
1738
1739/*
1740 * Map a range of physical addresses into kernel virtual address space.
1741 *
1742 * The value passed in *virt is a suggested virtual address for the mapping.
1743 * Architectures which can support a direct-mapped physical to virtual region
1744 * can return the appropriate address within that region, leaving '*virt'
1745 * unchanged.  We cannot and therefore do not; *virt is updated with the
1746 * first usable address after the mapped region.
1747 */
1748static vm_offset_t
1749mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start,
1750    vm_offset_t pa_end, int prot)
1751{
1752	vm_offset_t sva = *virt;
1753	vm_offset_t va = sva;
1754
1755	//debugf("mmu_booke_map: s (sva = 0x%08x pa_start = 0x%08x pa_end = 0x%08x)\n",
1756	//		sva, pa_start, pa_end);
1757
1758	while (pa_start < pa_end) {
1759		mmu_booke_kenter(mmu, va, pa_start);
1760		va += PAGE_SIZE;
1761		pa_start += PAGE_SIZE;
1762	}
1763	*virt = va;
1764
1765	//debugf("mmu_booke_map: e (va = 0x%08x)\n", va);
1766	return (sva);
1767}
1768
1769/*
1770 * The pmap must be activated before its address space can be accessed in any
1771 * way.
1772 */
1773static void
1774mmu_booke_activate(mmu_t mmu, struct thread *td)
1775{
1776	pmap_t pmap;
1777
1778	pmap = &td->td_proc->p_vmspace->vm_pmap;
1779
1780	CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%08x)",
1781	    __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
1782
1783	KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!"));
1784
1785	mtx_lock_spin(&sched_lock);
1786
1787	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
1788	PCPU_SET(curpmap, pmap);
1789
1790	if (pmap->pm_tid[PCPU_GET(cpuid)] == TID_NONE)
1791		tid_alloc(pmap);
1792
1793	/* Load PID0 register with pmap tid value. */
1794	mtspr(SPR_PID0, pmap->pm_tid[PCPU_GET(cpuid)]);
1795	__asm __volatile("isync");
1796
1797	mtx_unlock_spin(&sched_lock);
1798
1799	CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__,
1800	    pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm);
1801}
1802
1803/*
1804 * Deactivate the specified process's address space.
1805 */
1806static void
1807mmu_booke_deactivate(mmu_t mmu, struct thread *td)
1808{
1809	pmap_t pmap;
1810
1811	pmap = &td->td_proc->p_vmspace->vm_pmap;
1812
1813	CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x",
1814	    __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
1815
1816	atomic_clear_int(&pmap->pm_active, PCPU_GET(cpumask));
1817	PCPU_SET(curpmap, NULL);
1818}
1819
1820/*
1821 * Copy the range specified by src_addr/len
1822 * from the source map to the range dst_addr/len
1823 * in the destination map.
1824 *
1825 * This routine is only advisory and need not do anything.
1826 */
1827static void
1828mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap,
1829    vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr)
1830{
1831
1832}
1833
1834/*
1835 * Set the physical protection on the specified range of this map as requested.
1836 */
1837static void
1838mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
1839    vm_prot_t prot)
1840{
1841	vm_offset_t va;
1842	vm_page_t m;
1843	pte_t *pte;
1844
1845	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1846		mmu_booke_remove(mmu, pmap, sva, eva);
1847		return;
1848	}
1849
1850	if (prot & VM_PROT_WRITE)
1851		return;
1852
1853	vm_page_lock_queues();
1854	PMAP_LOCK(pmap);
1855	for (va = sva; va < eva; va += PAGE_SIZE) {
1856		if ((pte = pte_find(mmu, pmap, va)) != NULL) {
1857			if (PTE_ISVALID(pte)) {
1858				m = PHYS_TO_VM_PAGE(PTE_PA(pte));
1859
1860				mtx_lock_spin(&tlbivax_mutex);
1861
1862				/* Handle modified pages. */
1863				if (PTE_ISMODIFIED(pte))
1864					vm_page_dirty(m);
1865
1866				/* Referenced pages. */
1867				if (PTE_ISREFERENCED(pte))
1868					vm_page_flag_set(m, PG_REFERENCED);
1869
1870				tlb0_flush_entry(va);
1871				pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED |
1872				    PTE_REFERENCED);
1873
1874				mtx_unlock_spin(&tlbivax_mutex);
1875			}
1876		}
1877	}
1878	PMAP_UNLOCK(pmap);
1879	vm_page_unlock_queues();
1880}
1881
1882/*
1883 * Clear the write and modified bits in each of the given page's mappings.
1884 */
1885static void
1886mmu_booke_remove_write(mmu_t mmu, vm_page_t m)
1887{
1888	pv_entry_t pv;
1889	pte_t *pte;
1890
1891	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1892	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
1893	    (m->flags & PG_WRITEABLE) == 0)
1894		return;
1895
1896	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
1897		PMAP_LOCK(pv->pv_pmap);
1898		if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) {
1899			if (PTE_ISVALID(pte)) {
1900				m = PHYS_TO_VM_PAGE(PTE_PA(pte));
1901
1902				mtx_lock_spin(&tlbivax_mutex);
1903
1904				/* Handle modified pages. */
1905				if (PTE_ISMODIFIED(pte))
1906					vm_page_dirty(m);
1907
1908				/* Referenced pages. */
1909				if (PTE_ISREFERENCED(pte))
1910					vm_page_flag_set(m, PG_REFERENCED);
1911
1912				/* Flush mapping from TLB0. */
1913				pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED |
1914				    PTE_REFERENCED);
1915
1916				mtx_unlock_spin(&tlbivax_mutex);
1917			}
1918		}
1919		PMAP_UNLOCK(pv->pv_pmap);
1920	}
1921	vm_page_flag_clear(m, PG_WRITEABLE);
1922}
1923
1924static boolean_t
1925mmu_booke_page_executable(mmu_t mmu, vm_page_t m)
1926{
1927	pv_entry_t pv;
1928	pte_t *pte;
1929	boolean_t executable;
1930
1931	executable = FALSE;
1932	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
1933		PMAP_LOCK(pv->pv_pmap);
1934		pte = pte_find(mmu, pv->pv_pmap, pv->pv_va);
1935		if (pte != NULL && PTE_ISVALID(pte) && (pte->flags & PTE_UX))
1936			executable = TRUE;
1937		PMAP_UNLOCK(pv->pv_pmap);
1938		if (executable)
1939			break;
1940	}
1941
1942	return (executable);
1943}
1944
1945/*
1946 * Atomically extract and hold the physical page with the given
1947 * pmap and virtual address pair if that mapping permits the given
1948 * protection.
1949 */
1950static vm_page_t
1951mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va,
1952    vm_prot_t prot)
1953{
1954	pte_t *pte;
1955	vm_page_t m;
1956	u_int32_t pte_wbit;
1957
1958	m = NULL;
1959	vm_page_lock_queues();
1960	PMAP_LOCK(pmap);
1961	pte = pte_find(mmu, pmap, va);
1962
1963	if ((pte != NULL) && PTE_ISVALID(pte)) {
1964		if (pmap == kernel_pmap)
1965			pte_wbit = PTE_SW;
1966		else
1967			pte_wbit = PTE_UW;
1968
1969		if ((pte->flags & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) {
1970			m = PHYS_TO_VM_PAGE(PTE_PA(pte));
1971			vm_page_hold(m);
1972		}
1973	}
1974
1975	vm_page_unlock_queues();
1976	PMAP_UNLOCK(pmap);
1977	return (m);
1978}
1979
1980/*
1981 * Initialize a vm_page's machine-dependent fields.
1982 */
1983static void
1984mmu_booke_page_init(mmu_t mmu, vm_page_t m)
1985{
1986
1987	TAILQ_INIT(&m->md.pv_list);
1988}
1989
1990/*
1991 * mmu_booke_zero_page_area zeros the specified hardware page by
1992 * mapping it into virtual memory and using bzero to clear
1993 * its contents.
1994 *
1995 * off and size must reside within a single page.
1996 */
1997static void
1998mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size)
1999{
2000	vm_offset_t va;
2001
2002	//debugf("mmu_booke_zero_page_area: s\n");
2003
2004	mtx_lock(&zero_page_mutex);
2005	va = zero_page_va;
2006
2007	mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m));
2008	bzero((caddr_t)va + off, size);
2009	mmu_booke_kremove(mmu, va);
2010
2011	mtx_unlock(&zero_page_mutex);
2012
2013	//debugf("mmu_booke_zero_page_area: e\n");
2014}
2015
2016/*
2017 * mmu_booke_zero_page zeros the specified hardware page.
2018 */
2019static void
2020mmu_booke_zero_page(mmu_t mmu, vm_page_t m)
2021{
2022
2023	//debugf("mmu_booke_zero_page: s\n");
2024	mmu_booke_zero_page_area(mmu, m, 0, PAGE_SIZE);
2025	//debugf("mmu_booke_zero_page: e\n");
2026}
2027
2028/*
2029 * mmu_booke_copy_page copies the specified (machine independent) page by
2030 * mapping the page into virtual memory and using memcopy to copy the page,
2031 * one machine dependent page at a time.
2032 */
2033static void
2034mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm)
2035{
2036	vm_offset_t sva, dva;
2037
2038	sva = copy_page_src_va;
2039	dva = copy_page_dst_va;
2040
2041	mtx_lock(&copy_page_mutex);
2042	mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm));
2043	mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm));
2044	memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);
2045	mmu_booke_kremove(mmu, dva);
2046	mmu_booke_kremove(mmu, sva);
2047	mtx_unlock(&copy_page_mutex);
2048}
2049
2050#if 0
2051/*
2052 * Remove all pages from the specified address space; this aids process exit
2053 * speed. This is much faster than mmu_booke_remove in the case of running
2054 * down an entire address space. Only works for the current pmap.
2055 */
2056void
2057mmu_booke_remove_pages(pmap_t pmap)
2058{
2059}
2060#endif
2061
2062/*
2063 * mmu_booke_zero_page_idle zeros the specified hardware page by mapping it
2064 * into virtual memory and using bzero to clear its contents. This is intended
2065 * to be called from the vm_pagezero process only and outside of Giant. No
2066 * lock is required.
2067 */
2068static void
2069mmu_booke_zero_page_idle(mmu_t mmu, vm_page_t m)
2070{
2071	vm_offset_t va;
2072
2073	//debugf("mmu_booke_zero_page_idle: s\n");
2074
2075	va = zero_page_idle_va;
2076	mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m));
2077	bzero((caddr_t)va, PAGE_SIZE);
2078	mmu_booke_kremove(mmu, va);
2079
2080	//debugf("mmu_booke_zero_page_idle: e\n");
2081}
2082
2083/*
2084 * Return whether or not the specified physical page was modified
2085 * in any of the physical maps.
2086 */
2087static boolean_t
2088mmu_booke_is_modified(mmu_t mmu, vm_page_t m)
2089{
2090	pte_t *pte;
2091	pv_entry_t pv;
2092
2093	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2094	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2095		return (FALSE);
2096
2097	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
2098		PMAP_LOCK(pv->pv_pmap);
2099		if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) {
2100			if (!PTE_ISVALID(pte))
2101				goto make_sure_to_unlock;
2102
2103			if (PTE_ISMODIFIED(pte)) {
2104				PMAP_UNLOCK(pv->pv_pmap);
2105				return (TRUE);
2106			}
2107		}
2108make_sure_to_unlock:
2109		PMAP_UNLOCK(pv->pv_pmap);
2110	}
2111	return (FALSE);
2112}
2113
2114/*
2115 * Return whether or not the specified virtual address is eligible
2116 * for prefault.
2117 */
2118static boolean_t
2119mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr)
2120{
2121
2122	return (FALSE);
2123}
2124
2125/*
2126 * Clear the modify bits on the specified physical page.
2127 */
2128static void
2129mmu_booke_clear_modify(mmu_t mmu, vm_page_t m)
2130{
2131	pte_t *pte;
2132	pv_entry_t pv;
2133
2134	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2135	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2136		return;
2137
2138	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
2139		PMAP_LOCK(pv->pv_pmap);
2140		if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) {
2141			if (!PTE_ISVALID(pte))
2142				goto make_sure_to_unlock;
2143
2144			mtx_lock_spin(&tlbivax_mutex);
2145
2146			if (pte->flags & (PTE_SW | PTE_UW | PTE_MODIFIED)) {
2147				tlb0_flush_entry(pv->pv_va);
2148				pte->flags &= ~(PTE_SW | PTE_UW | PTE_MODIFIED |
2149				    PTE_REFERENCED);
2150			}
2151
2152			mtx_unlock_spin(&tlbivax_mutex);
2153		}
2154make_sure_to_unlock:
2155		PMAP_UNLOCK(pv->pv_pmap);
2156	}
2157}
2158
2159/*
2160 * Return a count of reference bits for a page, clearing those bits.
2161 * It is not necessary for every reference bit to be cleared, but it
2162 * is necessary that 0 only be returned when there are truly no
2163 * reference bits set.
2164 *
2165 * XXX: The exact number of bits to check and clear is a matter that
2166 * should be tested and standardized at some point in the future for
2167 * optimal aging of shared pages.
2168 */
2169static int
2170mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m)
2171{
2172	pte_t *pte;
2173	pv_entry_t pv;
2174	int count;
2175
2176	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2177	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2178		return (0);
2179
2180	count = 0;
2181	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
2182		PMAP_LOCK(pv->pv_pmap);
2183		if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) {
2184			if (!PTE_ISVALID(pte))
2185				goto make_sure_to_unlock;
2186
2187			if (PTE_ISREFERENCED(pte)) {
2188				mtx_lock_spin(&tlbivax_mutex);
2189
2190				tlb0_flush_entry(pv->pv_va);
2191				pte->flags &= ~PTE_REFERENCED;
2192
2193				mtx_unlock_spin(&tlbivax_mutex);
2194
2195				if (++count > 4) {
2196					PMAP_UNLOCK(pv->pv_pmap);
2197					break;
2198				}
2199			}
2200		}
2201make_sure_to_unlock:
2202		PMAP_UNLOCK(pv->pv_pmap);
2203	}
2204	return (count);
2205}
2206
2207/*
2208 * Clear the reference bit on the specified physical page.
2209 */
2210static void
2211mmu_booke_clear_reference(mmu_t mmu, vm_page_t m)
2212{
2213	pte_t *pte;
2214	pv_entry_t pv;
2215
2216	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2217	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2218		return;
2219
2220	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
2221		PMAP_LOCK(pv->pv_pmap);
2222		if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) {
2223			if (!PTE_ISVALID(pte))
2224				goto make_sure_to_unlock;
2225
2226			if (PTE_ISREFERENCED(pte)) {
2227				mtx_lock_spin(&tlbivax_mutex);
2228
2229				tlb0_flush_entry(pv->pv_va);
2230				pte->flags &= ~PTE_REFERENCED;
2231
2232				mtx_unlock_spin(&tlbivax_mutex);
2233			}
2234		}
2235make_sure_to_unlock:
2236		PMAP_UNLOCK(pv->pv_pmap);
2237	}
2238}
2239
2240/*
2241 * Change wiring attribute for a map/virtual-address pair.
2242 */
2243static void
2244mmu_booke_change_wiring(mmu_t mmu, pmap_t pmap, vm_offset_t va, boolean_t wired)
2245{
2246	pte_t *pte;
2247
2248	PMAP_LOCK(pmap);
2249	if ((pte = pte_find(mmu, pmap, va)) != NULL) {
2250		if (wired) {
2251			if (!PTE_ISWIRED(pte)) {
2252				pte->flags |= PTE_WIRED;
2253				pmap->pm_stats.wired_count++;
2254			}
2255		} else {
2256			if (PTE_ISWIRED(pte)) {
2257				pte->flags &= ~PTE_WIRED;
2258				pmap->pm_stats.wired_count--;
2259			}
2260		}
2261	}
2262	PMAP_UNLOCK(pmap);
2263}
2264
2265/*
2266 * Return true if the pmap's pv is one of the first 16 pvs linked to from this
2267 * page.  This count may be changed upwards or downwards in the future; it is
2268 * only necessary that true be returned for a small subset of pmaps for proper
2269 * page aging.
2270 */
2271static boolean_t
2272mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m)
2273{
2274	pv_entry_t pv;
2275	int loops;
2276
2277	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2278	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2279		return (FALSE);
2280
2281	loops = 0;
2282	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
2283
2284		if (pv->pv_pmap == pmap)
2285			return (TRUE);
2286
2287		if (++loops >= 16)
2288			break;
2289	}
2290	return (FALSE);
2291}
2292
2293/*
2294 * Return the number of managed mappings to the given physical page that are
2295 * wired.
2296 */
2297static int
2298mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m)
2299{
2300	pv_entry_t pv;
2301	pte_t *pte;
2302	int count = 0;
2303
2304	if ((m->flags & PG_FICTITIOUS) != 0)
2305		return (count);
2306	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2307
2308	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
2309		PMAP_LOCK(pv->pv_pmap);
2310		if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL)
2311			if (PTE_ISVALID(pte) && PTE_ISWIRED(pte))
2312				count++;
2313		PMAP_UNLOCK(pv->pv_pmap);
2314	}
2315
2316	return (count);
2317}
2318
2319static int
2320mmu_booke_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size)
2321{
2322	int i;
2323	vm_offset_t va;
2324
2325	/*
2326	 * This currently does not work for ranges that
2327	 * span more than one TLB1 entry.
2328	 */
2329	for (i = 0; i < tlb1_idx; i ++) {
2330		if (tlb1_iomapped(i, pa, size, &va) == 0)
2331			return (0);
2332	}
2333
2334	return (EFAULT);
2335}
2336
2337/*
2338 * Map a set of physical memory pages into the kernel virtual address space.
2339 * Return a pointer to where it is mapped. This routine is intended to be used
2340 * for mapping device memory, NOT real memory.
2341 */
2342static void *
2343mmu_booke_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size)
2344{
2345	void *res;
2346	uintptr_t va;
2347	vm_size_t sz;
2348
2349	va = (pa >= 0x80000000) ? pa : (0xe2000000 + pa);
2350	res = (void *)va;
2351
2352	do {
2353		sz = 1 << (ilog2(size) & ~1);
2354		if (bootverbose)
2355			printf("Wiring VA=%x to PA=%x (size=%x), "
2356			    "using TLB1[%d]\n", va, pa, sz, tlb1_idx);
2357		tlb1_set_entry(va, pa, sz, _TLB_ENTRY_IO);
2358		size -= sz;
2359		pa += sz;
2360		va += sz;
2361	} while (size > 0);
2362
2363	return (res);
2364}
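
/*
 * Worked example (hypothetical device): mapping 0x30000 bytes at PA
 * 0xe1000000 picks VA == PA (pa >= 0x80000000).  Each pass wires the largest
 * power-of-4 chunk that fits: ilog2(0x30000) == 17, masked with ~1 gives 16,
 * so three 64 KB (0x10000) TLB1 entries are used for the whole range.
 */
#if 0
	void *regs;

	regs = mmu_booke_mapdev(mmu, 0xe1000000, 0x30000);
	/* regs == (void *)0xe1000000, tlb1_idx advanced by 3 */
#endif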
2365
2366/*
2367 * 'Unmap' a range mapped by mmu_booke_mapdev().
2368 */
2369static void
2370mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size)
2371{
2372	vm_offset_t base, offset;
2373
2374	//debugf("mmu_booke_unmapdev: s (va = 0x%08x)\n", va);
2375
2376	/*
2377	 * Unmap only if this is inside kernel virtual space.
2378	 */
2379	if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) {
2380		base = trunc_page(va);
2381		offset = va & PAGE_MASK;
2382		size = roundup(offset + size, PAGE_SIZE);
2383		kmem_free(kernel_map, base, size);
2384	}
2385
2386	//debugf("mmu_booke_unmapdev: e\n");
2387}
2388
2389/*
2390 * mmu_booke_object_init_pt preloads the ptes for a given object
2391 * into the specified pmap. This eliminates the blast of soft
2392 * faults on process startup and immediately after an mmap.
2393 */
2394static void
2395mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr,
2396    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2397{
2398	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2399	KASSERT(object->type == OBJT_DEVICE,
2400	    ("mmu_booke_object_init_pt: non-device object"));
2401}
2402
2403/*
2404 * Perform the pmap work for mincore.
2405 */
2406static int
2407mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr)
2408{
2409
2410	TODO;
2411	return (0);
2412}
2413
2414/**************************************************************************/
2415/* TID handling */
2416/**************************************************************************/
2417
2418/*
2419 * Allocate a TID. If necessary, steal one from someone else.
2420 * The new TID is flushed from the TLB before returning.
2421 */
2422static tlbtid_t
2423tid_alloc(pmap_t pmap)
2424{
2425	tlbtid_t tid;
2426	int thiscpu;
2427
2428	KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap"));
2429
2430	CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap);
2431
2432	thiscpu = PCPU_GET(cpuid);
2433
2434	tid = PCPU_GET(tid_next);
2435	if (tid > TID_MAX)
2436		tid = TID_MIN;
2437	PCPU_SET(tid_next, tid + 1);
2438
2439	/* If we are stealing a TID, clear the relevant pmap's field. */
2440	if (tidbusy[thiscpu][tid] != NULL) {
2441
2442		CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid);
2443
2444		tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE;
2445
2446		/* Flush all entries from TLB0 matching this TID. */
2447		tid_flush(tid);
2448	}
2449
2450	tidbusy[thiscpu][tid] = pmap;
2451	pmap->pm_tid[thiscpu] = tid;
2452	__asm __volatile("msync; isync");
2453
2454	CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid,
2455	    PCPU_GET(tid_next));
2456
2457	return (tid);
2458}
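
/*
 * Example trace (hypothetical state): with tid_next == 12 and
 * tidbusy[cpu][12] already owned by another pmap, that pmap's pm_tid[cpu] is
 * reset to TID_NONE, all TLB0 entries tagged with TID 12 are flushed, slot 12
 * is handed to the caller and tid_next becomes 13.  Once tid_next passes
 * TID_MAX it wraps back to TID_MIN, so TIDs are recycled round-robin.
 */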
2459
2460/**************************************************************************/
2461/* TLB0 handling */
2462/**************************************************************************/
2463
2464static void
2465tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3,
2466    uint32_t mas7)
2467{
2468	int as;
2469	char desc[3];
2470	tlbtid_t tid;
2471	vm_size_t size;
2472	unsigned int tsize;
2473
2474	desc[2] = '\0';
2475	if (mas1 & MAS1_VALID)
2476		desc[0] = 'V';
2477	else
2478		desc[0] = ' ';
2479
2480	if (mas1 & MAS1_IPROT)
2481		desc[1] = 'P';
2482	else
2483		desc[1] = ' ';
2484
2485	as = (mas1 & MAS1_TS_MASK) ? 1 : 0;
2486	tid = MAS1_GETTID(mas1);
2487
2488	tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
2489	size = 0;
2490	if (tsize)
2491		size = tsize2size(tsize);
2492
2493	debugf("%3d: (%s) [AS=%d] "
2494	    "sz = 0x%08x tsz = %d tid = %d mas1 = 0x%08x "
2495	    "mas2(va) = 0x%08x mas3(pa) = 0x%08x mas7 = 0x%08x\n",
2496	    i, desc, as, size, tsize, tid, mas1, mas2, mas3, mas7);
2497}
2498
2499/* Convert TLB0 va and way number to tlb0[] table index. */
2500static inline unsigned int
2501tlb0_tableidx(vm_offset_t va, unsigned int way)
2502{
2503	unsigned int idx;
2504
2505	idx = (way * TLB0_ENTRIES_PER_WAY);
2506	idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT;
2507	return (idx);
2508}
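
/*
 * Worked example (assuming a 2-way TLB0 with 128 entries per way and the set
 * index taken from the low-order EPN bits): for va = 0xc2005000 the set index
 * is (0xc2005000 >> PAGE_SHIFT) & 0x7f == 5, so way 0 maps to tlb0[] index 5
 * and way 1 maps to index 128 + 5 == 133.  The real geometry is defined by
 * TLB0_ENTRIES_PER_WAY and the MAS2 entry index mask/shift above.
 */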
2509
2510/*
2511 * Invalidate TLB0 entry.
2512 */
2513static inline void
2514tlb0_flush_entry(vm_offset_t va)
2515{
2516
2517	CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va);
2518
2519	mtx_assert(&tlbivax_mutex, MA_OWNED);
2520
2521	__asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK));
2522	__asm __volatile("isync; msync");
2523	__asm __volatile("tlbsync; msync");
2524
2525	CTR1(KTR_PMAP, "%s: e", __func__);
2526}
2527
2528/* Print out contents of the MAS registers for each TLB0 entry */
2529void
2530tlb0_print_tlbentries(void)
2531{
2532	uint32_t mas0, mas1, mas2, mas3, mas7;
2533	int entryidx, way, idx;
2534
2535	debugf("TLB0 entries:\n");
2536	for (way = 0; way < TLB0_WAYS; way ++)
2537		for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) {
2538
2539			mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
2540			mtspr(SPR_MAS0, mas0);
2541			__asm __volatile("isync");
2542
2543			mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT;
2544			mtspr(SPR_MAS2, mas2);
2545
2546			__asm __volatile("isync; tlbre");
2547
2548			mas1 = mfspr(SPR_MAS1);
2549			mas2 = mfspr(SPR_MAS2);
2550			mas3 = mfspr(SPR_MAS3);
2551			mas7 = mfspr(SPR_MAS7);
2552
2553			idx = tlb0_tableidx(mas2, way);
2554			tlb_print_entry(idx, mas1, mas2, mas3, mas7);
2555		}
2556}
2557
2558/**************************************************************************/
2559/* TLB1 handling */
2560/**************************************************************************/
2561
2562/*
2563 * TLB1 mapping notes:
2564 *
2565 * TLB1[0]	CCSRBAR
2566 * TLB1[1]	Kernel text and data.
2567 * TLB1[2-15]	Additional kernel text and data mappings (if required), PCI
2568 *		windows, other devices mappings.
2569 */
2570
2571/*
2572 * Write given entry to TLB1 hardware.
2573 * Use 32 bit pa, clear 4 high-order bits of RPN (mas7).
2574 */
2575static void
2576tlb1_write_entry(unsigned int idx)
2577{
2578	u_int32_t mas0, mas7;
2579
2580	//debugf("tlb1_write_entry: s\n");
2581
2582	/* Clear high order RPN bits */
2583	mas7 = 0;
2584
2585	/* Select entry */
2586	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx);
2587	//debugf("tlb1_write_entry: mas0 = 0x%08x\n", mas0);
2588
2589	mtspr(SPR_MAS0, mas0);
2590	__asm volatile("isync");
2591	mtspr(SPR_MAS1, tlb1[idx].mas1);
2592	__asm volatile("isync");
2593	mtspr(SPR_MAS2, tlb1[idx].mas2);
2594	__asm volatile("isync");
2595	mtspr(SPR_MAS3, tlb1[idx].mas3);
2596	__asm volatile("isync");
2597	mtspr(SPR_MAS7, mas7);
2598	__asm volatile("isync; tlbwe; isync; msync");
2599
2600	//debugf("tlb1_write_entry: e\n");
2601}
2602
2603/*
2604 * Return the largest uint value log such that 2^log <= num.
2605 */
2606static unsigned int
2607ilog2(unsigned int num)
2608{
2609	int lz;
2610
2611	__asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num));
2612	return (31 - lz);
2613}
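
/*
 * Worked example: num = 0x30000 has its highest set bit at position 17,
 * cntlzw returns 14 leading zeros, so ilog2(0x30000) == 31 - 14 == 17 and
 * indeed 2^17 == 0x20000 <= 0x30000 < 0x40000 == 2^18.  For num == 0 cntlzw
 * returns 32, so the result (-1) is not meaningful.
 */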
2614
2615/*
2616 * Convert TLB TSIZE value to mapped region size.
2617 */
2618static vm_size_t
2619tsize2size(unsigned int tsize)
2620{
2621
2622	/*
2623	 * size = 4^tsize KB
2624	 * size = 4^tsize * 2^10 = 2^(2 * tsize + 10)
2625	 */
2626
2627	return ((1 << (2 * tsize)) * 1024);
2628}
2629
2630/*
2631 * Convert region size (must be a power of 4) to TLB TSIZE value.
2632 */
2633static unsigned int
2634size2tsize(vm_size_t size)
2635{
2636
2637	return (ilog2(size) / 2 - 5);
2638}
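
/*
 * Worked example: tsize2size(7) == 4^7 KB == 16384 KB == 16 MB, and
 * size2tsize(16 MB) == ilog2(0x1000000) / 2 - 5 == 24 / 2 - 5 == 7, so the
 * two conversions round-trip for any exact power-of-4 size.
 */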
2639
2640/*
2641 * Register permanent kernel mapping in TLB1.
2642 *
2643 * Entries are created starting from index 0 (current free entry is
2644 * kept in tlb1_idx) and are not supposed to be invalidated.
2645 */
2646static int
2647tlb1_set_entry(vm_offset_t va, vm_offset_t pa, vm_size_t size,
2648    uint32_t flags)
2649{
2650	uint32_t ts, tid;
2651	int tsize;
2652
2653	if (tlb1_idx >= TLB1_ENTRIES) {
2654		printf("tlb1_set_entry: TLB1 full!\n");
2655		return (-1);
2656	}
2657
2658	/* Convert size to TSIZE */
2659	tsize = size2tsize(size);
2660
2661	tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK;
2662	/* XXX TS is hard coded to 0 for now as we only use a single address space */
2663	ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK;
2664
2665	/* XXX LOCK tlb1[] */
2666
2667	tlb1[tlb1_idx].mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
2668	tlb1[tlb1_idx].mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
2669	tlb1[tlb1_idx].mas2 = (va & MAS2_EPN_MASK) | flags;
2670
2671	/* Set supervisor RWX permission bits */
2672	tlb1[tlb1_idx].mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
2673
2674	tlb1_write_entry(tlb1_idx++);
2675
2676	/* XXX UNLOCK tlb1[] */
2677
2678	/*
2679	 * XXX in general TLB1 updates should be propagated between CPUs,
2680	 * since current design assumes to have the same TLB1 set-up on all
2681	 * cores.
2682	 */
2683	return (0);
2684}
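
/*
 * Example encoding (illustrative values): a 1 MB I/O window mapped 1:1 at
 * 0xe1000000 uses tsize = size2tsize(0x100000) == 5 (4^5 KB == 1 MB), so
 * MAS1 carries VALID | IPROT | TID_KERNEL | TS=0 | TSIZE=5, MAS2 the EPN
 * plus the _TLB_ENTRY_IO attribute bits, and MAS3 the RPN with SR/SW/SX set.
 */
#if 0
	tlb1_set_entry(0xe1000000, 0xe1000000, 0x100000, _TLB_ENTRY_IO);
#endif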
2685
2686static int
2687tlb1_entry_size_cmp(const void *a, const void *b)
2688{
2689	const vm_size_t *sza;
2690	const vm_size_t *szb;
2691
2692	sza = a;
2693	szb = b;
2694	if (*sza > *szb)
2695		return (-1);
2696	else if (*sza < *szb)
2697		return (1);
2698	else
2699		return (0);
2700}
2701
2702/*
2703 * Map in a contiguous RAM region into TLB1 using a maximum of
2704 * KERNEL_REGION_MAX_TLB_ENTRIES entries.
2705 *
2706 * If necessary, round up the last entry size and return the total size
2707 * used by all allocated entries.
2708 */
2709vm_size_t
2710tlb1_mapin_region(vm_offset_t va, vm_offset_t pa, vm_size_t size)
2711{
2712	vm_size_t entry_size[KERNEL_REGION_MAX_TLB_ENTRIES];
2713	vm_size_t mapped_size, sz, esz;
2714	unsigned int log;
2715	int i;
2716
2717	debugf("tlb1_mapin_region:\n");
2718	debugf(" region size = 0x%08x va = 0x%08x pa = 0x%08x\n", size, va, pa);
2719
2720	mapped_size = 0;
2721	sz = size;
2722	memset(entry_size, 0, sizeof(entry_size));
2723
2724	/* Calculate entry sizes. */
2725	for (i = 0; i < KERNEL_REGION_MAX_TLB_ENTRIES && sz > 0; i++) {
2726
2727		/* Largest region that is power of 4 and fits within size */
2728		log = ilog2(sz) / 2;
2729		esz = 1 << (2 * log);
2730
2731		/* If this is last entry cover remaining size. */
2732		if (i ==  KERNEL_REGION_MAX_TLB_ENTRIES - 1) {
2733			while (esz < sz)
2734				esz = esz << 2;
2735		}
2736
2737		entry_size[i] = esz;
2738		mapped_size += esz;
2739		if (esz < sz)
2740			sz -= esz;
2741		else
2742			sz = 0;
2743	}
2744
2745	/* Sort entry sizes, required to get proper entry address alignment. */
2746	qsort(entry_size, KERNEL_REGION_MAX_TLB_ENTRIES,
2747	    sizeof(vm_size_t), tlb1_entry_size_cmp);
2748
2749	/* Load TLB1 entries. */
2750	for (i = 0; i < KERNEL_REGION_MAX_TLB_ENTRIES; i++) {
2751		esz = entry_size[i];
2752		if (!esz)
2753			break;
2754		debugf("  entry %d: sz  = 0x%08x (va = 0x%08x pa = 0x%08x)\n",
2755		    tlb1_idx, esz, va, pa);
2756		tlb1_set_entry(va, pa, esz, _TLB_ENTRY_MEM);
2757
2758		va += esz;
2759		pa += esz;
2760	}
2761
2762	debugf(" mapped size 0x%08x (wasted space 0x%08x)\n",
2763	    mapped_size, mapped_size - size);
2764
2765	return (mapped_size);
2766}
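
/*
 * Worked example (assuming KERNEL_REGION_MAX_TLB_ENTRIES == 4): a 40 MB
 * region is split into the largest power-of-4 pieces that fit, i.e.
 * 16 MB + 16 MB + 4 MB + 4 MB; the descending sort places the largest pieces
 * first so that, given a suitably aligned region start, each entry begins on
 * a boundary aligned to its own size.  All four entries are used and 40 MB is
 * returned with no waste.  A 20 MB region would instead map 16 MB + 4 MB
 * using only two entries.
 */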
2767
2768/*
2769 * TLB1 initialization routine, to be called after the very first
2770 * assembler level setup done in locore.S.
2771 */
2772void
2773tlb1_init(vm_offset_t ccsrbar)
2774{
2775	uint32_t mas0;
2776
2777	/* TLB1[1] is used to map the kernel. Save that entry. */
2778	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(1);
2779	mtspr(SPR_MAS0, mas0);
2780	__asm __volatile("isync; tlbre");
2781
2782	tlb1[1].mas1 = mfspr(SPR_MAS1);
2783	tlb1[1].mas2 = mfspr(SPR_MAS2);
2784	tlb1[1].mas3 = mfspr(SPR_MAS3);
2785
2786	/* Map in CCSRBAR in TLB1[0] */
2787	tlb1_idx = 0;
2788	tlb1_set_entry(CCSRBAR_VA, ccsrbar, CCSRBAR_SIZE, _TLB_ENTRY_IO);
2789	/*
2790	 * Set the next available TLB1 entry index. Note TLB1[1] is reserved
2791	 * for the initial mapping of kernel text+data, which was set up early
2792	 * in locore, so we need to skip this [busy] entry.
2793	 */
2794	tlb1_idx = 2;
2795
2796	/* Setup TLB miss defaults */
2797	set_mas4_defaults();
2798}
2799
2800/*
2801 * Set up MAS4 defaults.
2802 * These values are loaded into MAS0-MAS2 on a TLB miss.
2803 */
2804static void
2805set_mas4_defaults(void)
2806{
2807	u_int32_t mas4;
2808
2809	/* Defaults: TLB0, PID0, TSIZED=4K */
2810	mas4 = MAS4_TLBSELD0;
2811	mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK;
2812
2813	mtspr(SPR_MAS4, mas4);
2814	__asm volatile("isync");
2815}
2816
2817/*
2818 * Print out contents of the MAS registers for each TLB1 entry
2819 */
2820void
2821tlb1_print_tlbentries(void)
2822{
2823	uint32_t mas0, mas1, mas2, mas3, mas7;
2824	int i;
2825
2826	debugf("TLB1 entries:\n");
2827	for (i = 0; i < TLB1_ENTRIES; i++) {
2828
2829		mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
2830		mtspr(SPR_MAS0, mas0);
2831
2832		__asm __volatile("isync; tlbre");
2833
2834		mas1 = mfspr(SPR_MAS1);
2835		mas2 = mfspr(SPR_MAS2);
2836		mas3 = mfspr(SPR_MAS3);
2837		mas7 = mfspr(SPR_MAS7);
2838
2839		tlb_print_entry(i, mas1, mas2, mas3, mas7);
2840	}
2841}
2842
2843/*
2844 * Print out contents of the in-ram tlb1 table.
2845 */
2846void
2847tlb1_print_entries(void)
2848{
2849	int i;
2850
2851	debugf("tlb1[] table entries:\n");
2852	for (i = 0; i < TLB1_ENTRIES; i++)
2853		tlb_print_entry(i, tlb1[i].mas1, tlb1[i].mas2, tlb1[i].mas3, 0);
2854}
2855
2856/*
2857 * Return 0 if the physical I/O range is encompassed by one of the
2858 * TLB1 entries, otherwise return the related error code.
2859 */
2860static int
2861tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
2862{
2863	u_int32_t prot;
2864	vm_paddr_t pa_start;
2865	vm_paddr_t pa_end;
2866	unsigned int entry_tsize;
2867	vm_size_t entry_size;
2868
2869	*va = (vm_offset_t)NULL;
2870
2871	/* Skip invalid entries */
2872	if (!(tlb1[i].mas1 & MAS1_VALID))
2873		return (EINVAL);
2874
2875	/*
2876	 * The entry must be cache-inhibited, guarded, and r/w
2877	 * so it can function as an i/o page
2878	 */
2879	prot = tlb1[i].mas2 & (MAS2_I | MAS2_G);
2880	if (prot != (MAS2_I | MAS2_G))
2881		return (EPERM);
2882
2883	prot = tlb1[i].mas3 & (MAS3_SR | MAS3_SW);
2884	if (prot != (MAS3_SR | MAS3_SW))
2885		return (EPERM);
2886
2887	/* The address should be within the entry range. */
2888	entry_tsize = (tlb1[i].mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
2889	KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));
2890
2891	entry_size = tsize2size(entry_tsize);
2892	pa_start = tlb1[i].mas3 & MAS3_RPN;
2893	pa_end = pa_start + entry_size - 1;
2894
2895	if ((pa < pa_start) || ((pa + size) > pa_end))
2896		return (ERANGE);
2897
2898	/* Return virtual address of this mapping. */
2899	*va = (tlb1[i].mas2 & MAS2_EPN_MASK) + (pa - pa_start);
2900	return (0);
2901}
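
/*
 * Worked example (illustrative entry): if tlb1[i] maps 1 MB of
 * cache-inhibited, guarded, read/write space with EPN == RPN == 0xe1000000,
 * a query for pa = 0xe1080000, size = 0x1000 falls inside
 * [0xe1000000, 0xe10fffff], so the function returns 0 and
 * *va == 0xe1000000 + (0xe1080000 - 0xe1000000) == 0xe1080000.
 */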
2902