pmap.c revision 106994
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *      This product includes software developed by the University of
24 *      California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
42 * $FreeBSD: head/sys/sparc64/sparc64/pmap.c 106994 2002-11-17 01:17:07Z jake $
43 */
44
45/*
46 * Manages physical address maps.
47 *
48 * In addition to hardware address maps, this module is called upon to
49 * provide software-use-only maps which may or may not be stored in the
50 * same form as hardware maps.  These pseudo-maps are used to store
51 * intermediate results from copy operations to and from address spaces.
52 *
53 * Since the information managed by this module is also stored by the
54 * logical address mapping module, this module may throw away valid virtual
55 * to physical mappings at almost any time.  However, invalidations of
56 * mappings must be done as requested.
57 *
58 * In order to cope with hardware architectures which make virtual to
59 * physical map invalidates expensive, this module may delay invalidate
60 * or reduced protection operations until such time as they are actually
61 * necessary.  This module is given full information as to which processors
62 * are currently using which maps, and to when physical maps must be made
63 * correct.
64 */
65
66#include "opt_msgbuf.h"
67#include "opt_pmap.h"
68
69#include <sys/param.h>
70#include <sys/kernel.h>
71#include <sys/ktr.h>
72#include <sys/lock.h>
73#include <sys/msgbuf.h>
74#include <sys/mutex.h>
75#include <sys/proc.h>
76#include <sys/smp.h>
77#include <sys/sysctl.h>
78#include <sys/systm.h>
79#include <sys/vmmeter.h>
80
81#include <dev/ofw/openfirm.h>
82
83#include <vm/vm.h>
84#include <vm/vm_param.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_page.h>
87#include <vm/vm_map.h>
88#include <vm/vm_object.h>
89#include <vm/vm_extern.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_pager.h>
92#include <vm/uma.h>
93
94#include <machine/cache.h>
95#include <machine/frame.h>
96#include <machine/instr.h>
97#include <machine/md_var.h>
98#include <machine/metadata.h>
99#include <machine/ofw_mem.h>
100#include <machine/smp.h>
101#include <machine/tlb.h>
102#include <machine/tte.h>
103#include <machine/tsb.h>
104
105#define	PMAP_DEBUG
106
107#ifndef	PMAP_SHPGPERPROC
108#define	PMAP_SHPGPERPROC	200
109#endif
110
111/*
112 * Virtual and physical address of message buffer.
113 */
114struct msgbuf *msgbufp;
115vm_offset_t msgbuf_phys;
116
117/*
118 * Physical addresses of first and last available physical page.
119 */
120vm_offset_t avail_start;
121vm_offset_t avail_end;
122
123int pmap_pagedaemon_waken;
124
125/*
126 * Map of physical memory regions.
127 */
128vm_offset_t phys_avail[128];
129static struct ofw_mem_region mra[128];
130struct ofw_mem_region sparc64_memreg[128];
131int sparc64_nmemreg;
132static struct ofw_map translations[128];
133static int translations_size;
134
135/*
136 * First and last available kernel virtual addresses.
137 */
138vm_offset_t virtual_avail;
139vm_offset_t virtual_end;
140vm_offset_t kernel_vm_end;
141
142vm_offset_t vm_max_kernel_address;
143
144static vm_offset_t crashdumpmap;
145
146/*
147 * Kernel pmap.
148 */
149struct pmap kernel_pmap_store;
150
151/*
152 * Allocate physical memory for use in pmap_bootstrap.
153 */
154static vm_offset_t pmap_bootstrap_alloc(vm_size_t size);
155
156static vm_offset_t pmap_map_direct(vm_page_t m);
157
158extern int tl1_immu_miss_patch_1[];
159extern int tl1_immu_miss_patch_2[];
160extern int tl1_dmmu_miss_patch_1[];
161extern int tl1_dmmu_miss_patch_2[];
162extern int tl1_dmmu_prot_patch_1[];
163extern int tl1_dmmu_prot_patch_2[];
164
165/*
166 * If a user pmap is processed with pmap_remove and the resident count
167 * drops to 0, there are no more pages to remove, so we need not
168 * continue.
169 */
170#define	PMAP_REMOVE_DONE(pm) \
171	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
172
173/*
174 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
175 * and pmap_protect() instead of trying each virtual address.
176 */
177#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
178
179#ifdef PMAP_STATS
180static long pmap_enter_nupdate;
181static long pmap_enter_nreplace;
182static long pmap_enter_nnew;
183static long pmap_ncache_enter;
184static long pmap_ncache_enter_c;
185static long pmap_ncache_enter_cc;
186static long pmap_ncache_enter_nc;
187static long pmap_ncache_remove;
188static long pmap_ncache_remove_c;
189static long pmap_ncache_remove_cc;
190static long pmap_ncache_remove_nc;
191static long pmap_niflush;
192
193SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "Statistics");
194SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nupdate, CTLFLAG_RD,
195    &pmap_enter_nupdate, 0, "Number of pmap_enter() updates");
196SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nreplace, CTLFLAG_RD,
197    &pmap_enter_nreplace, 0, "Number of pmap_enter() replacements");
198SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nnew, CTLFLAG_RD,
199    &pmap_enter_nnew, 0, "Number of pmap_enter() additions");
200SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter, CTLFLAG_RD,
201    &pmap_ncache_enter, 0, "Number of pmap_cache_enter() calls");
202SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter_c, CTLFLAG_RD,
203    &pmap_ncache_enter_c, 0, "Number of pmap_cache_enter() cacheable");
204SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter_cc, CTLFLAG_RD,
205    &pmap_ncache_enter_cc, 0, "Number of pmap_cache_enter() change color");
206SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter_nc, CTLFLAG_RD,
207    &pmap_ncache_enter_nc, 0, "Number of pmap_cache_enter() noncacheable");
208SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_remove, CTLFLAG_RD,
209    &pmap_ncache_remove, 0, "Number of pmap_cache_remove() calls");
210SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_remove_c, CTLFLAG_RD,
211    &pmap_ncache_remove_c, 0, "Number of pmap_cache_remove() cacheable");
212SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_remove_cc, CTLFLAG_RD,
213    &pmap_ncache_remove_cc, 0, "Number of pmap_cache_remove() change color");
214SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_remove_nc, CTLFLAG_RD,
215    &pmap_ncache_remove_nc, 0, "Number of pmap_cache_remove() noncacheable");
216SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_niflush, CTLFLAG_RD,
217    &pmap_niflush, 0, "Number of pmap I$ flushes");
218
219#define	PMAP_STATS_INC(var)	atomic_add_long(&var, 1)
220#else
221#define	PMAP_STATS_INC(var)
222#endif
223
224/*
225 * Quick sort callout for comparing memory regions.
226 */
227static int mr_cmp(const void *a, const void *b);
228static int om_cmp(const void *a, const void *b);
229static int
230mr_cmp(const void *a, const void *b)
231{
232	const struct ofw_mem_region *mra;
233	const struct ofw_mem_region *mrb;
234
235	mra = a;
236	mrb = b;
237	if (mra->mr_start < mrb->mr_start)
238		return (-1);
239	else if (mra->mr_start > mrb->mr_start)
240		return (1);
241	else
242		return (0);
243}
244static int
245om_cmp(const void *a, const void *b)
246{
247	const struct ofw_map *oma;
248	const struct ofw_map *omb;
249
250	oma = a;
251	omb = b;
252	if (oma->om_start < omb->om_start)
253		return (-1);
254	else if (oma->om_start > omb->om_start)
255		return (1);
256	else
257		return (0);
258}
259
260/*
261 * Bootstrap the system enough to run with virtual memory.
262 */
263void
264pmap_bootstrap(vm_offset_t ekva)
265{
266	struct pmap *pm;
267	struct tte *tp;
268	vm_offset_t off;
269	vm_offset_t pa;
270	vm_offset_t va;
271	vm_size_t physsz;
272	vm_size_t virtsz;
273	ihandle_t pmem;
274	ihandle_t vmem;
275	int sz;
276	int i;
277	int j;
278
279	/*
280	 * Find out what physical memory is available from the prom and
281	 * initialize the phys_avail array.  This must be done before
282	 * pmap_bootstrap_alloc is called.
283	 */
284	if ((pmem = OF_finddevice("/memory")) == -1)
285		panic("pmap_bootstrap: finddevice /memory");
286	if ((sz = OF_getproplen(pmem, "available")) == -1)
287		panic("pmap_bootstrap: getproplen /memory/available");
288	if (sizeof(phys_avail) < sz)
289		panic("pmap_bootstrap: phys_avail too small");
290	if (sizeof(mra) < sz)
291		panic("pmap_bootstrap: mra too small");
292	bzero(mra, sz);
293	if (OF_getprop(pmem, "available", mra, sz) == -1)
294		panic("pmap_bootstrap: getprop /memory/available");
295	sz /= sizeof(*mra);
296	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
297	qsort(mra, sz, sizeof (*mra), mr_cmp);
298	physsz = 0;
299	for (i = 0, j = 0; i < sz; i++, j += 2) {
300		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
301		    mra[i].mr_size);
302		phys_avail[j] = mra[i].mr_start;
303		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
304		physsz += mra[i].mr_size;
305	}
306	physmem = btoc(physsz);
307
308	virtsz = roundup(physsz, PAGE_SIZE_4M << (PAGE_SHIFT - TTE_SHIFT));
309	vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
310	tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
311	tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
312
313	/*
314	 * Get the available physical memory ranges from /memory/reg. These
315	 * are only used for kernel dumps, but it may not be wise to do prom
316	 * calls in that situation.
317	 */
318	if ((sz = OF_getproplen(pmem, "reg")) == -1)
319		panic("pmap_bootstrap: getproplen /memory/reg");
320	if (sizeof(sparc64_memreg) < sz)
321		panic("pmap_bootstrap: sparc64_memreg too small");
322	if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
323		panic("pmap_bootstrap: getprop /memory/reg");
324	sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
325
326	/*
327	 * Set the start and end of kva.  The kernel is loaded at the first
328	 * available 4 meg super page, so round up to the end of the page.
329	 */
330	virtual_avail = roundup2(ekva, PAGE_SIZE_4M);
331	virtual_end = vm_max_kernel_address;
332	kernel_vm_end = vm_max_kernel_address;
333
334	/*
335	 * Allocate the kernel tsb.
336	 */
337	pa = pmap_bootstrap_alloc(tsb_kernel_size);
338	if (pa & PAGE_MASK_4M)
339		panic("pmap_bootstrap: tsb unaligned\n");
340	tsb_kernel_phys = pa;
341	tsb_kernel = (struct tte *)virtual_avail;
342	virtual_avail += tsb_kernel_size;
343
344	/*
345	 * Patch the virtual address and the tsb mask into the trap table.
346	 */
347
348#define	SETHI(rd, imm22) \
349	(EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) | \
350	    EIF_IMM((imm22) >> 10, 22))
351#define	OR_R_I_R(rd, imm13, rs1) \
352	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) | \
353	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
354
355#define	PATCH(addr) do { \
356	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) || \
357	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0, IF_F3_RS1(addr[1])) || \
358	    addr[2] != SETHI(IF_F2_RD(addr[2]), 0x0)) \
359		panic("pmap_bootstrap: patched instructions have changed"); \
360	addr[0] |= EIF_IMM((tsb_kernel_mask) >> 10, 22); \
361	addr[1] |= EIF_IMM(tsb_kernel_mask, 10); \
362	addr[2] |= EIF_IMM(((vm_offset_t)tsb_kernel) >> 10, 22); \
363	flush(addr); \
364	flush(addr + 1); \
365	flush(addr + 2); \
366} while (0)
367
368	PATCH(tl1_immu_miss_patch_1);
369	PATCH(tl1_immu_miss_patch_2);
370	PATCH(tl1_dmmu_miss_patch_1);
371	PATCH(tl1_dmmu_miss_patch_2);
372	PATCH(tl1_dmmu_prot_patch_1);
373	PATCH(tl1_dmmu_prot_patch_2);
374
375	/*
376	 * Lock it in the tlb.
377	 */
378	pmap_map_tsb();
379	bzero(tsb_kernel, tsb_kernel_size);
380
381	/*
382	 * Enter fake 8k pages for the 4MB kernel pages, so that
383	 * pmap_kextract() will work for them.
384	 */
385	for (i = 0; i < kernel_tlb_slots; i++) {
386		pa = kernel_tlbs[i].te_pa;
387		va = kernel_tlbs[i].te_va;
388		for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
389			tp = tsb_kvtotte(va + off);
390			tp->tte_vpn = TV_VPN(va + off, TS_8K);
391			tp->tte_data = TD_V | TD_8K | TD_PA(pa + off) |
392			    TD_REF | TD_SW | TD_CP | TD_CV | TD_P | TD_W;
393		}
394	}
395
396	/*
397	 * Allocate a kernel stack with guard page for thread0 and map it into
398	 * the kernel tsb.
399	 */
400	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE);
401	kstack0_phys = pa;
402	kstack0 = virtual_avail + (KSTACK_GUARD_PAGES * PAGE_SIZE);
403	virtual_avail += (KSTACK_PAGES + KSTACK_GUARD_PAGES) * PAGE_SIZE;
404	for (i = 0; i < KSTACK_PAGES; i++) {
405		pa = kstack0_phys + i * PAGE_SIZE;
406		va = kstack0 + i * PAGE_SIZE;
407		tp = tsb_kvtotte(va);
408		tp->tte_vpn = TV_VPN(va, TS_8K);
409		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW |
410		    TD_CP | TD_CV | TD_P | TD_W;
411	}
412
413	/*
414	 * Allocate the message buffer.
415	 */
416	msgbuf_phys = pmap_bootstrap_alloc(MSGBUF_SIZE);
417
418	/*
419	 * Add the prom mappings to the kernel tsb.
420	 */
421	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
422		panic("pmap_bootstrap: finddevice /virtual-memory");
423	if ((sz = OF_getproplen(vmem, "translations")) == -1)
424		panic("pmap_bootstrap: getproplen translations");
425	if (sizeof(translations) < sz)
426		panic("pmap_bootstrap: translations too small");
427	bzero(translations, sz);
428	if (OF_getprop(vmem, "translations", translations, sz) == -1)
429		panic("pmap_bootstrap: getprop /virtual-memory/translations");
430	sz /= sizeof(*translations);
431	translations_size = sz;
432	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
433	qsort(translations, sz, sizeof (*translations), om_cmp);
434	for (i = 0; i < sz; i++) {
435		CTR3(KTR_PMAP,
436		    "translation: start=%#lx size=%#lx tte=%#lx",
437		    translations[i].om_start, translations[i].om_size,
438		    translations[i].om_tte);
439		if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
440		    translations[i].om_start > VM_MAX_PROM_ADDRESS)
441			continue;
442		for (off = 0; off < translations[i].om_size;
443		    off += PAGE_SIZE) {
444			va = translations[i].om_start + off;
445			tp = tsb_kvtotte(va);
446			tp->tte_vpn = TV_VPN(va, TS_8K);
447			tp->tte_data =
448			    ((translations[i].om_tte &
449			      ~(TD_SOFT_MASK << TD_SOFT_SHIFT)) | TD_EXEC) +
450			    off;
451		}
452	}
453
454	/*
455	 * Calculate the first and last available physical addresses.
456	 */
457	avail_start = phys_avail[0];
458	for (i = 0; phys_avail[i + 2] != 0; i += 2)
459		;
460	avail_end = phys_avail[i + 1];
461	Maxmem = sparc64_btop(avail_end);
462
463	/*
464	 * Allocate virtual address space for the message buffer.
465	 */
466	msgbufp = (struct msgbuf *)virtual_avail;
467	virtual_avail += round_page(MSGBUF_SIZE);
468
469	/*
470	 * Allocate virtual address space to map pages during a kernel dump.
471	 */
472	crashdumpmap = virtual_avail;
473	virtual_avail += MAXDUMPPGS * PAGE_SIZE;
474
475	/*
476	 * Initialize the kernel pmap (which is statically allocated).
477	 */
478	pm = kernel_pmap;
479	for (i = 0; i < MAXCPU; i++)
480		pm->pm_context[i] = TLB_CTX_KERNEL;
481	pm->pm_active = ~0;
482
483	/* XXX flush all non-locked tlb entries */
484}
485
486void
487pmap_map_tsb(void)
488{
489	vm_offset_t va;
490	vm_offset_t pa;
491	u_long data;
492	u_long s;
493	int i;
494
495	s = intr_disable();
496
497	/*
498	 * Map the 4mb tsb pages.
499	 */
500	for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
501		va = (vm_offset_t)tsb_kernel + i;
502		pa = tsb_kernel_phys + i;
503		/* XXX - cheetah */
504		data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
505		    TD_P | TD_W;
506		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
507		    TLB_TAR_CTX(TLB_CTX_KERNEL));
508		stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
509	}
510
511	/*
512	 * Set the secondary context to be the kernel context (needed for
513	 * fp block operations in the kernel and the cache code).
514	 */
515	stxa(AA_DMMU_SCXR, ASI_DMMU, TLB_CTX_KERNEL);
516	membar(Sync);
517
518	intr_restore(s);
519}
520
521/*
522 * Allocate a physical page of memory directly from the phys_avail map.
523 * Can only be called from pmap_bootstrap before avail start and end are
524 * calculated.
525 */
526static vm_offset_t
527pmap_bootstrap_alloc(vm_size_t size)
528{
529	vm_offset_t pa;
530	int i;
531
532	size = round_page(size);
533	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
534		if (phys_avail[i + 1] - phys_avail[i] < size)
535			continue;
536		pa = phys_avail[i];
537		phys_avail[i] += size;
538		return (pa);
539	}
540	panic("pmap_bootstrap_alloc");
541}
542
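/*
 * All tlb contexts on this cpu have been handed out.  Flush every
 * non-locked, non-kernel tlb entry and restart context allocation from the
 * minimum context number, so that stale user mappings cannot be reused.
 */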
543void
544pmap_context_rollover(void)
545{
546	u_long data;
547	u_long tag;
548	int i;
549
550	mtx_assert(&sched_lock, MA_OWNED);
551	CTR0(KTR_PMAP, "pmap_context_rollover");
552	for (i = 0; i < tlb_dtlb_entries; i++) {
553		/* XXX - cheetah */
554		data = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG);
555		tag = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_TAG_READ_REG);
556		if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
557		    TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
558			stxa_sync(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG, 0);
559		data = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG);
560		tag = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_TAG_READ_REG);
561		if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
562		    TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
563			stxa_sync(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG, 0);
564	}
565	PCPU_SET(tlb_ctx, PCPU_GET(tlb_ctx_min));
566}
567
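/*
 * Allocate a tlb context for a user pmap on the current cpu, rolling over
 * and flushing the tlb when the available context numbers are exhausted.
 */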
568static __inline u_int
569pmap_context_alloc(void)
570{
571	u_int context;
572
573	mtx_assert(&sched_lock, MA_OWNED);
574	context = PCPU_GET(tlb_ctx);
575	if (context + 1 == PCPU_GET(tlb_ctx_max))
576		pmap_context_rollover();
577	else
578		PCPU_SET(tlb_ctx, context + 1);
579	return (context);
580}
581
582/*
583 * Initialize the pmap module.
584 */
585void
586pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
587{
588	vm_offset_t addr;
589	vm_size_t size;
590	int result;
591	int i;
592
593	for (i = 0; i < vm_page_array_size; i++) {
594		vm_page_t m;
595
596		m = &vm_page_array[i];
597		STAILQ_INIT(&m->md.tte_list);
598		m->md.flags = 0;
599		m->md.color = 0;
600	}
601
602	for (i = 0; i < translations_size; i++) {
603		addr = translations[i].om_start;
604		size = translations[i].om_size;
605		if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
606			continue;
607		result = vm_map_find(kernel_map, NULL, 0, &addr, size, TRUE,
608		    VM_PROT_ALL, VM_PROT_ALL, 0);
609		if (result != KERN_SUCCESS || addr != translations[i].om_start)
610			panic("pmap_init: vm_map_find");
611	}
612}
613
614/*
615 * Initialize the address space (zone) for the pv_entries.  Set a
616 * high water mark so that the system can recover from excessive
617 * numbers of pv entries.
618 */
619void
620pmap_init2(void)
621{
622}
623
624/*
625 * Extract the physical page address associated with the given
626 * map/virtual_address pair.
627 */
628vm_offset_t
629pmap_extract(pmap_t pm, vm_offset_t va)
630{
631	struct tte *tp;
632
633	if (pm == kernel_pmap)
634		return (pmap_kextract(va));
635	tp = tsb_tte_lookup(pm, va);
636	if (tp == NULL)
637		return (0);
638	else
639		return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
640}
641
642/*
643 * Extract the physical page address associated with the given kernel virtual
644 * address.
645 */
646vm_offset_t
647pmap_kextract(vm_offset_t va)
648{
649	struct tte *tp;
650
651	tp = tsb_kvtotte(va);
652	if ((tp->tte_data & TD_V) == 0)
653		return (0);
654	return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
655}
656
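/*
 * Record a mapping of the given page at the given virtual address for
 * dcache alias detection.  Returns non-zero if the mapping may be entered
 * cacheable, or 0 if it (and all other mappings of the page) must be
 * non-cacheable.
 */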
657int
658pmap_cache_enter(vm_page_t m, vm_offset_t va)
659{
660	struct tte *tp;
661	int color;
662
663	PMAP_STATS_INC(pmap_ncache_enter);
664
665	/*
666	 * Find the color for this virtual address and note the added mapping.
667	 */
668	color = DCACHE_COLOR(va);
669	m->md.colors[color]++;
670
671	/*
672	 * If all existing mappings have the same color, the mapping is
673	 * cacheable.
674	 */
675	if (m->md.color == color) {
676		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
677		    ("pmap_cache_enter: cacheable, mappings of other color"));
678		PMAP_STATS_INC(pmap_ncache_enter_c);
679		return (1);
680	}
681
682	/*
683	 * If there are no mappings of the other color, and the page still has
684	 * the wrong color, this must be a new mapping.  Change the color to
685	 * match the new mapping, which is cacheable.  We must flush the page
686	 * from the cache now.
687	 */
688	if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
689		KASSERT(m->md.colors[color] == 1,
690		    ("pmap_cache_enter: changing color, not new mapping"));
691		dcache_page_inval(VM_PAGE_TO_PHYS(m));
692		m->md.color = color;
693		PMAP_STATS_INC(pmap_ncache_enter_cc);
694		return (1);
695	}
696
697	PMAP_STATS_INC(pmap_ncache_enter_nc);
698
699	/*
700	 * If the mapping is already non-cacheable, just return.
701	 */
702	if (m->md.color == -1)
703		return (0);
704
705	/*
706	 * Mark all mappings as uncacheable, flush any lines with the other
707	 * color out of the dcache, and set the color to none (-1).
708	 */
709	STAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
710		tp->tte_data &= ~TD_CV;
711		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
712	}
713	dcache_page_inval(VM_PAGE_TO_PHYS(m));
714	m->md.color = -1;
715	return (0);
716}
717
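/*
 * Record the removal of a mapping of the given page at the given virtual
 * address, and re-enable caching of the remaining mappings if the last
 * mapping of a conflicting color was removed.
 */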
718void
719pmap_cache_remove(vm_page_t m, vm_offset_t va)
720{
721	struct tte *tp;
722	int color;
723
724	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
725	    m->md.colors[DCACHE_COLOR(va)]);
726	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
727	    ("pmap_cache_remove: no mappings %d <= 0",
728	    m->md.colors[DCACHE_COLOR(va)]));
729	PMAP_STATS_INC(pmap_ncache_remove);
730
731	/*
732	 * Find the color for this virtual address and note the removal of
733	 * the mapping.
734	 */
735	color = DCACHE_COLOR(va);
736	m->md.colors[color]--;
737
738	/*
739	 * If the page is cacheable, just return and keep the same color, even
740	 * if there are no longer any mappings.
741	 */
742	if (m->md.color != -1) {
743		PMAP_STATS_INC(pmap_ncache_remove_c);
744		return;
745	}
746
747	KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
748	    ("pmap_cache_remove: uncacheable, no mappings of other color"));
749
750	/*
751	 * If the page is not cacheable (color is -1), and the number of
752	 * mappings for this color is not zero, just return.  There are
753	 * mappings of the other color still, so remain non-cacheable.
754	 */
755	if (m->md.colors[color] != 0) {
756		PMAP_STATS_INC(pmap_ncache_remove_nc);
757		return;
758	}
759
760	PMAP_STATS_INC(pmap_ncache_remove_cc);
761
762	/*
763	 * The number of mappings for this color is now zero.  Recache the
764	 * other colored mappings, and change the page color to the other
765	 * color.  There should be no lines in the data cache for this page,
766	 * so flushing should not be needed.
767	 */
768	STAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
769		tp->tte_data |= TD_CV;
770		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
771	}
772	m->md.color = DCACHE_OTHER_COLOR(color);
773}
774
775/*
776 * Map a wired page into kernel virtual address space.
777 */
778void
779pmap_kenter(vm_offset_t va, vm_offset_t pa)
780{
781	vm_offset_t ova;
782	struct tte *tp;
783	vm_page_t om;
784	vm_page_t m;
785	u_long data;
786
787	tp = tsb_kvtotte(va);
788	m = PHYS_TO_VM_PAGE(pa);
789	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
790	    va, pa, tp, tp->tte_data);
791	if ((tp->tte_data & TD_V) != 0) {
792		om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
793		ova = TTE_GET_VA(tp);
794		STAILQ_REMOVE(&om->md.tte_list, tp, tte, tte_link);
795		pmap_cache_remove(om, ova);
796		if (va != ova)
797			tlb_page_demap(kernel_pmap, ova);
798	}
799	data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP | TD_P | TD_W;
800	if (pmap_cache_enter(m, va) != 0)
801		data |= TD_CV;
802	tp->tte_vpn = TV_VPN(va, TS_8K);
803	tp->tte_data = data;
804	STAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
805	tp->tte_pmap = kernel_pmap;
806}
807
808/*
809 * Map a wired page into kernel virtual address space. This additionally
810 * takes a flag argument which is OR'ed into the TTE data. This is used by
811 * bus_space_map().
812 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
813 * to flush entries that might still be in the cache, if applicable.
814 */
815void
816pmap_kenter_flags(vm_offset_t va, vm_offset_t pa, u_long flags)
817{
818	struct tte *tp;
819
820	tp = tsb_kvtotte(va);
821	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
822	    va, pa, tp, tp->tte_data);
823	tp->tte_vpn = TV_VPN(va, TS_8K);
824	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
825}
826
827/*
828 * Make a temporary mapping for a physical address.  This is only intended
829 * to be used for panic dumps. Caching issues can be ignored completely here,
830 * because pages mapped this way are only read.
831 */
832void *
833pmap_kenter_temporary(vm_offset_t pa, int i)
834{
835	struct tte *tp;
836	vm_offset_t va;
837
838	va = crashdumpmap + i * PAGE_SIZE;
839	tlb_page_demap(kernel_pmap, va);
840	tp = tsb_kvtotte(va);
841	tp->tte_vpn = TV_VPN(va, TS_8K);
842	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_CP | TD_CV | TD_P;
843	return ((void *)crashdumpmap);
844}
845
846/*
847 * Remove a wired page from kernel virtual address space.
848 */
849void
850pmap_kremove(vm_offset_t va)
851{
852	struct tte *tp;
853	vm_page_t m;
854
855	tp = tsb_kvtotte(va);
856	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
857	    tp->tte_data);
858	m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
859	STAILQ_REMOVE(&m->md.tte_list, tp, tte, tte_link);
860	pmap_cache_remove(m, va);
861	TTE_ZERO(tp);
862}
863
864/*
865 * Inverse of pmap_kenter_flags, used by bus_space_unmap().
866 */
867void
868pmap_kremove_flags(vm_offset_t va)
869{
870	struct tte *tp;
871
872	tp = tsb_kvtotte(va);
873	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
874	    tp->tte_data);
875	TTE_ZERO(tp);
876}
877
878/*
879 * Map a range of physical addresses into kernel virtual address space.
880 *
881 * The value passed in *virt is a suggested virtual address for the mapping.
882 * Architectures which can support a direct-mapped physical to virtual region
883 * can return the appropriate address within that region, leaving '*virt'
884 * unchanged.  We cannot and therefore do not; *virt is updated with the
885 * first usable address after the mapped region.
886 */
887vm_offset_t
888pmap_map(vm_offset_t *virt, vm_offset_t pa_start, vm_offset_t pa_end, int prot)
889{
890	struct tte *tp;
891	vm_offset_t sva;
892	vm_offset_t va;
893	vm_offset_t pa;
894
895	pa = pa_start;
896	sva = *virt;
897	va = sva;
898	for (; pa < pa_end; pa += PAGE_SIZE, va += PAGE_SIZE) {
899		tp = tsb_kvtotte(va);
900		tp->tte_vpn = TV_VPN(va, TS_8K);
901		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW |
902		    TD_CP | TD_CV | TD_P | TD_W;
903	}
904	tlb_range_demap(kernel_pmap, sva, sva + (pa_end - pa_start) - 1);
905	*virt = va;
906	return (sva);
907}
908
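/*
 * Return a virtual address in the direct-mapped region for the given page,
 * encoding its cache color, or the uncacheable attribute if the page has
 * mappings of both colors.
 */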
909static vm_offset_t
910pmap_map_direct(vm_page_t m)
911{
912	vm_offset_t pa;
913	vm_offset_t va;
914
915	pa = VM_PAGE_TO_PHYS(m);
916	if (m->md.color == -1) {
917		KASSERT(m->md.colors[0] != 0 && m->md.colors[1] != 0,
918		    ("pmap_map_direct: non-cacheable, only 1 color"));
919		va = TLB_DIRECT_MASK | pa | TLB_DIRECT_UNCACHEABLE;
920	} else {
921		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(m->md.color)] == 0,
922		    ("pmap_map_direct: cacheable, mappings of other color"));
923		va = TLB_DIRECT_MASK | pa |
924		    (m->md.color << TLB_DIRECT_COLOR_SHIFT);
925	}
926	return (va << TLB_DIRECT_SHIFT);
927}
928
929/*
930 * Map a list of wired pages into kernel virtual address space.  This is
931 * intended for temporary mappings which do not need page modification or
932 * references recorded.  Existing mappings in the region are overwritten.
933 */
934void
935pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
936{
937	vm_offset_t va;
938	int i;
939
940	va = sva;
941	for (i = 0; i < count; i++, va += PAGE_SIZE)
942		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
943	tlb_range_demap(kernel_pmap, sva, sva + (count * PAGE_SIZE) - 1);
944}
945
946/*
947 * As above, but take an additional flags argument and call
948 * pmap_kenter_flags().
949 */
950void
951pmap_qenter_flags(vm_offset_t sva, vm_page_t *m, int count, u_long fl)
952{
953	vm_offset_t va;
954	int i;
955
956	va = sva;
957	for (i = 0; i < count; i++, va += PAGE_SIZE)
958		pmap_kenter_flags(va, VM_PAGE_TO_PHYS(m[i]), fl);
959	tlb_range_demap(kernel_pmap, sva, sva + (count * PAGE_SIZE) - 1);
960}
961
962/*
963 * Remove page mappings from kernel virtual address space.  Intended for
964 * temporary mappings entered by pmap_qenter.
965 */
966void
967pmap_qremove(vm_offset_t sva, int count)
968{
969	vm_offset_t va;
970	int i;
971
972	va = sva;
973	for (i = 0; i < count; i++, va += PAGE_SIZE)
974		pmap_kremove(va);
975	tlb_range_demap(kernel_pmap, sva, sva + (count * PAGE_SIZE) - 1);
976}
977
978#ifndef KSTACK_MAX_PAGES
979#define KSTACK_MAX_PAGES 32
980#endif
981
982/*
983 * Create the kernel stack and pcb for a new thread.
984 * This routine directly affects the fork performance of a process and
985 * the creation performance of a thread.
986 */
987void
988pmap_new_thread(struct thread *td, int pages)
989{
990	vm_page_t ma[KSTACK_MAX_PAGES];
991	vm_object_t ksobj;
992	vm_offset_t ks;
993	vm_page_t m;
994	u_int i;
995
996	/* Bounds check */
997	if (pages <= 1)
998		pages = KSTACK_PAGES;
999	else if (pages > KSTACK_MAX_PAGES)
1000		pages = KSTACK_MAX_PAGES;
1001
1002	/*
1003	 * Allocate an object for the kstack.
1004	 */
1005	ksobj = vm_object_allocate(OBJT_DEFAULT, pages);
1006	td->td_kstack_obj = ksobj;
1007
1008	/*
1009	 * Get a kernel virtual address for the kstack for this thread.
1010	 */
1011	ks = kmem_alloc_nofault(kernel_map,
1012	   (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
1013	if (ks == 0)
1014		panic("pmap_new_thread: kstack allocation failed");
1015	if (KSTACK_GUARD_PAGES != 0) {
1016		tlb_page_demap(kernel_pmap, ks);
1017		ks += KSTACK_GUARD_PAGES * PAGE_SIZE;
1018	}
1019	td->td_kstack = ks;
1020
1021	/*
1022	 * Knowing the number of pages allocated is useful when you
1023	 * want to deallocate them.
1024	 */
1025	td->td_kstack_pages = pages;
1026
1027	for (i = 0; i < pages; i++) {
1028		/*
1029		 * Get a kernel stack page.
1030		 */
1031		m = vm_page_grab(ksobj, i,
1032		    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED);
1033		ma[i] = m;
1034
1035		vm_page_wakeup(m);
1036		vm_page_flag_clear(m, PG_ZERO);
1037		m->valid = VM_PAGE_BITS_ALL;
1038	}
1039
1040	/*
1041	 * Enter the pages into the kernel address space.
1042	 */
1043	pmap_qenter(ks, ma, pages);
1044}
1045
1046/*
1047 * Dispose the kernel stack for a thread that has exited.
1048 * This routine directly impacts the exit performance of a process and thread.
1049 */
1050void
1051pmap_dispose_thread(struct thread *td)
1052{
1053	vm_object_t ksobj;
1054	vm_offset_t ks;
1055	vm_page_t m;
1056	int i;
1057	int pages;
1058
1059	pages = td->td_kstack_pages;
1060	ksobj = td->td_kstack_obj;
1061	ks = td->td_kstack;
1062	for (i = 0; i < pages ; i++) {
1063		m = vm_page_lookup(ksobj, i);
1064		if (m == NULL)
1065			panic("pmap_dispose_thread: kstack already missing?");
1066		vm_page_lock_queues();
1067		vm_page_busy(m);
1068		vm_page_unwire(m, 0);
1069		vm_page_free(m);
1070		vm_page_unlock_queues();
1071	}
1072	pmap_qremove(ks, pages);
1073	kmem_free(kernel_map, ks - (KSTACK_GUARD_PAGES * PAGE_SIZE),
1074	    (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
1075	vm_object_deallocate(ksobj);
1076}
1077
1078/*
1079 * Set up a variable sized alternate kstack.
1080 */
1081void
1082pmap_new_altkstack(struct thread *td, int pages)
1083{
1084	/* shuffle the original stack */
1085	td->td_altkstack_obj = td->td_kstack_obj;
1086	td->td_altkstack = td->td_kstack;
1087	td->td_altkstack_pages = td->td_kstack_pages;
1088
1089	pmap_new_thread(td, pages);
1090}
1091
1092void
1093pmap_dispose_altkstack(struct thread *td)
1094{
1095	pmap_dispose_thread(td);
1096
1097	/* restore the original kstack */
1098	td->td_kstack = td->td_altkstack;
1099	td->td_kstack_obj = td->td_altkstack_obj;
1100	td->td_kstack_pages = td->td_altkstack_pages;
1101	td->td_altkstack = 0;
1102	td->td_altkstack_obj = NULL;
1103	td->td_altkstack_pages = 0;
1104}
1105
1106/*
1107 * Allow the kernel stack for a thread to be prejudicially paged out.
1108 */
1109void
1110pmap_swapout_thread(struct thread *td)
1111{
1112	vm_object_t ksobj;
1113	vm_offset_t ks;
1114	vm_page_t m;
1115	int i;
1116	int pages;
1117
1118	pages = td->td_kstack_pages;
1119	ksobj = td->td_kstack_obj;
1120	ks = (vm_offset_t)td->td_kstack;
1121	for (i = 0; i < pages; i++) {
1122		m = vm_page_lookup(ksobj, i);
1123		if (m == NULL)
1124			panic("pmap_swapout_thread: kstack already missing?");
1125		vm_page_lock_queues();
1126		vm_page_dirty(m);
1127		vm_page_unwire(m, 0);
1128		vm_page_unlock_queues();
1129	}
1130	pmap_qremove(ks, pages);
1131}
1132
1133/*
1134 * Bring the kernel stack for a specified thread back in.
1135 */
1136void
1137pmap_swapin_thread(struct thread *td)
1138{
1139	vm_page_t ma[KSTACK_MAX_PAGES];
1140	vm_object_t ksobj;
1141	vm_offset_t ks;
1142	vm_page_t m;
1143	int rv;
1144	int i;
1145	int pages;
1146
1147	pages = td->td_kstack_pages;
1148	ksobj = td->td_kstack_obj;
1149	ks = td->td_kstack;
1150	for (i = 0; i < pages; i++) {
1151		m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1152		if (m->valid != VM_PAGE_BITS_ALL) {
1153			rv = vm_pager_get_pages(ksobj, &m, 1, 0);
1154			if (rv != VM_PAGER_OK)
1155				panic("pmap_swapin_thread: cannot get kstack");
1156			m = vm_page_lookup(ksobj, i);
1157			m->valid = VM_PAGE_BITS_ALL;
1158		}
1159		ma[i] = m;
1160		vm_page_lock_queues();
1161		vm_page_wire(m);
1162		vm_page_wakeup(m);
1163		vm_page_unlock_queues();
1164	}
1165	pmap_qenter(ks, ma, pages);
1166}
1167
1168/*
1169 * Initialize the pmap associated with process 0.
1170 */
1171void
1172pmap_pinit0(pmap_t pm)
1173{
1174	int i;
1175
1176	for (i = 0; i < MAXCPU; i++)
1177		pm->pm_context[i] = 0;
1178	pm->pm_active = 0;
1179	pm->pm_tsb = NULL;
1180	pm->pm_tsb_obj = NULL;
1181	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1182}
1183
1184/*
1185 * Initialize a preallocated and zeroed pmap structure, such as one in a
1186 * vmspace structure.
1187 */
1188void
1189pmap_pinit(pmap_t pm)
1190{
1191	vm_page_t ma[TSB_PAGES];
1192	vm_page_t m;
1193	int i;
1194
1195	/*
1196	 * Allocate kva space for the tsb.
1197	 */
1198	if (pm->pm_tsb == NULL) {
1199		pm->pm_tsb = (struct tte *)kmem_alloc_pageable(kernel_map,
1200		    TSB_BSIZE);
1201	}
1202
1203	/*
1204	 * Allocate an object for it.
1205	 */
1206	if (pm->pm_tsb_obj == NULL)
1207		pm->pm_tsb_obj = vm_object_allocate(OBJT_DEFAULT, TSB_PAGES);
1208
1209	for (i = 0; i < TSB_PAGES; i++) {
1210		m = vm_page_grab(pm->pm_tsb_obj, i,
1211		    VM_ALLOC_RETRY | VM_ALLOC_ZERO);
1212		if ((m->flags & PG_ZERO) == 0)
1213			pmap_zero_page(m);
1214
1215		m->wire_count++;
1216		cnt.v_wire_count++;
1217
1218		vm_page_flag_clear(m, PG_BUSY);
1219		m->valid = VM_PAGE_BITS_ALL;
1220
1221		ma[i] = m;
1222	}
1223	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1224
1225	for (i = 0; i < MAXCPU; i++)
1226		pm->pm_context[i] = -1;
1227	pm->pm_active = 0;
1228	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1229}
1230
1231void
1232pmap_pinit2(pmap_t pmap)
1233{
1234	/* XXX: Remove this stub when no longer called */
1235}
1236
1237/*
1238 * Release any resources held by the given physical map.
1239 * Called when a pmap initialized by pmap_pinit is being released.
1240 * Should only be called if the map contains no valid mappings.
1241 */
1242void
1243pmap_release(pmap_t pm)
1244{
1245	vm_object_t obj;
1246	vm_page_t m;
1247
1248	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1249	    pm->pm_context[PCPU_GET(cpuid)], pm->pm_tsb);
1250	obj = pm->pm_tsb_obj;
1251	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1252	KASSERT(pmap_resident_count(pm) == 0,
1253	    ("pmap_release: resident pages %ld != 0",
1254	    pmap_resident_count(pm)));
1255	while (!TAILQ_EMPTY(&obj->memq)) {
1256		m = TAILQ_FIRST(&obj->memq);
1257		vm_page_lock_queues();
1258		if (vm_page_sleep_if_busy(m, FALSE, "pmaprl"))
1259			continue;
1260		vm_page_busy(m);
1261		KASSERT(m->hold_count == 0,
1262		    ("pmap_release: freeing held tsb page"));
1263		m->wire_count--;
1264		cnt.v_wire_count--;
1265		vm_page_free_zero(m);
1266		vm_page_unlock_queues();
1267	}
1268	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1269}
1270
1271/*
1272 * Grow the number of kernel page table entries.  Unneeded.
1273 */
1274void
1275pmap_growkernel(vm_offset_t addr)
1276{
1277
1278	panic("pmap_growkernel: can't grow kernel");
1279}
1280
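/*
 * Remove a single mapping: unlink the tte from its page, update the wired
 * and resident counts, transfer the referenced and modified bits to the
 * page, and zero the tte.  Returns 0 once a user pmap has no resident
 * pages left, so that callers may stop early.
 */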
1281int
1282pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1283		vm_offset_t va)
1284{
1285	vm_page_t m;
1286
1287	m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1288	STAILQ_REMOVE(&m->md.tte_list, tp, tte, tte_link);
1289	if ((tp->tte_data & TD_WIRED) != 0)
1290		pm->pm_stats.wired_count--;
1291	if ((tp->tte_data & TD_PV) != 0) {
1292		if ((tp->tte_data & TD_W) != 0 &&
1293		    pmap_track_modified(pm, va))
1294			vm_page_dirty(m);
1295		if ((tp->tte_data & TD_REF) != 0)
1296			vm_page_flag_set(m, PG_REFERENCED);
1297		if (STAILQ_EMPTY(&m->md.tte_list))
1298			vm_page_flag_clear(m, PG_WRITEABLE);
1299		pm->pm_stats.resident_count--;
1300	}
1301	pmap_cache_remove(m, va);
1302	TTE_ZERO(tp);
1303	if (PMAP_REMOVE_DONE(pm))
1304		return (0);
1305	return (1);
1306}
1307
1308/*
1309 * Remove the given range of addresses from the specified map.
1310 */
1311void
1312pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1313{
1314	struct tte *tp;
1315	vm_offset_t va;
1316
1317	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1318	    pm->pm_context[PCPU_GET(cpuid)], start, end);
1319	if (PMAP_REMOVE_DONE(pm))
1320		return;
1321	if (end - start > PMAP_TSB_THRESH) {
1322		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1323		tlb_context_demap(pm);
1324	} else {
1325		for (va = start; va < end; va += PAGE_SIZE) {
1326			if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1327				if (!pmap_remove_tte(pm, NULL, tp, va))
1328					break;
1329			}
1330		}
1331		tlb_range_demap(pm, start, end - 1);
1332	}
1333}
1334
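/*
 * Remove all managed mappings of the given page.
 */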
1335void
1336pmap_remove_all(vm_page_t m)
1337{
1338	struct pmap *pm;
1339	struct tte *tpn;
1340	struct tte *tp;
1341	vm_offset_t va;
1342
1343	KASSERT((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1344	   ("pmap_remove_all: illegal for unmanaged page %#lx",
1345	   VM_PAGE_TO_PHYS(m)));
1346	for (tp = STAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1347		tpn = STAILQ_NEXT(tp, tte_link);
1348		if ((tp->tte_data & TD_PV) == 0)
1349			continue;
1350		pm = TTE_GET_PMAP(tp);
1351		va = TTE_GET_VA(tp);
1352		if ((tp->tte_data & TD_WIRED) != 0)
1353			pm->pm_stats.wired_count--;
1354		if ((tp->tte_data & TD_REF) != 0)
1355			vm_page_flag_set(m, PG_REFERENCED);
1356		if ((tp->tte_data & TD_W) != 0 &&
1357		    pmap_track_modified(pm, va))
1358			vm_page_dirty(m);
1359		tp->tte_data &= ~TD_V;
1360		tlb_page_demap(pm, va);
1361		STAILQ_REMOVE(&m->md.tte_list, tp, tte, tte_link);
1362		pm->pm_stats.resident_count--;
1363		pmap_cache_remove(m, va);
1364		TTE_ZERO(tp);
1365	}
1366	vm_page_flag_clear(m, PG_WRITEABLE);
1367}
1368
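/*
 * Revoke write permission from a single mapping, transferring the
 * referenced and modified bits to the page first.
 */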
1369int
1370pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1371		 vm_offset_t va)
1372{
1373	vm_page_t m;
1374
1375	if ((tp->tte_data & TD_PV) != 0) {
1376		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1377		if ((tp->tte_data & TD_REF) != 0) {
1378			vm_page_flag_set(m, PG_REFERENCED);
1379			tp->tte_data &= ~TD_REF;
1380		}
1381		if ((tp->tte_data & TD_W) != 0 &&
1382		    pmap_track_modified(pm, va)) {
1383			vm_page_dirty(m);
1384		}
1385	}
1386	tp->tte_data &= ~(TD_W | TD_SW);
1387	return (0);
1388}
1389
1390/*
1391 * Set the physical protection on the specified range of this map as requested.
1392 */
1393void
1394pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1395{
1396	vm_offset_t va;
1397	struct tte *tp;
1398
1399	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1400	    pm->pm_context[PCPU_GET(cpuid)], sva, eva, prot);
1401
1402	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1403		pmap_remove(pm, sva, eva);
1404		return;
1405	}
1406
1407	if (prot & VM_PROT_WRITE)
1408		return;
1409
1410	if (eva - sva > PMAP_TSB_THRESH) {
1411		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1412		tlb_context_demap(pm);
1413	} else {
1414		for (va = sva; va < eva; va += PAGE_SIZE) {
1415			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1416				pmap_protect_tte(pm, NULL, tp, va);
1417		}
1418		tlb_range_demap(pm, sva, eva - 1);
1419	}
1420}
1421
1422/*
1423 * Map the given physical page at the specified virtual address in the
1424 * target pmap with the protection requested.  If specified the page
1425 * will be wired down.
1426 */
1427void
1428pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1429	   boolean_t wired)
1430{
1431	struct tte *tp;
1432	vm_offset_t pa;
1433	u_long data;
1434
1435	pa = VM_PAGE_TO_PHYS(m);
1436	CTR6(KTR_PMAP,
1437	    "pmap_enter: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1438	    pm->pm_context[PCPU_GET(cpuid)], m, va, pa, prot, wired);
1439
1440	/*
1441	 * If there is an existing mapping, and the physical address has not
1442	 * changed, this must be a protection or wiring change.
1443	 */
1444	if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1445		CTR0(KTR_PMAP, "pmap_enter: update");
1446		PMAP_STATS_INC(pmap_enter_nupdate);
1447
1448		/*
1449		 * Wiring change, just update stats.
1450		 */
1451		if (wired) {
1452			if ((tp->tte_data & TD_WIRED) == 0) {
1453				tp->tte_data |= TD_WIRED;
1454				pm->pm_stats.wired_count++;
1455			}
1456		} else {
1457			if ((tp->tte_data & TD_WIRED) != 0) {
1458				tp->tte_data &= ~TD_WIRED;
1459				pm->pm_stats.wired_count--;
1460			}
1461		}
1462
1463		/*
1464		 * Save the old bits and clear the ones we're interested in.
1465		 */
1466		data = tp->tte_data;
1467		tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1468
1469		/*
1470		 * If we're turning off write permissions, sense modify status.
1471		 */
1472		if ((prot & VM_PROT_WRITE) != 0) {
1473			tp->tte_data |= TD_SW;
1474			if (wired) {
1475				tp->tte_data |= TD_W;
1476			}
1477		} else if ((data & TD_W) != 0 &&
1478		    pmap_track_modified(pm, va)) {
1479			vm_page_dirty(m);
1480		}
1481
1482		/*
1483		 * If we're turning on execute permissions, flush the icache.
1484		 */
1485		if ((prot & VM_PROT_EXECUTE) != 0) {
1486			if ((data & TD_EXEC) == 0) {
1487				PMAP_STATS_INC(pmap_niflush);
1488				icache_page_inval(pa);
1489			}
1490			tp->tte_data |= TD_EXEC;
1491		}
1492
1493		/*
1494		 * Delete the old mapping.
1495		 */
1496		tlb_page_demap(pm, TTE_GET_VA(tp));
1497	} else {
1498		/*
1499		 * If there is an existing mapping, but it's for a different
1500		 * physical address, delete the old mapping.
1501		 */
1502		if (tp != NULL) {
1503			CTR0(KTR_PMAP, "pmap_enter: replace");
1504			PMAP_STATS_INC(pmap_enter_nreplace);
1505			pmap_remove_tte(pm, NULL, tp, va);
1506			tlb_page_demap(pm, va);
1507		} else {
1508			CTR0(KTR_PMAP, "pmap_enter: new");
1509			PMAP_STATS_INC(pmap_enter_nnew);
1510		}
1511
1512		/*
1513		 * Now set up the data and install the new mapping.
1514		 */
1515		data = TD_V | TD_8K | TD_PA(pa) | TD_CP;
1516		if (pm == kernel_pmap)
1517			data |= TD_P;
1518		if (prot & VM_PROT_WRITE)
1519			data |= TD_SW;
1520		if (prot & VM_PROT_EXECUTE) {
1521			data |= TD_EXEC;
1522			PMAP_STATS_INC(pmap_niflush);
1523			icache_page_inval(pa);
1524		}
1525
1526		/*
1527		 * If it's wired, update stats.  We also don't need reference or
1528		 * modify tracking for wired mappings, so set the bits now.
1529		 */
1530		if (wired) {
1531			pm->pm_stats.wired_count++;
1532			data |= TD_REF | TD_WIRED;
1533			if ((prot & VM_PROT_WRITE) != 0)
1534				data |= TD_W;
1535		}
1536
1537		tsb_tte_enter(pm, m, va, TS_8K, data);
1538	}
1539}
1540
1541void
1542pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1543		    vm_pindex_t pindex, vm_size_t size, int limit)
1544{
1545	/* XXX */
1546}
1547
1548void
1549pmap_prefault(pmap_t pm, vm_offset_t va, vm_map_entry_t entry)
1550{
1551	/* XXX */
1552}
1553
1554/*
1555 * Change the wiring attribute for a map/virtual-address pair.
1556 * The mapping must already exist in the pmap.
1557 */
1558void
1559pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1560{
1561	struct tte *tp;
1562
1563	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1564		if (wired) {
1565			if ((tp->tte_data & TD_WIRED) == 0)
1566				pm->pm_stats.wired_count++;
1567			tp->tte_data |= TD_WIRED;
1568		} else {
1569			if ((tp->tte_data & TD_WIRED) != 0)
1570				pm->pm_stats.wired_count--;
1571			tp->tte_data &= ~TD_WIRED;
1572		}
1573	}
1574}
1575
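/*
 * Copy a single mapping into dst_pmap as a clean, read-only entry, unless
 * dst_pmap already maps the virtual address.
 */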
1576static int
1577pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp, vm_offset_t va)
1578{
1579	vm_page_t m;
1580	u_long data;
1581
1582	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1583		data = tp->tte_data &
1584		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1585		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1586		tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1587	}
1588	return (1);
1589}
1590
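/*
 * Copy the mappings in the given range from src_pmap to dst_pmap.  Only
 * ranges with identical source and destination addresses are handled.
 */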
1591void
1592pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1593	  vm_size_t len, vm_offset_t src_addr)
1594{
1595	struct tte *tp;
1596	vm_offset_t va;
1597
1598	if (dst_addr != src_addr)
1599		return;
1600	if (len > PMAP_TSB_THRESH) {
1601		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1602		    pmap_copy_tte);
1603		tlb_context_demap(dst_pmap);
1604	} else {
1605		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE) {
1606			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1607				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1608		}
1609		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1610	}
1611}
1612
1613/*
1614 * Zero a page of physical memory by temporarily mapping it into the tlb.
1615 */
1616void
1617pmap_zero_page(vm_page_t m)
1618{
1619	vm_offset_t va;
1620
1621	va = pmap_map_direct(m);
1622	CTR2(KTR_PMAP, "pmap_zero_page: pa=%#lx va=%#lx",
1623	    VM_PAGE_TO_PHYS(m), va);
1624	bzero((void *)va, PAGE_SIZE);
1625}
1626
1627void
1628pmap_zero_page_area(vm_page_t m, int off, int size)
1629{
1630	vm_offset_t va;
1631
1632	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1633	va = pmap_map_direct(m);
1634	CTR4(KTR_PMAP, "pmap_zero_page_area: pa=%#lx va=%#lx off=%#x size=%#x",
1635	    VM_PAGE_TO_PHYS(m), va, off, size);
1636	bzero((void *)(va + off), size);
1637}
1638
1639void
1640pmap_zero_page_idle(vm_page_t m)
1641{
1642	vm_offset_t va;
1643
1644	va = pmap_map_direct(m);
1645	CTR2(KTR_PMAP, "pmap_zero_page_idle: pa=%#lx va=%#lx",
1646	    VM_PAGE_TO_PHYS(m), va);
1647	bzero((void *)va, PAGE_SIZE);
1648}
1649
1650/*
1651 * Copy a page of physical memory by temporarily mapping it into the tlb.
1652 */
1653void
1654pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1655{
1656	vm_offset_t dst;
1657	vm_offset_t src;
1658
1659	src = pmap_map_direct(msrc);
1660	dst = pmap_map_direct(mdst);
1661	CTR4(KTR_PMAP, "pmap_copy_page: src=%#lx va=%#lx dst=%#lx va=%#lx",
1662	    VM_PAGE_TO_PHYS(msrc), src, VM_PAGE_TO_PHYS(mdst), dst);
1663	bcopy((void *)src, (void *)dst, PAGE_SIZE);
1664}
1665
1666/*
1667 * Returns true if the pmap's pv is one of the first
1668 * 16 pvs linked to from this page.  This count may
1669 * be changed upwards or downwards in the future; it
1670 * is only necessary that true be returned for a small
1671 * subset of pmaps for proper page aging.
1672 */
1673boolean_t
1674pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1675{
1676	struct tte *tp;
1677	int loops;
1678
1679	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1680		return (FALSE);
1681	loops = 0;
1682	STAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1683		if ((tp->tte_data & TD_PV) == 0)
1684			continue;
1685		if (TTE_GET_PMAP(tp) == pm)
1686			return (TRUE);
1687		if (++loops >= 16)
1688			break;
1689	}
1690	return (FALSE);
1691}
1692
1693/*
1694 * Remove all pages from specified address space, this aids process exit
1695 * speeds.  This is much faster than pmap_remove in the case of running down
1696 * an entire address space.  Only works for the current pmap.
1697 */
1698void
1699pmap_remove_pages(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1700{
1701}
1702
1703/*
1704 * Lower the permission for all mappings to a given page.
1705 */
1706void
1707pmap_page_protect(vm_page_t m, vm_prot_t prot)
1708{
1709
1710	if ((prot & VM_PROT_WRITE) == 0) {
1711		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
1712			pmap_clear_write(m);
1713		else
1714			pmap_remove_all(m);
1715	}
1716}
1717
1718vm_offset_t
1719pmap_phys_address(int ppn)
1720{
1721
1722	return (sparc64_ptob(ppn));
1723}
1724
1725/*
1726 *	pmap_ts_referenced:
1727 *
1728 *	Return a count of reference bits for a page, clearing those bits.
1729 *	It is not necessary for every reference bit to be cleared, but it
1730 *	is necessary that 0 only be returned when there are truly no
1731 *	reference bits set.
1732 *
1733 *	XXX: The exact number of bits to check and clear is a matter that
1734 *	should be tested and standardized at some point in the future for
1735 *	optimal aging of shared pages.
1736 */
1737
1738int
1739pmap_ts_referenced(vm_page_t m)
1740{
1741	struct tte *tpf;
1742	struct tte *tpn;
1743	struct tte *tp;
1744	int count;
1745
1746	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1747		return (0);
1748	count = 0;
1749	if ((tp = STAILQ_FIRST(&m->md.tte_list)) != NULL) {
1750		tpf = tp;
1751		do {
1752			tpn = STAILQ_NEXT(tp, tte_link);
1753			STAILQ_REMOVE(&m->md.tte_list, tp, tte, tte_link);
1754			STAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1755			if ((tp->tte_data & TD_PV) == 0 ||
1756			    !pmap_track_modified(TTE_GET_PMAP(tp),
1757			     TTE_GET_VA(tp)))
1758				continue;
1759			if ((tp->tte_data & TD_REF) != 0) {
1760				tp->tte_data &= ~TD_REF;
1761				if (++count > 4)
1762					break;
1763			}
1764		} while ((tp = tpn) != NULL && tp != tpf);
1765	}
1766	return (count);
1767}
1768
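/*
 * Return whether the given page has been modified, as indicated by the
 * modify bit in any of its managed mappings.
 */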
1769boolean_t
1770pmap_is_modified(vm_page_t m)
1771{
1772	struct tte *tp;
1773
1774	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1775		return FALSE;
1776	STAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1777		if ((tp->tte_data & TD_PV) == 0 ||
1778		    !pmap_track_modified(TTE_GET_PMAP(tp), TTE_GET_VA(tp)))
1779			continue;
1780		if ((tp->tte_data & TD_W) != 0)
1781			return (TRUE);
1782	}
1783	return (FALSE);
1784}
1785
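/*
 * Clear the modify bit in all managed mappings of the given page.
 */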
1786void
1787pmap_clear_modify(vm_page_t m)
1788{
1789	struct tte *tp;
1790
1791	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1792		return;
1793	STAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1794		if ((tp->tte_data & TD_PV) == 0)
1795			continue;
1796		if ((tp->tte_data & TD_W) != 0) {
1797			tp->tte_data &= ~TD_W;
1798			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1799		}
1800	}
1801}
1802
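/*
 * Clear the reference bit in all managed mappings of the given page.
 */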
1803void
1804pmap_clear_reference(vm_page_t m)
1805{
1806	struct tte *tp;
1807
1808	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1809		return;
1810	STAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1811		if ((tp->tte_data & TD_PV) == 0)
1812			continue;
1813		if ((tp->tte_data & TD_REF) != 0) {
1814			tp->tte_data &= ~TD_REF;
1815			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1816		}
1817	}
1818}
1819
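/*
 * Revoke write access from all managed mappings of the given page,
 * transferring any modifications to the page first.
 */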
1820void
1821pmap_clear_write(vm_page_t m)
1822{
1823	struct tte *tp;
1824
1825	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
1826	    (m->flags & PG_WRITEABLE) == 0)
1827		return;
1828	STAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1829		if ((tp->tte_data & TD_PV) == 0)
1830			continue;
1831		if ((tp->tte_data & (TD_SW | TD_W)) != 0) {
1832			if ((tp->tte_data & TD_W) != 0 &&
1833			    pmap_track_modified(TTE_GET_PMAP(tp),
1834			    TTE_GET_VA(tp)))
1835				vm_page_dirty(m);
1836			tp->tte_data &= ~(TD_SW | TD_W);
1837			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1838		}
1839	}
1840	vm_page_flag_clear(m, PG_WRITEABLE);
1841}
1842
1843int
1844pmap_mincore(pmap_t pm, vm_offset_t addr)
1845{
1846	/* TODO; */
1847	return (0);
1848}
1849
1850/*
1851 * Activate a user pmap.  The pmap must be activated before its address space
1852 * can be accessed in any way.
1853 */
1854void
1855pmap_activate(struct thread *td)
1856{
1857	struct vmspace *vm;
1858	vm_offset_t tsb;
1859	u_long context;
1860	pmap_t pm;
1861
1862	vm = td->td_proc->p_vmspace;
1863	pm = &vm->vm_pmap;
1864	tsb = (vm_offset_t)pm->pm_tsb;
1865
1866	KASSERT(pm->pm_active == 0, ("pmap_activate: pmap already active?"));
1867	KASSERT(pm->pm_context[PCPU_GET(cpuid)] != 0,
1868	    ("pmap_activate: activating nucleus context?"));
1869
1870	mtx_lock_spin(&sched_lock);
1871	stxa(AA_DMMU_TSB, ASI_DMMU, tsb);
1872	stxa(AA_IMMU_TSB, ASI_IMMU, tsb);
1873	membar(Sync);
1874	context = pmap_context_alloc();
1875	pm->pm_context[PCPU_GET(cpuid)] = context;
1876	pm->pm_active |= PCPU_GET(cpumask);
1877	PCPU_SET(vmspace, vm);
1878	stxa(AA_DMMU_PCXR, ASI_DMMU, context);
1879	membar(Sync);
1880	mtx_unlock_spin(&sched_lock);
1881}
1882
1883vm_offset_t
1884pmap_addr_hint(vm_object_t object, vm_offset_t va, vm_size_t size)
1885{
1886
1887	return (va);
1888}
1889