1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *      This product includes software developed by the University of
24 *      California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
42 * $FreeBSD: head/sys/sparc64/sparc64/pmap.c 97027 2002-05-21 00:29:02Z jake $
43 */
44
45/*
46 * Manages physical address maps.
47 *
48 * In addition to hardware address maps, this module is called upon to
49 * provide software-use-only maps which may or may not be stored in the
50 * same form as hardware maps.  These pseudo-maps are used to store
51 * intermediate results from copy operations to and from address spaces.
52 *
53 * Since the information managed by this module is also stored by the
54 * logical address mapping module, this module may throw away valid virtual
55 * to physical mappings at almost any time.  However, invalidations of
56 * mappings must be done as requested.
57 *
58 * In order to cope with hardware architectures which make virtual to
59 * physical map invalidates expensive, this module may delay invalidate
60 * or reduced protection operations until such time as they are actually
61 * necessary.  This module is given full information as to which processors
62 * are currently using which maps, and to when physical maps must be made
63 * correct.
64 */
65
66#include "opt_msgbuf.h"
67#include "opt_pmap.h"
68
69#include <sys/param.h>
70#include <sys/kernel.h>
71#include <sys/ktr.h>
72#include <sys/lock.h>
73#include <sys/msgbuf.h>
74#include <sys/mutex.h>
75#include <sys/proc.h>
76#include <sys/smp.h>
77#include <sys/sysctl.h>
78#include <sys/systm.h>
79#include <sys/vmmeter.h>
80
81#include <dev/ofw/openfirm.h>
82
83#include <vm/vm.h>
84#include <vm/vm_param.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_page.h>
87#include <vm/vm_map.h>
88#include <vm/vm_object.h>
89#include <vm/vm_extern.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_pager.h>
92#include <vm/uma.h>
93
94#include <machine/cache.h>
95#include <machine/frame.h>
96#include <machine/md_var.h>
97#include <machine/pv.h>
98#include <machine/smp.h>
99#include <machine/tlb.h>
100#include <machine/tte.h>
101#include <machine/tsb.h>
102
103#define	PMAP_DEBUG
104
105#ifndef	PMAP_SHPGPERPROC
106#define	PMAP_SHPGPERPROC	200
107#endif
108
109struct mem_region {
110	vm_offset_t mr_start;
111	vm_offset_t mr_size;
112};
113
114struct ofw_map {
115	vm_offset_t om_start;
116	vm_offset_t om_size;
117	u_long	om_tte;
118};
119
120/*
121 * Virtual and physical address of message buffer.
122 */
123struct msgbuf *msgbufp;
124vm_offset_t msgbuf_phys;
125
126/*
127 * Physical addresses of first and last available physical page.
128 */
129vm_offset_t avail_start;
130vm_offset_t avail_end;
131
132int pmap_pagedaemon_waken;
133
134/*
135 * Map of physical memory regions.
136 */
137vm_offset_t phys_avail[128];
138static struct mem_region mra[128];
139static struct ofw_map translations[128];
140static int translations_size;
141
142/*
143 * First and last available kernel virtual addresses.
144 */
145vm_offset_t virtual_avail;
146vm_offset_t virtual_end;
147vm_offset_t kernel_vm_end;
148
149/*
150 * Kernel pmap.
151 */
152struct pmap kernel_pmap_store;
153
154static boolean_t pmap_initialized = FALSE;
155
156/*
157 * Allocate physical memory for use in pmap_bootstrap.
158 */
159static vm_offset_t pmap_bootstrap_alloc(vm_size_t size);
160
161/*
162 * If a user pmap is processed with pmap_remove and the resident count
163 * drops to 0, there are no more pages to remove, so we need not
164 * continue.
165 */
166#define	PMAP_REMOVE_DONE(pm) \
167	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
168
169/*
170 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
171 * and pmap_protect() instead of trying each virtual address.
172 */
173#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
174
175/* Callbacks for tsb_foreach. */
176static tsb_callback_t pmap_remove_tte;
177static tsb_callback_t pmap_protect_tte;
178
179#ifdef PMAP_STATS
180static long pmap_enter_nupdate;
181static long pmap_enter_nreplace;
182static long pmap_enter_nnew;
183static long pmap_ncache_enter;
184static long pmap_ncache_enter_nc;
185static long pmap_niflush;
186
187SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "Statistics");
188SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nupdate, CTLFLAG_RD,
189    &pmap_enter_nupdate, 0, "Number of pmap_enter() updates");
190SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nreplace, CTLFLAG_RD,
191    &pmap_enter_nreplace, 0, "Number of pmap_enter() replacements");
192SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nnew, CTLFLAG_RD,
193    &pmap_enter_nnew, 0, "Number of pmap_enter() additions");
194SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter, CTLFLAG_RD,
195    &pmap_ncache_enter, 0, "Number of pmap_cache_enter() calls");
196SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter_nc, CTLFLAG_RD,
197    &pmap_ncache_enter_nc, 0, "Number of pmap_cache_enter() nc");
198SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_niflush, CTLFLAG_RD,
199    &pmap_niflush, 0, "Number of pmap I$ flushes");
200
201#define	PMAP_STATS_INC(var)	atomic_add_long(&var, 1)
202#else
203#define	PMAP_STATS_INC(var)
204#endif
205
206/*
207 * Quick sort callout for comparing memory regions.
208 */
209static int mr_cmp(const void *a, const void *b);
210static int om_cmp(const void *a, const void *b);
211static int
212mr_cmp(const void *a, const void *b)
213{
214	const struct mem_region *mra;
215	const struct mem_region *mrb;
216
217	mra = a;
218	mrb = b;
219	if (mra->mr_start < mrb->mr_start)
220		return (-1);
221	else if (mra->mr_start > mrb->mr_start)
222		return (1);
223	else
224		return (0);
225}
226static int
227om_cmp(const void *a, const void *b)
228{
229	const struct ofw_map *oma;
230	const struct ofw_map *omb;
231
232	oma = a;
233	omb = b;
234	if (oma->om_start < omb->om_start)
235		return (-1);
236	else if (oma->om_start > omb->om_start)
237		return (1);
238	else
239		return (0);
240}
241
242/*
243 * Bootstrap the system enough to run with virtual memory.
244 */
245void
246pmap_bootstrap(vm_offset_t ekva)
247{
248	struct pmap *pm;
249	struct tte *tp;
250	vm_offset_t off;
251	vm_offset_t pa;
252	vm_offset_t va;
253	vm_size_t physsz;
254	ihandle_t pmem;
255	ihandle_t vmem;
256	int sz;
257	int i;
258	int j;
259
260	/*
261	 * Set the start and end of kva.  The kernel is loaded at the first
262	 * available 4 meg super page, so round up to the end of the page.
263	 */
264	virtual_avail = roundup2(ekva, PAGE_SIZE_4M);
265	virtual_end = VM_MAX_KERNEL_ADDRESS;
266
267	/*
268	 * Find out what physical memory is available from the prom and
269	 * initialize the phys_avail array.  This must be done before
270	 * pmap_bootstrap_alloc is called.
271	 */
272	if ((pmem = OF_finddevice("/memory")) == -1)
273		panic("pmap_bootstrap: finddevice /memory");
274	if ((sz = OF_getproplen(pmem, "available")) == -1)
275		panic("pmap_bootstrap: getproplen /memory/available");
276	if (sizeof(phys_avail) < sz)
277		panic("pmap_bootstrap: phys_avail too small");
278	if (sizeof(mra) < sz)
279		panic("pmap_bootstrap: mra too small");
280	bzero(mra, sz);
281	if (OF_getprop(pmem, "available", mra, sz) == -1)
282		panic("pmap_bootstrap: getprop /memory/available");
283	sz /= sizeof(*mra);
284	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
285	qsort(mra, sz, sizeof (*mra), mr_cmp);
286	physsz = 0;
287	for (i = 0, j = 0; i < sz; i++, j += 2) {
288		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
289		    mra[i].mr_size);
290		phys_avail[j] = mra[i].mr_start;
291		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
292		physsz += mra[i].mr_size;
293	}
294	physmem = btoc(physsz);
295
296	/*
297	 * Allocate the kernel tsb and lock it in the tlb.
298	 */
299	pa = pmap_bootstrap_alloc(KVA_PAGES * PAGE_SIZE_4M);
300	if (pa & PAGE_MASK_4M)
301		panic("pmap_bootstrap: tsb unaligned\n");
302	tsb_kernel_phys = pa;
303	tsb_kernel = (struct tte *)virtual_avail;
304	virtual_avail += KVA_PAGES * PAGE_SIZE_4M;
305	pmap_map_tsb();
306	bzero(tsb_kernel, KVA_PAGES * PAGE_SIZE_4M);
307
308	/*
309	 * Enter fake 8k pages for the 4MB kernel pages, so that
310	 * pmap_kextract() will work for them.
311	 */
312	for (i = 0; i < kernel_tlb_slots; i++) {
313		va = TTE_GET_VA(&kernel_ttes[i]);
314		pa = TTE_GET_PA(&kernel_ttes[i]);
315		for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
316			tp = tsb_kvtotte(va + off);
317			tp->tte_vpn = TV_VPN(va + off);
318			tp->tte_data = TD_V | TD_8K | TD_PA(pa + off) |
319			    TD_REF | TD_SW | TD_CP | TD_CV | TD_P | TD_W;
320		}
321	}
322
323	/*
324	 * Allocate a kernel stack with guard page for thread0 and map it into
325	 * the kernel tsb.
326	 */
327	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE);
328	kstack0_phys = pa;
329	kstack0 = virtual_avail + (KSTACK_GUARD_PAGES * PAGE_SIZE);
330	virtual_avail += (KSTACK_PAGES + KSTACK_GUARD_PAGES) * PAGE_SIZE;
331	for (i = 0; i < KSTACK_PAGES; i++) {
332		pa = kstack0_phys + i * PAGE_SIZE;
333		va = kstack0 + i * PAGE_SIZE;
334		tp = tsb_kvtotte(va);
335		tp->tte_vpn = TV_VPN(va);
336		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW |
337		    TD_CP | TD_CV | TD_P | TD_W;
338	}
339
340	/*
341	 * Allocate the message buffer.
342	 */
343	msgbuf_phys = pmap_bootstrap_alloc(MSGBUF_SIZE);
344
345	/*
346	 * Add the prom mappings to the kernel tsb.
347	 */
348	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
349		panic("pmap_bootstrap: finddevice /virtual-memory");
350	if ((sz = OF_getproplen(vmem, "translations")) == -1)
351		panic("pmap_bootstrap: getproplen translations");
352	if (sizeof(translations) < sz)
353		panic("pmap_bootstrap: translations too small");
354	bzero(translations, sz);
355	if (OF_getprop(vmem, "translations", translations, sz) == -1)
356		panic("pmap_bootstrap: getprop /virtual-memory/translations");
357	sz /= sizeof(*translations);
358	translations_size = sz;
359	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
360	qsort(translations, sz, sizeof (*translations), om_cmp);
361	for (i = 0; i < sz; i++) {
362		CTR3(KTR_PMAP,
363		    "translation: start=%#lx size=%#lx tte=%#lx",
364		    translations[i].om_start, translations[i].om_size,
365		    translations[i].om_tte);
366		if (translations[i].om_start < 0xf0000000)	/* XXX!!! */
367			continue;
368		for (off = 0; off < translations[i].om_size;
369		    off += PAGE_SIZE) {
370			va = translations[i].om_start + off;
371			tp = tsb_kvtotte(va);
372			tp->tte_vpn = TV_VPN(va);
373			tp->tte_data = translations[i].om_tte + off;
374		}
375	}
376
377	/*
378	 * Calculate the first and last available physical addresses.
379	 */
380	avail_start = phys_avail[0];
381	for (i = 0; phys_avail[i + 2] != 0; i += 2)
382		;
383	avail_end = phys_avail[i + 1];
384	Maxmem = sparc64_btop(avail_end);
385
386	/*
387	 * Allocate virtual address space for the message buffer.
388	 */
389	msgbufp = (struct msgbuf *)virtual_avail;
390	virtual_avail += round_page(MSGBUF_SIZE);
391
392	/*
393	 * Initialize the kernel pmap (which is statically allocated).
394	 */
395	pm = kernel_pmap;
396	for (i = 0; i < MAXCPU; i++)
397		pm->pm_context[i] = TLB_CTX_KERNEL;
398	pm->pm_active = ~0;
399	TAILQ_INIT(&pm->pm_pvlist);
400
401	/* XXX flush all non-locked tlb entries */
402}
403
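/*
 * Map the kernel TSB with locked 4MB data TLB entries, point the TSB base
 * registers at it and install the kernel context as the secondary context,
 * all with interrupts disabled.
 */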
404void
405pmap_map_tsb(void)
406{
407	vm_offset_t va;
408	vm_offset_t pa;
409	u_long data;
410	u_long s;
411	int i;
412
413	s = intr_disable();
414
415	/*
416	 * Map the 4mb tsb pages.
417	 */
418	for (i = 0; i < KVA_PAGES; i++) {
419		va = (vm_offset_t)tsb_kernel + i * PAGE_SIZE_4M;
420		pa = tsb_kernel_phys + i * PAGE_SIZE_4M;
421		/* XXX - cheetah */
422		data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
423		    TD_P | TD_W;
424		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
425		    TLB_TAR_CTX(TLB_CTX_KERNEL));
426		stxa(0, ASI_DTLB_DATA_IN_REG, data);
427		membar(Sync);
428	}
429
430	/*
431	 * Load the tsb registers.
432	 */
433	stxa(AA_DMMU_TSB, ASI_DMMU, (vm_offset_t)tsb_kernel);
434	stxa(AA_IMMU_TSB, ASI_IMMU, (vm_offset_t)tsb_kernel);
435	membar(Sync);
436	flush(tsb_kernel);
437
438	/*
439	 * Set the secondary context to be the kernel context (needed for
440	 * fp block operations in the kernel and the cache code).
441	 */
442	stxa(AA_DMMU_SCXR, ASI_DMMU, TLB_CTX_KERNEL);
443	membar(Sync);
444
445	intr_restore(s);
446}
447
448/*
449 * Allocate a physical page of memory directly from the phys_avail map.
450 * Can only be called from pmap_bootstrap before avail_start and avail_end are
451 * calculated.
452 */
453static vm_offset_t
454pmap_bootstrap_alloc(vm_size_t size)
455{
456	vm_offset_t pa;
457	int i;
458
459	size = round_page(size);
460	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
461		if (phys_avail[i + 1] - phys_avail[i] < size)
462			continue;
463		pa = phys_avail[i];
464		phys_avail[i] += size;
465		return (pa);
466	}
467	panic("pmap_bootstrap_alloc");
468}
469
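/*
 * All context numbers on this cpu are in use.  Flush all unlocked, non-kernel
 * context entries from both TLBs and reset the per-cpu context number to its
 * minimum so that context numbers can be reused.
 */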
470void
471pmap_context_rollover(void)
472{
473	u_long data;
474	u_long tag;
475	int i;
476
477	mtx_assert(&sched_lock, MA_OWNED);
478	CTR0(KTR_PMAP, "pmap_context_rollover");
479	for (i = 0; i < 64; i++) {
480		/* XXX - cheetah */
481		data = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG);
482		tag = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_TAG_READ_REG);
483		if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
484		    TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
485			stxa_sync(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG, 0);
486		data = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG);
487		tag = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_TAG_READ_REG);
488		if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
489		    TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
490			stxa_sync(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG, 0);
491	}
492	PCPU_SET(tlb_ctx, PCPU_GET(tlb_ctx_min));
493}
494
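/*
 * Allocate a TLB context number for a user pmap, rolling the context numbers
 * over when the per-cpu range is exhausted.  Must be called with the
 * sched_lock held.
 */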
495static __inline u_int
496pmap_context_alloc(void)
497{
498	u_int context;
499
500	mtx_assert(&sched_lock, MA_OWNED);
501	context = PCPU_GET(tlb_ctx);
502	if (context + 1 == PCPU_GET(tlb_ctx_max))
503		pmap_context_rollover();
504	else
505		PCPU_SET(tlb_ctx, context + 1);
506	return (context);
507}
508
509/*
510 * Initialize the pmap module.
511 */
512void
513pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
514{
515	vm_offset_t addr;
516	vm_size_t size;
517	int result;
518	int i;
519
520	for (i = 0; i < vm_page_array_size; i++) {
521		vm_page_t m;
522
523		m = &vm_page_array[i];
524		TAILQ_INIT(&m->md.pv_list);
525		m->md.pv_list_count = 0;
526	}
527
528	for (i = 0; i < translations_size; i++) {
529		addr = translations[i].om_start;
530		size = translations[i].om_size;
531		if (addr < 0xf0000000)	/* XXX */
532			continue;
533		result = vm_map_find(kernel_map, NULL, 0, &addr, size, TRUE,
534		    VM_PROT_ALL, VM_PROT_ALL, 0);
535		if (result != KERN_SUCCESS || addr != translations[i].om_start)
536			panic("pmap_init: vm_map_find");
537	}
538
539	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
540	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
541	uma_zone_set_allocf(pvzone, pv_allocf);
542	uma_prealloc(pvzone, vm_page_array_size);
543	pmap_initialized = TRUE;
544}
545
546/*
547 * Initialize the address space (zone) for the pv_entries.  Set a
548 * high water mark so that the system can recover from excessive
549 * numbers of pv entries.
550 */
551void
552pmap_init2(void)
553{
554	int shpgperproc;
555
556	shpgperproc = PMAP_SHPGPERPROC;
557	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
558	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
559	pv_entry_high_water = 9 * (pv_entry_max / 10);
560	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
561}
562
563/*
564 * Extract the physical page address associated with the given
565 * map/virtual_address pair.
566 */
567vm_offset_t
568pmap_extract(pmap_t pm, vm_offset_t va)
569{
570	struct tte *tp;
571
572	if (pm == kernel_pmap)
573		return (pmap_kextract(va));
574	tp = tsb_tte_lookup(pm, va);
575	if (tp == NULL)
576		return (0);
577	else
578		return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
579}
580
581/*
582 * Extract the physical page address associated with the given kernel virtual
583 * address.
584 */
585vm_offset_t
586pmap_kextract(vm_offset_t va)
587{
588	struct tte *tp;
589
590	tp = tsb_kvtotte(va);
591	if ((tp->tte_data & TD_V) == 0)
592		return (0);
593	return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
594}
595
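/*
 * Account for a new mapping of the page at the given virtual address and
 * decide whether it may be entered into the data cache.  The per-page color
 * counts track how many mappings exist for each data cache color; if the page
 * is already mapped at a different color, all of its mappings are made
 * uncacheable and the page is flushed from the data cache.  Returns 1 if the
 * new mapping may be cacheable, 0 otherwise.
 */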
596int
597pmap_cache_enter(vm_page_t m, vm_offset_t va)
598{
599	struct tte *tp;
600	vm_offset_t pa;
601	pv_entry_t pv;
602	int c;
603	int i;
604
605	CTR2(KTR_PMAP, "pmap_cache_enter: m=%p va=%#lx", m, va);
606	PMAP_STATS_INC(pmap_ncache_enter);
607	for (i = 0, c = 0; i < DCACHE_COLORS; i++) {
608		if (i != DCACHE_COLOR(va))
609			c += m->md.colors[i];
610	}
611	m->md.colors[DCACHE_COLOR(va)]++;
612	if (c == 0) {
613		CTR0(KTR_PMAP, "pmap_cache_enter: cacheable");
614		return (1);
615	}
616	PMAP_STATS_INC(pmap_ncache_enter_nc);
617	if (c != 1) {
618		CTR0(KTR_PMAP, "pmap_cache_enter: already uncacheable");
619		return (0);
620	}
621	CTR0(KTR_PMAP, "pmap_cache_enter: marking uncacheable");
622	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
623		if ((tp = tsb_tte_lookup(pv->pv_pmap, pv->pv_va)) != NULL) {
624			atomic_clear_long(&tp->tte_data, TD_CV);
625			tlb_page_demap(TLB_DTLB | TLB_ITLB, pv->pv_pmap,
626			    pv->pv_va);
627		}
628	}
629	pa = VM_PAGE_TO_PHYS(m);
630	dcache_page_inval(pa);
631	return (0);
632}
633
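/*
 * Drop the data cache color count for a mapping of the page that is being
 * removed.
 */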
634void
635pmap_cache_remove(vm_page_t m, vm_offset_t va)
636{
637
638	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
639	    m->md.colors[DCACHE_COLOR(va)]);
640	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
641	    ("pmap_cache_remove: no mappings %d <= 0",
642	    m->md.colors[DCACHE_COLOR(va)]));
643	m->md.colors[DCACHE_COLOR(va)]--;
644}
645
646/*
647 * Map a wired page into kernel virtual address space.
648 */
649void
650pmap_kenter(vm_offset_t va, vm_offset_t pa)
651{
652	struct tte *tp;
653
654	tp = tsb_kvtotte(va);
655	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
656	    va, pa, tp, tp->tte_data);
657	tp->tte_vpn = TV_VPN(va);
658	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
659	    TD_CV | TD_P | TD_W;
660}
661
662/*
663 * Map a wired page into kernel virtual address space. This additionally
664 * takes a flags argument which is or'ed into the TTE data. This is used by
665 * bus_space_map().
666 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
667 * to flush entries that might still be in the cache, if applicable.
668 */
669void
670pmap_kenter_flags(vm_offset_t va, vm_offset_t pa, u_long flags)
671{
672	struct tte *tp;
673
674	tp = tsb_kvtotte(va);
675	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
676	    va, pa, tp, tp->tte_data);
677	tp->tte_vpn = TV_VPN(va);
678	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
679}
680
681/*
682 * Make a temporary mapping for a physical address.  This is only intended
683 * to be used for panic dumps.
684 */
685void *
686pmap_kenter_temporary(vm_offset_t pa, int i)
687{
688
689	TODO;
690}
691
692/*
693 * Remove a wired page from kernel virtual address space.
694 */
695void
696pmap_kremove(vm_offset_t va)
697{
698	struct tte *tp;
699
700	tp = tsb_kvtotte(va);
701	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
702	    tp->tte_data);
703	atomic_clear_long(&tp->tte_data, TD_V);
704	tp->tte_vpn = 0;
705	tp->tte_data = 0;
706}
707
708/*
709 * Map a range of physical addresses into kernel virtual address space.
710 *
711 * The value passed in *virt is a suggested virtual address for the mapping.
712 * Architectures which can support a direct-mapped physical to virtual region
713 * can return the appropriate address within that region, leaving '*virt'
714 * unchanged.  We cannot and therefore do not; *virt is updated with the
715 * first usable address after the mapped region.
716 */
717vm_offset_t
718pmap_map(vm_offset_t *virt, vm_offset_t pa_start, vm_offset_t pa_end, int prot)
719{
720	struct tte *tp;
721	vm_offset_t sva;
722	vm_offset_t va;
723	vm_offset_t pa;
724
725	pa = pa_start;
726	sva = *virt;
727	va = sva;
728	for (; pa < pa_end; pa += PAGE_SIZE, va += PAGE_SIZE) {
729		tp = tsb_kvtotte(va);
730		tp->tte_vpn = TV_VPN(va);
731		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW |
732		    TD_CP | TD_CV | TD_P | TD_W;
733	}
734	tlb_range_demap(kernel_pmap, sva, sva + (pa_end - pa_start) - 1);
735	*virt = va;
736	return (sva);
737}
738
739/*
740 * Map a list of wired pages into kernel virtual address space.  This is
741 * intended for temporary mappings which do not need page modification or
742 * references recorded.  Existing mappings in the region are overwritten.
743 */
744void
745pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
746{
747	vm_offset_t va;
748	int i;
749
750	va = sva;
751	for (i = 0; i < count; i++, va += PAGE_SIZE)
752		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
753	tlb_range_demap(kernel_pmap, sva, sva + (count * PAGE_SIZE) - 1);
754}
755
756/*
757 * As above, but take an additional flags argument and call
758 * pmap_kenter_flags().
759 */
760void
761pmap_qenter_flags(vm_offset_t sva, vm_page_t *m, int count, u_long fl)
762{
763	vm_offset_t va;
764	int i;
765
766	va = sva;
767	for (i = 0; i < count; i++, va += PAGE_SIZE)
768		pmap_kenter_flags(va, VM_PAGE_TO_PHYS(m[i]), fl);
769	tlb_range_demap(kernel_pmap, sva, sva + (count * PAGE_SIZE) - 1);
770}
771
772/*
773 * Remove page mappings from kernel virtual address space.  Intended for
774 * temporary mappings entered by pmap_qenter.
775 */
776void
777pmap_qremove(vm_offset_t sva, int count)
778{
779	vm_offset_t va;
780	int i;
781
782	va = sva;
783	for (i = 0; i < count; i++, va += PAGE_SIZE)
784		pmap_kremove(va);
785	tlb_range_demap(kernel_pmap, sva, sva + (count * PAGE_SIZE) - 1);
786}
787
788/*
789 * Create the uarea for a new process.
790 * This routine directly affects the fork perf for a process.
791 */
792void
793pmap_new_proc(struct proc *p)
794{
795	vm_page_t ma[UAREA_PAGES];
796	vm_object_t upobj;
797	vm_offset_t up;
798	vm_page_t m;
799	u_int i;
800
801	/*
802	 * Allocate object for the upages.
803	 */
804	upobj = p->p_upages_obj;
805	if (upobj == NULL) {
806		upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES);
807		p->p_upages_obj = upobj;
808	}
809
810	/*
811	 * Get a kernel virtual address for the U area for this process.
812	 */
813	up = (vm_offset_t)p->p_uarea;
814	if (up == 0) {
815		up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE);
816		if (up == 0)
817			panic("pmap_new_proc: upage allocation failed");
818		p->p_uarea = (struct user *)up;
819	}
820
821	for (i = 0; i < UAREA_PAGES; i++) {
822		/*
823		 * Get a uarea page.
824		 */
825		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
826		ma[i] = m;
827
828		/*
829		 * Wire the page.
830		 */
831		m->wire_count++;
832		cnt.v_wire_count++;
833
834		vm_page_wakeup(m);
835		vm_page_flag_clear(m, PG_ZERO);
836		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
837		m->valid = VM_PAGE_BITS_ALL;
838	}
839
840	/*
841	 * Enter the pages into the kernel address space.
842	 */
843	pmap_qenter(up, ma, UAREA_PAGES);
844}
845
846/*
847 * Dispose the uarea for a process that has exited.
848 * This routine directly impacts the exit perf of a process.
849 */
850void
851pmap_dispose_proc(struct proc *p)
852{
853	vm_object_t upobj;
854	vm_offset_t up;
855	vm_page_t m;
856	int i;
857
858	upobj = p->p_upages_obj;
859	up = (vm_offset_t)p->p_uarea;
860	for (i = 0; i < UAREA_PAGES; i++) {
861		m = vm_page_lookup(upobj, i);
862		if (m == NULL)
863			panic("pmap_dispose_proc: upage already missing?");
864		vm_page_busy(m);
865		vm_page_unwire(m, 0);
866		vm_page_free(m);
867	}
868	pmap_qremove(up, UAREA_PAGES);
869
870	/*
871	 * If the process got swapped out, some of its UPAGES might have gotten
872	 * swapped.  Just get rid of the object to clean up the swap use
873	 * proactively.  NOTE! might block waiting for paging I/O to complete.
874	 */
875	if (upobj->type == OBJT_SWAP) {
876		p->p_upages_obj = NULL;
877		vm_object_deallocate(upobj);
878	}
879}
880
881/*
882 * Allow the uarea for a process to be prejudicially paged out.
883 */
884void
885pmap_swapout_proc(struct proc *p)
886{
887	vm_object_t upobj;
888	vm_offset_t up;
889	vm_page_t m;
890	int i;
891
892	upobj = p->p_upages_obj;
893	up = (vm_offset_t)p->p_uarea;
894	for (i = 0; i < UAREA_PAGES; i++) {
895		m = vm_page_lookup(upobj, i);
896		if (m == NULL)
897			panic("pmap_swapout_proc: upage already missing?");
898		vm_page_dirty(m);
899		vm_page_unwire(m, 0);
900	}
901	pmap_qremove(up, UAREA_PAGES);
902}
903
904/*
905 * Bring the uarea for a specified process back in.
906 */
907void
908pmap_swapin_proc(struct proc *p)
909{
910	vm_page_t ma[UAREA_PAGES];
911	vm_object_t upobj;
912	vm_offset_t up;
913	vm_page_t m;
914	int rv;
915	int i;
916
917	upobj = p->p_upages_obj;
918	up = (vm_offset_t)p->p_uarea;
919	for (i = 0; i < UAREA_PAGES; i++) {
920		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
921		if (m->valid != VM_PAGE_BITS_ALL) {
922			rv = vm_pager_get_pages(upobj, &m, 1, 0);
923			if (rv != VM_PAGER_OK)
924				panic("pmap_swapin_proc: cannot get upage");
925			m = vm_page_lookup(upobj, i);
926			m->valid = VM_PAGE_BITS_ALL;
927		}
928		ma[i] = m;
929		vm_page_wire(m);
930		vm_page_wakeup(m);
931		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
932	}
933	pmap_qenter(up, ma, UAREA_PAGES);
934}
935
936/*
937 * Create the kernel stack and pcb for a new thread.
938 * This routine directly affects the fork perf for a process and
939 * create performance for a thread.
940 */
941void
942pmap_new_thread(struct thread *td)
943{
944	vm_page_t ma[KSTACK_PAGES];
945	vm_object_t ksobj;
946	vm_offset_t ks;
947	vm_page_t m;
948	u_int i;
949
950	/*
951	 * Allocate an object for the kstack.
952	 */
953	ksobj = td->td_kstack_obj;
954	if (ksobj == NULL) {
955		ksobj = vm_object_allocate(OBJT_DEFAULT, KSTACK_PAGES);
956		td->td_kstack_obj = ksobj;
957	}
958
959	/*
960	 * Get a kernel virtual address for the kstack for this thread.
961	 */
962	ks = td->td_kstack;
963	if (ks == 0) {
964		ks = kmem_alloc_nofault(kernel_map,
965		   (KSTACK_PAGES + KSTACK_GUARD_PAGES) * PAGE_SIZE);
966		if (ks == 0)
967			panic("pmap_new_thread: kstack allocation failed");
968		if (KSTACK_GUARD_PAGES != 0) {
969			tlb_page_demap(TLB_DTLB, kernel_pmap, ks);
970			ks += KSTACK_GUARD_PAGES * PAGE_SIZE;
971		}
972		td->td_kstack = ks;
973	}
974
975	for (i = 0; i < KSTACK_PAGES; i++) {
976		/*
977		 * Get a kernel stack page.
978		 */
979		m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
980		ma[i] = m;
981
982		/*
983		 * Wire the page.
984		 */
985		m->wire_count++;
986		cnt.v_wire_count++;
987
988		vm_page_wakeup(m);
989		vm_page_flag_clear(m, PG_ZERO);
990		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
991		m->valid = VM_PAGE_BITS_ALL;
992	}
993
994	/*
995	 * Enter the page into the kernel address space.
996	 */
997	pmap_qenter(ks, ma, KSTACK_PAGES);
998}
999
1000/*
1001 * Dispose the kernel stack for a thread that has exited.
1002 * This routine directly impacts the exit perf of a process and thread.
1003 */
1004void
1005pmap_dispose_thread(struct thread *td)
1006{
1007	vm_object_t ksobj;
1008	vm_offset_t ks;
1009	vm_page_t m;
1010	int i;
1011
1012	ksobj = td->td_kstack_obj;
1013	ks = td->td_kstack;
1014	for (i = 0; i < KSTACK_PAGES; i++) {
1015		m = vm_page_lookup(ksobj, i);
1016		if (m == NULL)
1017			panic("pmap_dispose_thread: kstack already missing?");
1018		vm_page_busy(m);
1019		vm_page_unwire(m, 0);
1020		vm_page_free(m);
1021	}
1022	pmap_qremove(ks, KSTACK_PAGES);
1023
1024	/*
1025	 * If the thread got swapped out, some of its KSTACK might have gotten
1026	 * swapped.  Just get rid of the object to clean up the swap use
1027	 * proactively.  NOTE! might block waiting for paging I/O to complete.
1028	 */
1029	if (ksobj->type == OBJT_SWAP) {
1030		td->td_kstack_obj = NULL;
1031		vm_object_deallocate(ksobj);
1032	}
1033}
1034
1035/*
1036 * Allow the kernel stack for a thread to be prejudicially paged out.
1037 */
1038void
1039pmap_swapout_thread(struct thread *td)
1040{
1041	vm_object_t ksobj;
1042	vm_offset_t ks;
1043	vm_page_t m;
1044	int i;
1045
1046	ksobj = td->td_kstack_obj;
1047	ks = (vm_offset_t)td->td_kstack;
1048	for (i = 0; i < KSTACK_PAGES; i++) {
1049		m = vm_page_lookup(ksobj, i);
1050		if (m == NULL)
1051			panic("pmap_swapout_thread: kstack already missing?");
1052		vm_page_dirty(m);
1053		vm_page_unwire(m, 0);
1054	}
1055	pmap_qremove(ks, KSTACK_PAGES);
1056}
1057
1058/*
1059 * Bring the kernel stack for a specified thread back in.
1060 */
1061void
1062pmap_swapin_thread(struct thread *td)
1063{
1064	vm_page_t ma[KSTACK_PAGES];
1065	vm_object_t ksobj;
1066	vm_offset_t ks;
1067	vm_page_t m;
1068	int rv;
1069	int i;
1070
1071	ksobj = td->td_kstack_obj;
1072	ks = td->td_kstack;
1073	for (i = 0; i < KSTACK_PAGES; i++) {
1074		m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1075		if (m->valid != VM_PAGE_BITS_ALL) {
1076			rv = vm_pager_get_pages(ksobj, &m, 1, 0);
1077			if (rv != VM_PAGER_OK)
1078				panic("pmap_swapin_thread: cannot get kstack");
1079			m = vm_page_lookup(ksobj, i);
1080			m->valid = VM_PAGE_BITS_ALL;
1081		}
1082		ma[i] = m;
1083		vm_page_wire(m);
1084		vm_page_wakeup(m);
1085		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
1086	}
1087	pmap_qenter(ks, ma, KSTACK_PAGES);
1088}
1089
1090/*
1091 * Initialize the pmap associated with process 0.
1092 */
1093void
1094pmap_pinit0(pmap_t pm)
1095{
1096	int i;
1097
1098	for (i = 0; i < MAXCPU; i++)
1099		pm->pm_context[i] = 0;
1100	pm->pm_active = 0;
1101	pm->pm_tsb = NULL;
1102	pm->pm_tsb_obj = NULL;
1103	TAILQ_INIT(&pm->pm_pvlist);
1104	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1105}
1106
1107/*
1108 * Initialize a preallocated and zeroed pmap structure, such as one in a
1109 * vmspace structure.
1110 */
1111void
1112pmap_pinit(pmap_t pm)
1113{
1114	vm_page_t ma[TSB_PAGES];
1115	vm_page_t m;
1116	int i;
1117
1118	/*
1119	 * Allocate kva space for the tsb.
1120	 */
1121	if (pm->pm_tsb == NULL) {
1122		pm->pm_tsb = (struct tte *)kmem_alloc_pageable(kernel_map,
1123		    TSB_BSIZE);
1124	}
1125
1126	/*
1127	 * Allocate an object for it.
1128	 */
1129	if (pm->pm_tsb_obj == NULL)
1130		pm->pm_tsb_obj = vm_object_allocate(OBJT_DEFAULT, TSB_PAGES);
1131
1132	for (i = 0; i < TSB_PAGES; i++) {
1133		m = vm_page_grab(pm->pm_tsb_obj, i,
1134		    VM_ALLOC_RETRY | VM_ALLOC_ZERO);
1135		if ((m->flags & PG_ZERO) == 0)
1136			pmap_zero_page(m);
1137
1138		m->wire_count++;
1139		cnt.v_wire_count++;
1140
1141		vm_page_flag_clear(m, PG_MAPPED | PG_BUSY);
1142		m->valid = VM_PAGE_BITS_ALL;
1143
1144		ma[i] = m;
1145	}
1146	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1147
1148	for (i = 0; i < MAXCPU; i++)
1149		pm->pm_context[i] = -1;
1150	pm->pm_active = 0;
1151	TAILQ_INIT(&pm->pm_pvlist);
1152	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1153}
1154
1155void
1156pmap_pinit2(pmap_t pmap)
1157{
1158	/* XXX: Remove this stub when no longer called */
1159}
1160
1161/*
1162 * Release any resources held by the given physical map.
1163 * Called when a pmap initialized by pmap_pinit is being released.
1164 * Should only be called if the map contains no valid mappings.
1165 */
1166void
1167pmap_release(pmap_t pm)
1168{
1169	vm_object_t obj;
1170	vm_page_t m;
1171
1172	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1173	    pm->pm_context[PCPU_GET(cpuid)], pm->pm_tsb);
1174	obj = pm->pm_tsb_obj;
1175	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1176	KASSERT(TAILQ_EMPTY(&pm->pm_pvlist),
1177	    ("pmap_release: leaking pv entries"));
1178	KASSERT(pmap_resident_count(pm) == 0,
1179	    ("pmap_release: resident pages %ld != 0",
1180	    pmap_resident_count(pm)));
1181	TAILQ_FOREACH(m, &obj->memq, listq) {
1182		if (vm_page_sleep_busy(m, FALSE, "pmaprl"))
1183			continue;
1184		vm_page_busy(m);
1185		KASSERT(m->hold_count == 0,
1186		    ("pmap_release: freeing held tsb page"));
1187		m->wire_count--;
1188		cnt.v_wire_count--;
1189		vm_page_free_zero(m);
1190	}
1191	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1192}
1193
1194/*
1195 * Grow the number of kernel page table entries.  Unneeded.
1196 */
1197void
1198pmap_growkernel(vm_offset_t addr)
1199{
1200}
1201
1202/*
1203 * This routine is very drastic, but can save the system
1204 * in a pinch.
1205 */
1206void
1207pmap_collect(void)
1208{
1209	static int warningdone;
1210	vm_page_t m;
1211	int i;
1212
1213	if (pmap_pagedaemon_waken == 0)
1214		return;
1215	if (warningdone++ < 5)
1216		printf("pmap_collect: collecting pv entries -- suggest "
1217		    "increasing PMAP_SHPGPERPROC\n");
1218	for (i = 0; i < vm_page_array_size; i++) {
1219		m = &vm_page_array[i];
1220		if (m->wire_count || m->hold_count || m->busy ||
1221		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1222			continue;
1223		pv_remove_all(m);
1224	}
1225	pmap_pagedaemon_waken = 0;
1226}
1227
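/*
 * tsb_foreach() callback for pmap_remove().  Transfers the referenced and
 * modified bits of a managed mapping to its vm_page, removes the pv entry and
 * cache color accounting, and invalidates the tte.  Returns 0 to stop the
 * walk once the pmap has no resident pages left.
 */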
1228static int
1229pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1230		vm_offset_t va)
1231{
1232	vm_page_t m;
1233
1234	m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1235	if ((tp->tte_data & TD_PV) != 0) {
1236		if ((tp->tte_data & TD_W) != 0 &&
1237		    pmap_track_modified(pm, va))
1238			vm_page_dirty(m);
1239		if ((tp->tte_data & TD_REF) != 0)
1240			vm_page_flag_set(m, PG_REFERENCED);
1241		pv_remove(pm, m, va);
1242		pmap_cache_remove(m, va);
1243	}
1244	atomic_clear_long(&tp->tte_data, TD_V);
1245	tp->tte_vpn = 0;
1246	tp->tte_data = 0;
1247	if (PMAP_REMOVE_DONE(pm))
1248		return (0);
1249	return (1);
1250}
1251
1252/*
1253 * Remove the given range of addresses from the specified map.
1254 */
1255void
1256pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1257{
1258	struct tte *tp;
1259	vm_offset_t va;
1260
1261	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1262	    pm->pm_context[PCPU_GET(cpuid)], start, end);
1263	if (PMAP_REMOVE_DONE(pm))
1264		return;
1265	if (end - start > PMAP_TSB_THRESH) {
1266		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1267		tlb_context_demap(pm);
1268	} else {
1269		for (va = start; va < end; va += PAGE_SIZE) {
1270			if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1271				if (!pmap_remove_tte(pm, NULL, tp, va))
1272					break;
1273			}
1274		}
1275		tlb_range_demap(pm, start, end - 1);
1276	}
1277}
1278
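/*
 * tsb_foreach() callback for pmap_protect().  Transfers the referenced and
 * modified bits of a managed mapping to its vm_page and removes write access
 * from the tte.
 */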
1279static int
1280pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1281		 vm_offset_t va)
1282{
1283	vm_page_t m;
1284
1285	if ((tp->tte_data & TD_PV) != 0) {
1286		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1287		if ((tp->tte_data & TD_REF) != 0) {
1288			vm_page_flag_set(m, PG_REFERENCED);
1289			tp->tte_data &= ~TD_REF;
1290		}
1291		if ((tp->tte_data & TD_W) != 0 &&
1292		    pmap_track_modified(pm, va)) {
1293			vm_page_dirty(m);
1294		}
1295	}
1296	tp->tte_data &= ~(TD_W | TD_SW);
1297	return (0);
1298}
1299
1300/*
1301 * Set the physical protection on the specified range of this map as requested.
1302 */
1303void
1304pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1305{
1306	vm_offset_t va;
1307	struct tte *tp;
1308
1309	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1310	    pm->pm_context[PCPU_GET(cpuid)], sva, eva, prot);
1311
1312	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1313		pmap_remove(pm, sva, eva);
1314		return;
1315	}
1316
1317	if (prot & VM_PROT_WRITE)
1318		return;
1319
1320	if (eva - sva > PMAP_TSB_THRESH) {
1321		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1322		tlb_context_demap(pm);
1323	} else {
1324		for (va = sva; va < eva; va += PAGE_SIZE) {
1325			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1326				pmap_protect_tte(pm, NULL, tp, va);
1327		}
1328		tlb_range_demap(pm, sva, eva - 1);
1329	}
1330}
1331
1332/*
1333 * Map the given physical page at the specified virtual address in the
1334 * target pmap with the protection requested.  If specified the page
1335 * will be wired down.
1336 */
1337void
1338pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1339	   boolean_t wired)
1340{
1341	struct tte otte;
1342	struct tte tte;
1343	struct tte *tp;
1344	vm_offset_t pa;
1345	vm_page_t om;
1346
1347	pa = VM_PAGE_TO_PHYS(m);
1348	CTR6(KTR_PMAP,
1349	    "pmap_enter: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1350	    pm->pm_context[PCPU_GET(cpuid)], m, va, pa, prot, wired);
1351
1352	tte.tte_vpn = TV_VPN(va);
1353	tte.tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP;
1354
1355	/*
1356	 * If there is an existing mapping, and the physical address has not
1357	 * changed, this must be a protection or wiring change.
1358	 */
1359	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1360		otte = *tp;
1361		om = PHYS_TO_VM_PAGE(TTE_GET_PA(&otte));
1362
1363		if (TTE_GET_PA(&otte) == pa) {
1364			CTR0(KTR_PMAP, "pmap_enter: update");
1365			PMAP_STATS_INC(pmap_enter_nupdate);
1366
1367			/*
1368			 * Wiring change, just update stats.
1369			 */
1370			if (wired) {
1371				if ((otte.tte_data & TD_WIRED) == 0)
1372					pm->pm_stats.wired_count++;
1373			} else {
1374				if ((otte.tte_data & TD_WIRED) != 0)
1375					pm->pm_stats.wired_count--;
1376			}
1377
1378			if ((otte.tte_data & TD_CV) != 0)
1379				tte.tte_data |= TD_CV;
1380			if ((otte.tte_data & TD_REF) != 0)
1381				tte.tte_data |= TD_REF;
1382			if ((otte.tte_data & TD_PV) != 0) {
1383				KASSERT((m->flags &
1384				    (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1385				    ("pmap_enter: unmanaged pv page"));
1386				tte.tte_data |= TD_PV;
1387			}
1388			/*
1389			 * If we're turning off write protection, sense modify
1390			 * status and remove the old mapping.
1391			 */
1392			if ((prot & VM_PROT_WRITE) == 0 &&
1393			    (otte.tte_data & (TD_W | TD_SW)) != 0) {
1394				if ((otte.tte_data & TD_PV) != 0) {
1395					if (pmap_track_modified(pm, va))
1396						vm_page_dirty(m);
1397				}
1398				tlb_tte_demap(&otte, pm);
1399			}
1400		} else {
1401			CTR0(KTR_PMAP, "pmap_enter: replace");
1402			PMAP_STATS_INC(pmap_enter_nreplace);
1403
1404			/*
1405			 * Mapping has changed, invalidate old range.
1406			 */
1407			if (!wired && (otte.tte_data & TD_WIRED) != 0)
1408				pm->pm_stats.wired_count--;
1409
1410			/*
1411			 * Enter on the pv list if part of our managed memory.
1412			 */
1413			if ((otte.tte_data & TD_PV) != 0) {
1414				KASSERT((m->flags &
1415				    (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1416				    ("pmap_enter: unmanaged pv page"));
1417				if ((otte.tte_data & TD_REF) != 0)
1418					vm_page_flag_set(om, PG_REFERENCED);
1419				if ((otte.tte_data & TD_W) != 0 &&
1420				    pmap_track_modified(pm, va))
1421					vm_page_dirty(om);
1422				pv_remove(pm, om, va);
1423				pv_insert(pm, m, va);
1424				tte.tte_data |= TD_PV;
1425				pmap_cache_remove(om, va);
1426				if (pmap_cache_enter(m, va) != 0)
1427					tte.tte_data |= TD_CV;
1428			}
1429			tlb_tte_demap(&otte, pm);
1430		}
1431	} else {
1432		CTR0(KTR_PMAP, "pmap_enter: new");
1433		PMAP_STATS_INC(pmap_enter_nnew);
1434
1435		/*
1436		 * Enter on the pv list if part of our managed memory.
1437		 */
1438		if (pmap_initialized &&
1439		    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
1440			pv_insert(pm, m, va);
1441			tte.tte_data |= TD_PV;
1442			if (pmap_cache_enter(m, va) != 0)
1443				tte.tte_data |= TD_CV;
1444		}
1445
1446		/*
1447		 * Increment counters.
1448		 */
1449		if (wired)
1450			pm->pm_stats.wired_count++;
1451
1452	}
1453
1454	/*
1455	 * Now validate mapping with desired protection/wiring.
1456	 */
1457	if (wired) {
1458		tte.tte_data |= TD_REF | TD_WIRED;
1459		if ((prot & VM_PROT_WRITE) != 0)
1460			tte.tte_data |= TD_W;
1461	}
1462	if (pm->pm_context[PCPU_GET(cpuid)] == TLB_CTX_KERNEL)
1463		tte.tte_data |= TD_P;
1464	if (prot & VM_PROT_WRITE)
1465		tte.tte_data |= TD_SW;
1466	if (prot & VM_PROT_EXECUTE) {
1467		tte.tte_data |= TD_EXEC;
1468		PMAP_STATS_INC(pmap_niflush);
1469		icache_page_inval(pa);
1470	}
1471
1472	if (tp != NULL)
1473		*tp = tte;
1474	else
1475		tsb_tte_enter(pm, m, va, tte);
1476}
1477
1478void
1479pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1480		    vm_pindex_t pindex, vm_size_t size, int limit)
1481{
1482	/* XXX */
1483}
1484
1485void
1486pmap_prefault(pmap_t pm, vm_offset_t va, vm_map_entry_t entry)
1487{
1488	/* XXX */
1489}
1490
1491/*
1492 * Change the wiring attribute for a map/virtual-address pair.
1493 * The mapping must already exist in the pmap.
1494 */
1495void
1496pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1497{
1498	struct tte *tp;
1499
1500	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1501		if (wired) {
1502			if ((tp->tte_data & TD_WIRED) == 0)
1503				pm->pm_stats.wired_count++;
1504			tp->tte_data |= TD_WIRED;
1505		} else {
1506			if ((tp->tte_data & TD_WIRED) != 0)
1507				pm->pm_stats.wired_count--;
1508			tp->tte_data &= ~TD_WIRED;
1509		}
1510	}
1511}
1512
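/*
 * tsb_foreach() callback for pmap_copy().  Enters a read-only, unreferenced
 * copy of the source mapping into the destination pmap if no mapping exists
 * there yet.
 */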
1513static int
1514pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp, vm_offset_t va)
1515{
1516	struct tte tte;
1517	vm_page_t m;
1518
1519	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1520		tte.tte_data = tp->tte_data &
1521		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1522		tte.tte_vpn = TV_VPN(va);
1523		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1524		if ((tp->tte_data & TD_PV) != 0) {
1525			KASSERT((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1526			    ("pmap_copy_tte: unmanaged pv page"));
1527			pv_insert(dst_pmap, m, va);
1528			tte.tte_data |= TD_PV;
1529			if (pmap_cache_enter(m, va) != 0)
1530				tte.tte_data |= TD_CV;
1531		}
1532		tsb_tte_enter(dst_pmap, m, va, tte);
1533	}
1534	return (1);
1535}
1536
1537void
1538pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1539	  vm_size_t len, vm_offset_t src_addr)
1540{
1541	struct tte *tp;
1542	vm_offset_t va;
1543
1544	if (dst_addr != src_addr)
1545		return;
1546	if (len > PMAP_TSB_THRESH) {
1547		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1548		    pmap_copy_tte);
1549		tlb_context_demap(dst_pmap);
1550	} else {
1551		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE) {
1552			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1553				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1554		}
1555		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1556	}
1557}
1558
1559/*
1560 * Zero a page of physical memory using physically addressed ASI accesses.
1561 */
1562void
1563pmap_zero_page(vm_page_t m)
1564{
1565	vm_offset_t pa;
1566
1567	pa = VM_PAGE_TO_PHYS(m);
1568	CTR1(KTR_PMAP, "pmap_zero_page: pa=%#lx", pa);
1569	dcache_page_inval(pa);
1570	aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1571}
1572
1573void
1574pmap_zero_page_area(vm_page_t m, int off, int size)
1575{
1576	vm_offset_t pa;
1577
1578	pa = VM_PAGE_TO_PHYS(m);
1579	CTR3(KTR_PMAP, "pmap_zero_page_area: pa=%#lx off=%#x size=%#x",
1580	    pa, off, size);
1581	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1582	dcache_page_inval(pa);
1583	aszero(ASI_PHYS_USE_EC, pa + off, size);
1584}
1585
1586/*
1587 * Copy a page of physical memory using physically addressed ASI accesses.
1588 */
1589void
1590pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1591{
1592	vm_offset_t dst;
1593	vm_offset_t src;
1594
1595	dst = VM_PAGE_TO_PHYS(mdst);
1596	src = VM_PAGE_TO_PHYS(msrc);
1597	CTR2(KTR_PMAP, "pmap_copy_page: src=%#lx dst=%#lx", src, dst);
1598	dcache_page_inval(dst);
1599	ascopy(ASI_PHYS_USE_EC, src, dst, PAGE_SIZE);
1600}
1601
1602/*
1603 * Make the specified page pageable (or not).  Unneeded.
1604 */
1605void
1606pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
1607	      boolean_t pageable)
1608{
1609}
1610
1611/*
1612 * Returns true if the pmap's pv is one of the first
1613 * 16 pvs linked to from this page.  This count may
1614 * be changed upwards or downwards in the future; it
1615 * is only necessary that true be returned for a small
1616 * subset of pmaps for proper page aging.
1617 */
1618boolean_t
1619pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1620{
1621
1622	if (m->flags & PG_FICTITIOUS)
1623		return (FALSE);
1624	return (pv_page_exists(pm, m));
1625}
1626
1627/*
1628 * Remove all pages from the specified address space; this aids process exit
1629 * speeds.  This is much faster than pmap_remove in the case of running down
1630 * an entire address space.  Only works for the current pmap.
1631 */
1632void
1633pmap_remove_pages(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1634{
1635	struct tte *tp;
1636	pv_entry_t npv;
1637	pv_entry_t pv;
1638	vm_page_t m;
1639
1640	npv = NULL;
1641	for (pv = TAILQ_FIRST(&pm->pm_pvlist); pv != NULL; pv = npv) {
1642		npv = TAILQ_NEXT(pv, pv_plist);
1643		if (pv->pv_va >= eva || pv->pv_va < sva)
1644			continue;
1645		if ((tp = tsb_tte_lookup(pv->pv_pmap, pv->pv_va)) == NULL)
1646			continue;
1647
1648		/*
1649		 * We cannot remove wired pages at this time.
1650		 */
1651		if ((tp->tte_data & TD_WIRED) != 0)
1652			continue;
1653
1654		atomic_clear_long(&tp->tte_data, TD_V);
1655		tp->tte_vpn = 0;
1656		tp->tte_data = 0;
1657
1658		m = pv->pv_m;
1659
1660		pv->pv_pmap->pm_stats.resident_count--;
1661		m->md.pv_list_count--;
1662		pmap_cache_remove(m, pv->pv_va);
1663		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1664		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1665		if (TAILQ_EMPTY(&m->md.pv_list))
1666			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1667		pv_free(pv);
1668	}
1669	tlb_context_demap(pm);
1670}
1671
1672/*
1673 * Lower the permission for all mappings to a given page.
1674 */
1675void
1676pmap_page_protect(vm_page_t m, vm_prot_t prot)
1677{
1678
1679	if ((prot & VM_PROT_WRITE) == 0) {
1680		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
1681			pv_bit_clear(m, TD_W | TD_SW);
1682		else
1683			pv_remove_all(m);
1684	}
1685}
1686
1687vm_offset_t
1688pmap_phys_address(int ppn)
1689{
1690
1691	return (sparc64_ptob(ppn));
1692}
1693
1694/*
1695 *	pmap_ts_referenced:
1696 *
1697 *	Return a count of reference bits for a page, clearing those bits.
1698 *	It is not necessary for every reference bit to be cleared, but it
1699 *	is necessary that 0 only be returned when there are truly no
1700 *	reference bits set.
1701 *
1702 *	XXX: The exact number of bits to check and clear is a matter that
1703 *	should be tested and standardized at some point in the future for
1704 *	optimal aging of shared pages.
1705 */
1706
1707int
1708pmap_ts_referenced(vm_page_t m)
1709{
1710
1711	if (m->flags & PG_FICTITIOUS)
1712		return (0);
1713	return (pv_bit_count(m, TD_REF));
1714}
1715
1716boolean_t
1717pmap_is_modified(vm_page_t m)
1718{
1719
1720	if (m->flags & PG_FICTITIOUS)
1721		return FALSE;
1722	return (pv_bit_test(m, TD_W));
1723}
1724
1725void
1726pmap_clear_modify(vm_page_t m)
1727{
1728
1729	if (m->flags & PG_FICTITIOUS)
1730		return;
1731	pv_bit_clear(m, TD_W);
1732}
1733
1734void
1735pmap_clear_reference(vm_page_t m)
1736{
1737
1738	if (m->flags & PG_FICTITIOUS)
1739		return;
1740	pv_bit_clear(m, TD_REF);
1741}
1742
1743int
1744pmap_mincore(pmap_t pm, vm_offset_t addr)
1745{
1746	TODO;
1747	return (0);
1748}
1749
1750/*
1751 * Activate a user pmap.  The pmap must be activated before its address space
1752 * can be accessed in any way.
1753 */
1754void
1755pmap_activate(struct thread *td)
1756{
1757	struct vmspace *vm;
1758	vm_offset_t tsb;
1759	u_long context;
1760	pmap_t pm;
1761
1762	/*
1763	 * Load all the data we need up front to encourage the compiler to
1764	 * not issue any loads while we have interrupts disabled below.
1765	 */
1766	vm = td->td_proc->p_vmspace;
1767	pm = &vm->vm_pmap;
1768	tsb = (vm_offset_t)pm->pm_tsb;
1769
1770	KASSERT(pm->pm_active == 0, ("pmap_activate: pmap already active?"));
1771	KASSERT(pm->pm_context[PCPU_GET(cpuid)] != 0,
1772	    ("pmap_activate: activating nucleus context?"));
1773
1774	mtx_lock_spin(&sched_lock);
1775	wrpr(pstate, 0, PSTATE_MMU);
1776	mov(tsb, TSB_REG);
1777	wrpr(pstate, 0, PSTATE_KERNEL);
1778	context = pmap_context_alloc();
1779	pm->pm_context[PCPU_GET(cpuid)] = context;
1780	pm->pm_active |= PCPU_GET(cpumask);
1781	PCPU_SET(vmspace, vm);
1782	stxa(AA_DMMU_PCXR, ASI_DMMU, context);
1783	membar(Sync);
1784	mtx_unlock_spin(&sched_lock);
1785}
1786
1787vm_offset_t
1788pmap_addr_hint(vm_object_t object, vm_offset_t va, vm_size_t size)
1789{
1790
1791	return (va);
1792}
1793