pmap.c revision 91782
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *      This product includes software developed by the University of
24 *      California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
42 * $FreeBSD: head/sys/sparc64/sparc64/pmap.c 91782 2002-03-07 05:25:15Z jake $
43 */
44
45/*
46 * Manages physical address maps.
47 *
48 * In addition to hardware address maps, this module is called upon to
49 * provide software-use-only maps which may or may not be stored in the
50 * same form as hardware maps.  These pseudo-maps are used to store
51 * intermediate results from copy operations to and from address spaces.
52 *
53 * Since the information managed by this module is also stored by the
54 * logical address mapping module, this module may throw away valid virtual
55 * to physical mappings at almost any time.  However, invalidations of
56 * mappings must be done as requested.
57 *
58 * In order to cope with hardware architectures which make virtual to
59 * physical map invalidates expensive, this module may delay invalidate or
60 * reduced protection operations until such time as they are actually
61 * necessary.  This module is given full information as to which processors
62 * are currently using which maps, and to when physical maps must be made
63 * correct.
64 */
65
66#include "opt_msgbuf.h"
67#include "opt_pmap.h"
68
69#include <sys/param.h>
70#include <sys/kernel.h>
71#include <sys/ktr.h>
72#include <sys/lock.h>
73#include <sys/msgbuf.h>
74#include <sys/mutex.h>
75#include <sys/proc.h>
76#include <sys/sysctl.h>
77#include <sys/systm.h>
78#include <sys/vmmeter.h>
79
80#include <dev/ofw/openfirm.h>
81
82#include <vm/vm.h>
83#include <vm/vm_param.h>
84#include <vm/vm_kern.h>
85#include <vm/vm_page.h>
86#include <vm/vm_map.h>
87#include <vm/vm_object.h>
88#include <vm/vm_extern.h>
89#include <vm/vm_pageout.h>
90#include <vm/vm_pager.h>
91#include <vm/vm_zone.h>
92
93#include <machine/cache.h>
94#include <machine/frame.h>
95#include <machine/md_var.h>
96#include <machine/pv.h>
97#include <machine/tlb.h>
98#include <machine/tte.h>
99#include <machine/tsb.h>
100
101#define	PMAP_DEBUG
102
103#ifndef	PMAP_SHPGPERPROC
104#define	PMAP_SHPGPERPROC	200
105#endif
106
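/*
 * A region of physical memory, as reported by the "available" property of
 * the prom /memory node.
 */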
107struct mem_region {
108	vm_offset_t mr_start;
109	vm_offset_t mr_size;
110};
111
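/*
 * A virtual address range mapped by the prom, as reported by the
 * "translations" property of the /virtual-memory node.
 */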
112struct ofw_map {
113	vm_offset_t om_start;
114	vm_offset_t om_size;
115	u_long	om_tte;
116};
117
118/*
119 * Virtual and physical address of message buffer.
120 */
121struct msgbuf *msgbufp;
122vm_offset_t msgbuf_phys;
123
124/*
125 * Physical addresses of first and last available physical page.
126 */
127vm_offset_t avail_start;
128vm_offset_t avail_end;
129
130int pmap_pagedaemon_waken;
131
132/*
133 * Map of physical memory regions.
134 */
135vm_offset_t phys_avail[128];
136static struct mem_region mra[128];
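/* Prom virtual memory translations, saved for later use by pmap_init(). */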
137static struct ofw_map translations[128];
138static int translations_size;
139
140/*
141 * First and last available kernel virtual addresses.
142 */
143vm_offset_t virtual_avail;
144vm_offset_t virtual_end;
145vm_offset_t kernel_vm_end;
146
147/*
148 * The locked kernel page the kernel binary was loaded into. This will need
149 * to become a list later.
150 */
151vm_offset_t kernel_page;
152
153/*
154 * Kernel pmap.
155 */
156struct pmap kernel_pmap_store;
157
158static boolean_t pmap_initialized = FALSE;
159
160/* Convert a tte data field into a page mask */
161static vm_offset_t pmap_page_masks[] = {
162	PAGE_MASK_8K,
163	PAGE_MASK_64K,
164	PAGE_MASK_512K,
165	PAGE_MASK_4M
166};
167
168#define	PMAP_TD_GET_MASK(d)	pmap_page_masks[TD_GET_SIZE((d))]
169
170/*
171 * Allocate physical memory for use in pmap_bootstrap.
172 */
173static vm_offset_t pmap_bootstrap_alloc(vm_size_t size);
174
175/*
176 * If a user pmap is processed with pmap_remove and the resident count
177 * drops to 0, there are no more pages to remove, so we need not
178 * continue.
179 */
180#define	PMAP_REMOVE_DONE(pm) \
181	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
182
183/*
184 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
185 * and pmap_protect() instead of trying each virtual address.
186 */
187#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
188
189/* Callbacks for tsb_foreach. */
190static tsb_callback_t pmap_remove_tte;
191static tsb_callback_t pmap_protect_tte;
192
193#ifdef PMAP_STATS
194static long pmap_enter_nupdate;
195static long pmap_enter_nreplace;
196static long pmap_enter_nnew;
197static long pmap_ncache_enter;
198static long pmap_ncache_enter_nc;
199static long pmap_niflush;
200
201SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "Statistics");
202SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nupdate, CTLFLAG_RD,
203    &pmap_enter_nupdate, 0, "Number of pmap_enter() updates");
204SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nreplace, CTLFLAG_RD,
205    &pmap_enter_nreplace, 0, "Number of pmap_enter() replacements");
206SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nnew, CTLFLAG_RD,
207    &pmap_enter_nnew, 0, "Number of pmap_enter() additions");
208SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter, CTLFLAG_RD,
209    &pmap_ncache_enter, 0, "Number of pmap_cache_enter() calls");
210SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter_nc, CTLFLAG_RD,
211    &pmap_ncache_enter_nc, 0, "Number of uncacheable pmap_cache_enter() calls");
212SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_niflush, CTLFLAG_RD,
213    &pmap_niflush, 0, "Number of pmap I$ flushes");
214
215#define	PMAP_STATS_INC(var)	atomic_add_long(&var, 1)
216#else
217#define	PMAP_STATS_INC(var)
218#endif
219
220/*
221 * Quick sort callouts for comparing memory regions and prom translations.
222 */
223static int mr_cmp(const void *a, const void *b);
224static int om_cmp(const void *a, const void *b);
225static int
226mr_cmp(const void *a, const void *b)
227{
228	const struct mem_region *mra;
229	const struct mem_region *mrb;
230
231	mra = a;
232	mrb = b;
233	if (mra->mr_start < mrb->mr_start)
234		return (-1);
235	else if (mra->mr_start > mrb->mr_start)
236		return (1);
237	else
238		return (0);
239}
240static int
241om_cmp(const void *a, const void *b)
242{
243	const struct ofw_map *oma;
244	const struct ofw_map *omb;
245
246	oma = a;
247	omb = b;
248	if (oma->om_start < omb->om_start)
249		return (-1);
250	else if (oma->om_start > omb->om_start)
251		return (1);
252	else
253		return (0);
254}
255
256/*
257 * Bootstrap the system enough to run with virtual memory.
258 */
259void
260pmap_bootstrap(vm_offset_t ekva)
261{
262	struct pmap *pm;
263	struct tte tte;
264	struct tte *tp;
265	vm_offset_t off;
266	vm_offset_t pa;
267	vm_offset_t va;
268	vm_size_t physsz;
269	ihandle_t pmem;
270	ihandle_t vmem;
271	int sz;
272	int i;
273	int j;
274
275	/*
276	 * Set the start and end of kva.  The kernel is loaded at the first
277	 * available 4 meg super page, so round up to the end of the page.
278	 */
279	virtual_avail = roundup2(ekva, PAGE_SIZE_4M);
280	virtual_end = VM_MAX_KERNEL_ADDRESS;
281
282	/* Look up the page the kernel binary was loaded into. */
283	kernel_page = TD_GET_PA(ldxa(TLB_DAR_SLOT(TLB_SLOT_KERNEL),
284	    ASI_DTLB_DATA_ACCESS_REG));
285
286	/*
287	 * Find out what physical memory is available from the prom and
288	 * initialize the phys_avail array.  This must be done before
289	 * pmap_bootstrap_alloc is called.
290	 */
291	if ((pmem = OF_finddevice("/memory")) == -1)
292		panic("pmap_bootstrap: finddevice /memory");
293	if ((sz = OF_getproplen(pmem, "available")) == -1)
294		panic("pmap_bootstrap: getproplen /memory/available");
295	if (sizeof(phys_avail) < sz)
296		panic("pmap_bootstrap: phys_avail too small");
297	if (sizeof(mra) < sz)
298		panic("pmap_bootstrap: mra too small");
299	bzero(mra, sz);
300	if (OF_getprop(pmem, "available", mra, sz) == -1)
301		panic("pmap_bootstrap: getprop /memory/available");
302	sz /= sizeof(*mra);
303	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
304	qsort(mra, sz, sizeof (*mra), mr_cmp);
305	physsz = 0;
306	for (i = 0, j = 0; i < sz; i++, j += 2) {
307		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
308		    mra[i].mr_size);
309		phys_avail[j] = mra[i].mr_start;
310		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
311		physsz += mra[i].mr_size;
312	}
313	physmem = btoc(physsz);
314
315	/*
316	 * Allocate the kernel tsb and lock it in the tlb.
317	 */
318	pa = pmap_bootstrap_alloc(KVA_PAGES * PAGE_SIZE_4M);
319	if (pa & PAGE_MASK_4M)
320		panic("pmap_bootstrap: tsb unaligned\n");
321	tsb_kernel_phys = pa;
322	tsb_kernel = (struct tte *)virtual_avail;
323	virtual_avail += KVA_PAGES * PAGE_SIZE_4M;
324	pmap_map_tsb();
325	bzero(tsb_kernel, KVA_PAGES * PAGE_SIZE_4M);
326
327	/*
328	 * Allocate a kernel stack with guard page for thread0 and map it into
329	 * the kernel tsb.
330	 */
331	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE);
332	kstack0_phys = pa;
333	kstack0 = virtual_avail + (KSTACK_GUARD_PAGES * PAGE_SIZE);
334	virtual_avail += (KSTACK_PAGES + KSTACK_GUARD_PAGES) * PAGE_SIZE;
335	for (i = 0; i < KSTACK_PAGES; i++) {
336		pa = kstack0_phys + i * PAGE_SIZE;
337		va = kstack0 + i * PAGE_SIZE;
338		pmap_kenter(va, pa);
339		/* tlb_page_demap(TLB_DTLB, kernel_pmap, va); */
340	}
341
342	/*
343	 * Allocate the message buffer.
344	 */
345	msgbuf_phys = pmap_bootstrap_alloc(MSGBUF_SIZE);
346
347	/*
348	 * Add the prom mappings to the kernel tsb.
349	 */
350	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
351		panic("pmap_bootstrap: finddevice /virtual-memory");
352	if ((sz = OF_getproplen(vmem, "translations")) == -1)
353		panic("pmap_bootstrap: getproplen translations");
354	if (sizeof(translations) < sz)
355		panic("pmap_bootstrap: translations too small");
356	bzero(translations, sz);
357	if (OF_getprop(vmem, "translations", translations, sz) == -1)
358		panic("pmap_bootstrap: getprop /virtual-memory/translations");
359	sz /= sizeof(*translations);
360	translations_size = sz;
361	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
362	qsort(translations, sz, sizeof (*translations), om_cmp);
363	for (i = 0; i < sz; i++) {
364		CTR4(KTR_PMAP,
365		    "translation: start=%#lx size=%#lx tte=%#lx pa=%#lx",
366		    translations[i].om_start, translations[i].om_size,
367		    translations[i].om_tte, TD_GET_PA(translations[i].om_tte));
368		if (translations[i].om_start < 0xf0000000)	/* XXX!!! */
369			continue;
370		for (off = 0; off < translations[i].om_size;
371		    off += PAGE_SIZE) {
372			va = translations[i].om_start + off;
373			tte.tte_data = translations[i].om_tte + off;
374			tte.tte_vpn = TV_VPN(va);
375			tp = tsb_kvtotte(va);
376			CTR4(KTR_PMAP,
377			    "mapping: va=%#lx tp=%p tte=%#lx pa=%#lx",
378			    va, tp, tte.tte_data, TD_GET_PA(tte.tte_data));
379			*tp = tte;
380		}
381	}
382
383	/*
384	 * Calculate the first and last available physical addresses.
385	 */
386	avail_start = phys_avail[0];
387	for (i = 0; phys_avail[i + 2] != 0; i += 2)
388		;
389	avail_end = phys_avail[i + 1];
390	Maxmem = sparc64_btop(avail_end);
391
392	/*
393	 * Allocate virtual address space for the message buffer.
394	 */
395	msgbufp = (struct msgbuf *)virtual_avail;
396	virtual_avail += round_page(MSGBUF_SIZE);
397
398	/*
399	 * Initialize the kernel pmap (which is statically allocated).
400	 */
401	pm = kernel_pmap;
402	for (i = 0; i < MAXCPU; i++)
403		pm->pm_context[i] = TLB_CTX_KERNEL;
404	pm->pm_active = ~0;
405	pm->pm_count = 1;
406	TAILQ_INIT(&pm->pm_pvlist);
407}
408
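/*
 * Map the 4mb tsb pages with locked tlb entries and point the tsb registers
 * at the resulting virtual address.
 */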
409void
410pmap_map_tsb(void)
411{
412	struct tte tte;
413	vm_offset_t va;
414	vm_offset_t pa;
415	int i;
416
417	/*
418	 * Map the 4mb tsb pages.
419	 */
420	for (i = 0; i < KVA_PAGES; i++) {
421		va = (vm_offset_t)tsb_kernel + i * PAGE_SIZE_4M;
422		pa = tsb_kernel_phys + i * PAGE_SIZE_4M;
423		tte.tte_vpn = TV_VPN(va);
424		tte.tte_data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP |
425		    TD_CV | TD_P | TD_W;
426		tlb_store_slot(TLB_DTLB, va, TLB_CTX_KERNEL, tte,
427		    TLB_SLOT_TSB_KERNEL_MIN + i);
428	}
429
430	/*
431	 * Load the tsb registers.
432	 */
433	stxa(AA_DMMU_TSB, ASI_DMMU, (vm_offset_t)tsb_kernel);
434	stxa(AA_IMMU_TSB, ASI_IMMU, (vm_offset_t)tsb_kernel);
435	membar(Sync);
436	flush(tsb_kernel);
437
438	/*
439	 * Set the secondary context to be the kernel context (needed for
440	 * fp block operations in the kernel and the cache code).
441	 */
442	stxa(AA_DMMU_SCXR, ASI_DMMU, TLB_CTX_KERNEL);
443	membar(Sync);
444}
445
446/*
447 * Allocate a chunk of physical memory directly from the phys_avail map.
448 * Can only be called from pmap_bootstrap before avail start and end are
449 * calculated.
450 */
451static vm_offset_t
452pmap_bootstrap_alloc(vm_size_t size)
453{
454	vm_offset_t pa;
455	int i;
456
457	size = round_page(size);
458	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
459		if (phys_avail[i + 1] - phys_avail[i] < size)
460			continue;
461		pa = phys_avail[i];
462		phys_avail[i] += size;
463		return (pa);
464	}
465	panic("pmap_bootstrap_alloc");
466}
467
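/*
 * Flush all user (non-privileged) mappings from the tlbs and restart context
 * number allocation from the per-cpu minimum.  Called when the available
 * context numbers have been exhausted.
 */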
468void
469pmap_context_rollover(void)
470{
471	u_long data;
472	int i;
473
474	mtx_assert(&sched_lock, MA_OWNED);
475	CTR0(KTR_PMAP, "pmap_context_rollover");
476	for (i = 0; i < 64; i++) {
477		data = ldxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG);
478		if ((data & TD_V) != 0 && (data & TD_P) == 0) {
479			stxa(TLB_DAR_SLOT(i), ASI_DTLB_DATA_ACCESS_REG, 0);
480			membar(Sync);
481		}
482		data = ldxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG);
483		if ((data & TD_V) != 0 && (data & TD_P) == 0) {
484			stxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG, 0);
485			membar(Sync);
486		}
487	}
488	PCPU_SET(tlb_ctx, PCPU_GET(tlb_ctx_min));
489}
490
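/*
 * Allocate a tlb context number for a user pmap, rolling over when the
 * per-cpu range is exhausted.
 */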
491static __inline u_int
492pmap_context_alloc(void)
493{
494	u_int context;
495
496	mtx_assert(&sched_lock, MA_OWNED);
497	context = PCPU_GET(tlb_ctx);
498	if (context + 1 == PCPU_GET(tlb_ctx_max))
499		pmap_context_rollover();
500	else
501		PCPU_SET(tlb_ctx, context + 1);
502	return (context);
503}
504
505/*
506 * Initialize the pmap module.
507 */
508void
509pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
510{
511	vm_offset_t addr;
512	vm_size_t size;
513	int result;
514	int i;
515
516	for (i = 0; i < vm_page_array_size; i++) {
517		vm_page_t m;
518
519		m = &vm_page_array[i];
520		TAILQ_INIT(&m->md.pv_list);
521		m->md.pv_list_count = 0;
522	}
523
524	for (i = 0; i < translations_size; i++) {
525		addr = translations[i].om_start;
526		size = translations[i].om_size;
527		if (addr < 0xf0000000)	/* XXX */
528			continue;
529		result = vm_map_find(kernel_map, NULL, 0, &addr, size, TRUE,
530		    VM_PROT_ALL, VM_PROT_ALL, 0);
531		if (result != KERN_SUCCESS || addr != translations[i].om_start)
532			panic("pmap_init: vm_map_find");
533	}
534
535	pvzone = &pvzone_store;
536	pvinit = (struct pv_entry *)kmem_alloc(kernel_map,
537	    vm_page_array_size * sizeof (struct pv_entry));
538	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
539	    vm_page_array_size);
540	pmap_initialized = TRUE;
541}
542
543/*
544 * Initialize the address space (zone) for the pv_entries.  Set a
545 * high water mark so that the system can recover from excessive
546 * numbers of pv entries.
547 */
548void
549pmap_init2(void)
550{
551	int shpgperproc;
552
553	shpgperproc = PMAP_SHPGPERPROC;
554	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
555	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
556	pv_entry_high_water = 9 * (pv_entry_max / 10);
557	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
558}
559
560/*
561 * Extract the physical page address associated with the given
562 * map/virtual_address pair.
563 */
564vm_offset_t
565pmap_extract(pmap_t pm, vm_offset_t va)
566{
567	struct tte *tp;
568	u_long d;
569
570	if (pm == kernel_pmap)
571		return (pmap_kextract(va));
572	tp = tsb_tte_lookup(pm, va);
573	if (tp == NULL)
574		return (0);
575	else {
576		d = tp->tte_data;
577		return (TD_GET_PA(d) | (va & PMAP_TD_GET_MASK(d)));
578	}
579}
580
581/*
582 * Extract the physical page address associated with the given kernel virtual
583 * address.
584 */
585vm_offset_t
586pmap_kextract(vm_offset_t va)
587{
588	struct tte *tp;
589	u_long d;
590
591	if (va >= KERNBASE && va < KERNBASE + PAGE_SIZE_4M)
592		return (kernel_page + (va & PAGE_MASK_4M));
593	tp = tsb_kvtotte(va);
594	d = tp->tte_data;
595	if ((d & TD_V) == 0)
596		return (0);
597	return (TD_GET_PA(d) | (va & PMAP_TD_GET_MASK(d)));
598}
599
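/*
 * Register a mapping of a page at the given virtual address and determine
 * whether it can be entered with the data cache enabled.  A count of mappings
 * is kept for each cache color; the first time a mapping with a conflicting
 * color is added, the page is made uncacheable by clearing TD_CV in all
 * existing mappings and flushing the affected cache lines.  Returns non-zero
 * if the new mapping may be cacheable.
 */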
600int
601pmap_cache_enter(vm_page_t m, vm_offset_t va)
602{
603	struct tte *tp;
604	vm_offset_t pa;
605	pv_entry_t pv;
606	int c;
607	int i;
608
609	CTR2(KTR_PMAP, "pmap_cache_enter: m=%p va=%#lx", m, va);
610	PMAP_STATS_INC(pmap_ncache_enter);
611	for (i = 0, c = 0; i < DCACHE_COLORS; i++) {
612		if (i != DCACHE_COLOR(va))
613			c += m->md.colors[i];
614	}
615	m->md.colors[DCACHE_COLOR(va)]++;
616	if (c == 0) {
617		CTR0(KTR_PMAP, "pmap_cache_enter: cacheable");
618		return (1);
619	}
620	PMAP_STATS_INC(pmap_ncache_enter_nc);
621	if (c != 1) {
622		CTR0(KTR_PMAP, "pmap_cache_enter: already uncacheable");
623		return (0);
624	}
625	CTR0(KTR_PMAP, "pmap_cache_enter: marking uncacheable");
626	if ((m->flags & PG_UNMANAGED) != 0)
627		panic("pmap_cache_enter: non-managed page");
628	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
629		if ((tp = tsb_tte_lookup(pv->pv_pmap, pv->pv_va)) != NULL) {
630			atomic_clear_long(&tp->tte_data, TD_CV);
631			tlb_page_demap(TLB_DTLB | TLB_ITLB, pv->pv_pmap,
632			    pv->pv_va);
633		}
634	}
635	pa = VM_PAGE_TO_PHYS(m);
636	dcache_inval_phys(pa, pa + PAGE_SIZE - 1);
637	return (0);
638}
639
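/*
 * Forget a mapping of a page at the given virtual address for the purpose of
 * data cache color tracking.
 */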
640void
641pmap_cache_remove(vm_page_t m, vm_offset_t va)
642{
643
644	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
645	    m->md.colors[DCACHE_COLOR(va)]);
646	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
647	    ("pmap_cache_remove: no mappings %d <= 0",
648	    m->md.colors[DCACHE_COLOR(va)]));
649	m->md.colors[DCACHE_COLOR(va)]--;
650}
651
652/*
653 * Map a wired page into kernel virtual address space.
654 */
655void
656pmap_kenter(vm_offset_t va, vm_offset_t pa)
657{
658	struct tte tte;
659	struct tte *tp;
660
661	tte.tte_vpn = TV_VPN(va);
662	tte.tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
663	    TD_CV | TD_P | TD_W;
664	tp = tsb_kvtotte(va);
665	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
666	    va, pa, tp, tp->tte_data);
667	if ((tp->tte_data & TD_V) != 0)
668		tlb_page_demap(TLB_DTLB, TLB_CTX_KERNEL, va);
669	*tp = tte;
670}
671
672/*
673 * Map a wired page into kernel virtual address space. This additionally
674 * takes a flag argument which is or'ed into the TTE data. This is used by
675 * bus_space_map().
676 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
677 * to flush entries that might still be in the cache, if applicable.
678 */
679void
680pmap_kenter_flags(vm_offset_t va, vm_offset_t pa, u_long flags)
681{
682	struct tte tte;
683	struct tte *tp;
684
685	tte.tte_vpn = TV_VPN(va);
686	tte.tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
687	tp = tsb_kvtotte(va);
688	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
689	    va, pa, tp, tp->tte_data);
690	if ((tp->tte_data & TD_V) != 0)
691		tlb_page_demap(TLB_DTLB, TLB_CTX_KERNEL, va);
692	*tp = tte;
693}
694
695/*
696 * Make a temporary mapping for a physical address.  This is only intended
697 * to be used for panic dumps.
698 */
699void *
700pmap_kenter_temporary(vm_offset_t pa, int i)
701{
702
703	TODO;
704}
705
706/*
707 * Remove a wired page from kernel virtual address space.
708 */
709void
710pmap_kremove(vm_offset_t va)
711{
712	struct tte *tp;
713
714	tp = tsb_kvtotte(va);
715	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
716	    tp->tte_data);
717	atomic_clear_long(&tp->tte_data, TD_V);
718	tp->tte_vpn = 0;
719	tp->tte_data = 0;
720	tlb_page_demap(TLB_DTLB, TLB_CTX_KERNEL, va);
721}
722
723/*
724 * Map a range of physical addresses into kernel virtual address space.
725 *
726 * The value passed in *virt is a suggested virtual address for the mapping.
727 * Architectures which can support a direct-mapped physical to virtual region
728 * can return the appropriate address within that region, leaving '*virt'
729 * unchanged.  We cannot and therefore do not; *virt is updated with the
730 * first usable address after the mapped region.
731 */
732vm_offset_t
733pmap_map(vm_offset_t *virt, vm_offset_t pa_start, vm_offset_t pa_end, int prot)
734{
735	vm_offset_t sva;
736	vm_offset_t va;
737	vm_offset_t pa;
738
739	pa = pa_start;
740	sva = *virt;
741	va = sva;
742	for (; pa < pa_end; pa += PAGE_SIZE, va += PAGE_SIZE)
743		pmap_kenter(va, pa);
744	tlb_range_demap(kernel_pmap, sva, sva + (pa_end - pa_start) - 1);
745	*virt = va;
746	return (sva);
747}
748
749/*
750 * Map a list of wired pages into kernel virtual address space.  This is
751 * intended for temporary mappings which do not need page modification or
752 * references recorded.  Existing mappings in the region are overwritten.
753 */
754void
755pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
756{
757	vm_offset_t va;
758	int i;
759
760	va = sva;
761	for (i = 0; i < count; i++, va += PAGE_SIZE)
762		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
763	tlb_range_demap(kernel_pmap, sva, sva + (count * PAGE_SIZE) - 1);
764}
765
766/*
767 * As above, but take an additional flags argument and call
768 * pmap_kenter_flags().
769 */
770void
771pmap_qenter_flags(vm_offset_t sva, vm_page_t *m, int count, u_long fl)
772{
773	vm_offset_t va;
774	int i;
775
776	va = sva;
777	for (i = 0; i < count; i++, va += PAGE_SIZE)
778		pmap_kenter_flags(va, VM_PAGE_TO_PHYS(m[i]), fl);
779	tlb_range_demap(kernel_pmap, sva, sva + (count * PAGE_SIZE) - 1);
780}
781
782/*
783 * Remove page mappings from kernel virtual address space.  Intended for
784 * temporary mappings entered by pmap_qenter.
785 */
786void
787pmap_qremove(vm_offset_t sva, int count)
788{
789	vm_offset_t va;
790	int i;
791
792	va = sva;
793	for (i = 0; i < count; i++, va += PAGE_SIZE)
794		pmap_kremove(va);
795	tlb_range_demap(kernel_pmap, sva, sva + (count * PAGE_SIZE) - 1);
796}
797
798/*
799 * Create the uarea for a new process.
800 * This routine directly affects the fork perf for a process.
801 */
802void
803pmap_new_proc(struct proc *p)
804{
805	vm_page_t ma[UAREA_PAGES];
806	vm_object_t upobj;
807	vm_offset_t up;
808	vm_page_t m;
809	u_int i;
810
811	/*
812	 * Allocate object for the upages.
813	 */
814	upobj = p->p_upages_obj;
815	if (upobj == NULL) {
816		upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES);
817		p->p_upages_obj = upobj;
818	}
819
820	/*
821	 * Get a kernel virtual address for the U area for this process.
822	 */
823	up = (vm_offset_t)p->p_uarea;
824	if (up == 0) {
825		up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE);
826		if (up == 0)
827			panic("pmap_new_proc: upage allocation failed");
828		p->p_uarea = (struct user *)up;
829	}
830
831	for (i = 0; i < UAREA_PAGES; i++) {
832		/*
833		 * Get a uarea page.
834		 */
835		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
836		ma[i] = m;
837
838		/*
839		 * Wire the page.
840		 */
841		m->wire_count++;
842		cnt.v_wire_count++;
843
844		vm_page_wakeup(m);
845		vm_page_flag_clear(m, PG_ZERO);
846		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
847		m->valid = VM_PAGE_BITS_ALL;
848	}
849
850	/*
851	 * Enter the pages into the kernel address space.
852	 */
853	pmap_qenter(up, ma, UAREA_PAGES);
854}
855
856/*
857 * Dispose the uarea for a process that has exited.
858 * This routine directly impacts the exit perf of a process.
859 */
860void
861pmap_dispose_proc(struct proc *p)
862{
863	vm_object_t upobj;
864	vm_offset_t up;
865	vm_page_t m;
866	int i;
867
868	upobj = p->p_upages_obj;
869	up = (vm_offset_t)p->p_uarea;
870	for (i = 0; i < UAREA_PAGES; i++) {
871		m = vm_page_lookup(upobj, i);
872		if (m == NULL)
873			panic("pmap_dispose_proc: upage already missing?");
874		vm_page_busy(m);
875		vm_page_unwire(m, 0);
876		vm_page_free(m);
877	}
878	pmap_qremove(up, UAREA_PAGES);
879
880	/*
881	 * If the process got swapped out, some of its UPAGES might have gotten
882	 * swapped.  Just get rid of the object to clean up the swap use
883	 * proactively.  NOTE! might block waiting for paging I/O to complete.
884	 */
885	if (upobj->type == OBJT_SWAP) {
886		p->p_upages_obj = NULL;
887		vm_object_deallocate(upobj);
888	}
889}
890
891/*
892 * Allow the uarea for a process to be prejudicially paged out.
893 */
894void
895pmap_swapout_proc(struct proc *p)
896{
897	vm_object_t upobj;
898	vm_offset_t up;
899	vm_page_t m;
900	int i;
901
902	upobj = p->p_upages_obj;
903	up = (vm_offset_t)p->p_uarea;
904	for (i = 0; i < UAREA_PAGES; i++) {
905		m = vm_page_lookup(upobj, i);
906		if (m == NULL)
907			panic("pmap_swapout_proc: upage already missing?");
908		vm_page_dirty(m);
909		vm_page_unwire(m, 0);
910	}
911	pmap_qremove(up, UAREA_PAGES);
912}
913
914/*
915 * Bring the uarea for a specified process back in.
916 */
917void
918pmap_swapin_proc(struct proc *p)
919{
920	vm_page_t ma[UAREA_PAGES];
921	vm_object_t upobj;
922	vm_offset_t up;
923	vm_page_t m;
924	int rv;
925	int i;
926
927	upobj = p->p_upages_obj;
928	up = (vm_offset_t)p->p_uarea;
929	for (i = 0; i < UAREA_PAGES; i++) {
930		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
931		if (m->valid != VM_PAGE_BITS_ALL) {
932			rv = vm_pager_get_pages(upobj, &m, 1, 0);
933			if (rv != VM_PAGER_OK)
934				panic("pmap_swapin_proc: cannot get upage");
935			m = vm_page_lookup(upobj, i);
936			m->valid = VM_PAGE_BITS_ALL;
937		}
938		ma[i] = m;
939		vm_page_wire(m);
940		vm_page_wakeup(m);
941		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
942	}
943	pmap_qenter(up, ma, UAREA_PAGES);
944}
945
946/*
947 * Create the kernel stack and pcb for a new thread.
948 * This routine directly affects the fork perf for a process and
949 * create performance for a thread.
950 */
951void
952pmap_new_thread(struct thread *td)
953{
954	vm_page_t ma[KSTACK_PAGES];
955	vm_object_t ksobj;
956	vm_offset_t ks;
957	vm_page_t m;
958	u_int i;
959
960	/*
961	 * Allocate object for the kstack.
962	 */
963	ksobj = td->td_kstack_obj;
964	if (ksobj == NULL) {
965		ksobj = vm_object_allocate(OBJT_DEFAULT, KSTACK_PAGES);
966		td->td_kstack_obj = ksobj;
967	}
968
969	/*
970	 * Get a kernel virtual address for the kstack for this thread.
971	 */
972	ks = td->td_kstack;
973	if (ks == 0) {
974		ks = kmem_alloc_nofault(kernel_map,
975		   (KSTACK_PAGES + KSTACK_GUARD_PAGES) * PAGE_SIZE);
976		if (ks == 0)
977			panic("pmap_new_thread: kstack allocation failed");
978		tlb_page_demap(TLB_DTLB, kernel_pmap, ks);
979		ks += KSTACK_GUARD_PAGES * PAGE_SIZE;
980		td->td_kstack = ks;
981	}
982
983	for (i = 0; i < KSTACK_PAGES; i++) {
984		/*
985		 * Get a kernel stack page.
986		 */
987		m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
988		ma[i] = m;
989
990		/*
991		 * Wire the page.
992		 */
993		m->wire_count++;
994		cnt.v_wire_count++;
995
996		vm_page_wakeup(m);
997		vm_page_flag_clear(m, PG_ZERO);
998		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
999		m->valid = VM_PAGE_BITS_ALL;
1000	}
1001
1002	/*
1003	 * Enter the page into the kernel address space.
1004	 */
1005	pmap_qenter(ks, ma, KSTACK_PAGES);
1006}
1007
1008/*
1009 * Dispose the kernel stack for a thread that has exited.
1010 * This routine directly impacts the exit perf of a process and thread.
1011 */
1012void
1013pmap_dispose_thread(struct thread *td)
1014{
1015	vm_object_t ksobj;
1016	vm_offset_t ks;
1017	vm_page_t m;
1018	int i;
1019
1020	ksobj = td->td_kstack_obj;
1021	ks = td->td_kstack;
1022	for (i = 0; i < KSTACK_PAGES; i++) {
1023		m = vm_page_lookup(ksobj, i);
1024		if (m == NULL)
1025			panic("pmap_dispose_thread: kstack already missing?");
1026		vm_page_busy(m);
1027		vm_page_unwire(m, 0);
1028		vm_page_free(m);
1029	}
1030	pmap_qremove(ks, KSTACK_PAGES);
1031
1032	/*
1033	 * If the thread got swapped out, some of its KSTACK might have gotten
1034	 * swapped.  Just get rid of the object to clean up the swap use
1035	 * proactively.  NOTE! might block waiting for paging I/O to complete.
1036	 */
1037	if (ksobj->type == OBJT_SWAP) {
1038		td->td_kstack_obj = NULL;
1039		vm_object_deallocate(ksobj);
1040	}
1041}
1042
1043/*
1044 * Allow the kernel stack for a thread to be prejudicially paged out.
1045 */
1046void
1047pmap_swapout_thread(struct thread *td)
1048{
1049	vm_object_t ksobj;
1050	vm_offset_t ks;
1051	vm_page_t m;
1052	int i;
1053
1054	ksobj = td->td_kstack_obj;
1055	ks = (vm_offset_t)td->td_kstack;
1056	for (i = 0; i < KSTACK_PAGES; i++) {
1057		m = vm_page_lookup(ksobj, i);
1058		if (m == NULL)
1059			panic("pmap_swapout_thread: kstack already missing?");
1060		vm_page_dirty(m);
1061		vm_page_unwire(m, 0);
1062	}
1063	pmap_qremove(ks, KSTACK_PAGES);
1064}
1065
1066/*
1067 * Bring the kernel stack for a specified thread back in.
1068 */
1069void
1070pmap_swapin_thread(struct thread *td)
1071{
1072	vm_page_t ma[KSTACK_PAGES];
1073	vm_object_t ksobj;
1074	vm_offset_t ks;
1075	vm_page_t m;
1076	int rv;
1077	int i;
1078
1079	ksobj = td->td_kstack_obj;
1080	ks = td->td_kstack;
1081	for (i = 0; i < KSTACK_PAGES; i++) {
1082		m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1083		if (m->valid != VM_PAGE_BITS_ALL) {
1084			rv = vm_pager_get_pages(ksobj, &m, 1, 0);
1085			if (rv != VM_PAGER_OK)
1086				panic("pmap_swapin_thread: cannot get kstack");
1087			m = vm_page_lookup(ksobj, i);
1088			m->valid = VM_PAGE_BITS_ALL;
1089		}
1090		ma[i] = m;
1091		vm_page_wire(m);
1092		vm_page_wakeup(m);
1093		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
1094	}
1095	pmap_qenter(ks, ma, KSTACK_PAGES);
1096}
1097
1098/*
1099 * Initialize the pmap associated with process 0.
1100 */
1101void
1102pmap_pinit0(pmap_t pm)
1103{
1104	int i;
1105
1106	for (i = 0; i < MAXCPU; i++)
1107		pm->pm_context[i] = 0;
1108	pm->pm_active = 0;
1109	pm->pm_count = 1;
1110	pm->pm_tsb = NULL;
1111	pm->pm_tsb_obj = NULL;
1112	TAILQ_INIT(&pm->pm_pvlist);
1113	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1114}
1115
1116/*
1117 * Initialize a preallocated and zeroed pmap structure, such as one in a
1118 * vmspace structure.
1119 */
1120void
1121pmap_pinit(pmap_t pm)
1122{
1123	vm_page_t ma[TSB_PAGES];
1124	vm_page_t m;
1125	int i;
1126
1127	/*
1128	 * Allocate kva space for the tsb.
1129	 */
1130	if (pm->pm_tsb == NULL) {
1131		pm->pm_tsb = (struct tte *)kmem_alloc_pageable(kernel_map,
1132		    TSB_BSIZE);
1133	}
1134
1135	/*
1136	 * Allocate an object for it.
1137	 */
1138	if (pm->pm_tsb_obj == NULL)
1139		pm->pm_tsb_obj = vm_object_allocate(OBJT_DEFAULT, TSB_PAGES);
1140
1141	for (i = 0; i < TSB_PAGES; i++) {
1142		m = vm_page_grab(pm->pm_tsb_obj, i,
1143		    VM_ALLOC_RETRY | VM_ALLOC_ZERO);
1144		if ((m->flags & PG_ZERO) == 0)
1145			pmap_zero_page(VM_PAGE_TO_PHYS(m));
1146
1147		m->wire_count++;
1148		cnt.v_wire_count++;
1149
1150		vm_page_flag_clear(m, PG_MAPPED | PG_BUSY);
1151		m->valid = VM_PAGE_BITS_ALL;
1152
1153		ma[i] = m;
1154	}
1155	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1156
1157	for (i = 0; i < MAXCPU; i++)
1158		pm->pm_context[i] = -1;
1159	pm->pm_active = 0;
1160	pm->pm_count = 1;
1161	TAILQ_INIT(&pm->pm_pvlist);
1162	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1163}
1164
1165void
1166pmap_pinit2(pmap_t pmap)
1167{
1168	/* XXX: Remove this stub when no longer called */
1169}
1170
1171/*
1172 * Release any resources held by the given physical map.
1173 * Called when a pmap initialized by pmap_pinit is being released.
1174 * Should only be called if the map contains no valid mappings.
1175 */
1176void
1177pmap_release(pmap_t pm)
1178{
1179	vm_object_t obj;
1180	vm_page_t m;
1181
1182	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1183	    pm->pm_context[PCPU_GET(cpuid)], pm->pm_tsb);
1184	obj = pm->pm_tsb_obj;
1185	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1186	KASSERT(TAILQ_EMPTY(&pm->pm_pvlist),
1187	    ("pmap_release: leaking pv entries"));
1188	KASSERT(pmap_resident_count(pm) == 0,
1189	    ("pmap_release: resident pages %ld != 0",
1190	    pmap_resident_count(pm)));
1191	TAILQ_FOREACH(m, &obj->memq, listq) {
1192		if (vm_page_sleep_busy(m, FALSE, "pmaprl"))
1193			continue;
1194		vm_page_busy(m);
1195		KASSERT(m->hold_count == 0,
1196		    ("pmap_release: freeing held tsb page"));
1197		m->wire_count--;
1198		cnt.v_wire_count--;
1199		vm_page_free_zero(m);
1200	}
1201	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1202}
1203
1204/*
1205 * Grow the number of kernel page table entries.  Unneeded.
1206 */
1207void
1208pmap_growkernel(vm_offset_t addr)
1209{
1210}
1211
1212/*
1213 * Retire the given physical map from service.  Pmaps are always allocated
1214 * as part of a larger structure, so this never happens.
1215 */
1216void
1217pmap_destroy(pmap_t pm)
1218{
1219	panic("pmap_destroy: unimplemented");
1220}
1221
1222/*
1223 * Add a reference to the specified pmap.
1224 */
1225void
1226pmap_reference(pmap_t pm)
1227{
1228	if (pm != NULL)
1229		pm->pm_count++;
1230}
1231
1232/*
1233 * This routine is very drastic, but can save the system
1234 * in a pinch.
1235 */
1236void
1237pmap_collect(void)
1238{
1239	static int warningdone;
1240	vm_page_t m;
1241	int i;
1242
1243	if (pmap_pagedaemon_waken == 0)
1244		return;
1245	if (warningdone++ < 5)
1246		printf("pmap_collect: collecting pv entries -- suggest "
1247		    "increasing PMAP_SHPGPERPROC\n");
1248	for (i = 0; i < vm_page_array_size; i++) {
1249		m = &vm_page_array[i];
1250		if (m->wire_count || m->hold_count || m->busy ||
1251		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1252			continue;
1253		pv_remove_all(m);
1254	}
1255	pmap_pagedaemon_waken = 0;
1256}
1257
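/*
 * tsb_foreach() callback to remove a single tte:  transfer referenced and
 * modified state to the page, remove the pv entry and cache color tracking,
 * and invalidate the tte.  Returns 0 to stop the traversal once the pmap has
 * no resident pages left.
 */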
1258static int
1259pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1260		vm_offset_t va)
1261{
1262	vm_page_t m;
1263
1264	m = PHYS_TO_VM_PAGE(TD_GET_PA(tp->tte_data));
1265	if ((tp->tte_data & TD_PV) != 0) {
1266		if ((tp->tte_data & TD_W) != 0 &&
1267		    pmap_track_modified(pm, va))
1268			vm_page_dirty(m);
1269		if ((tp->tte_data & TD_REF) != 0)
1270			vm_page_flag_set(m, PG_REFERENCED);
1271		pv_remove(pm, m, va);
1272		pmap_cache_remove(m, va);
1273	}
1274	atomic_clear_long(&tp->tte_data, TD_V);
1275	tp->tte_vpn = 0;
1276	tp->tte_data = 0;
1277	if (PMAP_REMOVE_DONE(pm))
1278		return (0);
1279	return (1);
1280}
1281
1282/*
1283 * Remove the given range of addresses from the specified map.
1284 */
1285void
1286pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1287{
1288	struct tte *tp;
1289	vm_offset_t va;
1290
1291	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1292	    pm->pm_context[PCPU_GET(cpuid)], start, end);
1293	if (PMAP_REMOVE_DONE(pm))
1294		return;
1295	if (end - start > PMAP_TSB_THRESH) {
1296		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1297		tlb_context_demap(pm);
1298	} else {
1299		for (va = start; va < end; va += PAGE_SIZE) {
1300			if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1301				if (!pmap_remove_tte(pm, NULL, tp, va))
1302					break;
1303			}
1304		}
1305		tlb_range_demap(pm, start, end - 1);
1306	}
1307}
1308
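/*
 * tsb_foreach() callback to write protect a single tte:  clear the write bits
 * (TD_W and TD_SW); for managed pages the referenced and modified state is
 * transferred to the page first.
 */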
1309static int
1310pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1311		 vm_offset_t va)
1312{
1313	vm_page_t m;
1314	u_long data;
1315
1316	data = tp->tte_data;
1317	if ((data & TD_PV) != 0) {
1318		m = PHYS_TO_VM_PAGE(TD_GET_PA(data));
1319		if ((data & TD_REF) != 0) {
1320			vm_page_flag_set(m, PG_REFERENCED);
1321			data &= ~TD_REF;
1322		}
1323		if ((data & TD_W) != 0 &&
1324		    pmap_track_modified(pm, va)) {
1325			vm_page_dirty(m);
1326		}
1327	}
1328
1329	data &= ~(TD_W | TD_SW);
1330	CTR2(KTR_PMAP, "pmap_protect: new=%#lx old=%#lx",
1331	    data, tp->tte_data);
1332	tp->tte_data = data;
1333	return (0);
1334}
1335
1336/*
1337 * Set the physical protection on the specified range of this map as requested.
1338 */
1339void
1340pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1341{
1342	vm_offset_t va;
1343	struct tte *tp;
1344
1345	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1346	    pm->pm_context[PCPU_GET(cpuid)], sva, eva, prot);
1347
1348	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
1349	    ("pmap_protect: non current pmap"));
1350
1351	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1352		pmap_remove(pm, sva, eva);
1353		return;
1354	}
1355
1356	if (prot & VM_PROT_WRITE)
1357		return;
1358
1359	if (eva - sva > PMAP_TSB_THRESH) {
1360		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1361		tlb_context_demap(pm);
1362	} else {
1363		for (va = sva; va < eva; va += PAGE_SIZE) {
1364			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1365				pmap_protect_tte(pm, NULL, tp, va);
1366		}
1367		tlb_range_demap(pm, sva, eva - 1);
1368	}
1369}
1370
1371/*
1372 * Map the given physical page at the specified virtual address in the
1373 * target pmap with the protection requested.  If specified the page
1374 * will be wired down.
1375 */
1376void
1377pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1378	   boolean_t wired)
1379{
1380	struct tte otte;
1381	struct tte tte;
1382	struct tte *tp;
1383	vm_offset_t pa;
1384	vm_page_t om;
1385
1386	pa = VM_PAGE_TO_PHYS(m);
1387	CTR6(KTR_PMAP,
1388	    "pmap_enter: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1389	    pm->pm_context[PCPU_GET(cpuid)], m, va, pa, prot, wired);
1390
1391	tte.tte_vpn = TV_VPN(va);
1392	tte.tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP;
1393
1394	/*
1395	 * If there is an existing mapping, and the physical address has not
1396	 * changed, it must be a protection or wiring change.
1397	 */
1398	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1399		otte = *tp;
1400		om = PHYS_TO_VM_PAGE(TD_GET_PA(otte.tte_data));
1401
1402		if (TD_GET_PA(otte.tte_data) == pa) {
1403			CTR0(KTR_PMAP, "pmap_enter: update");
1404			PMAP_STATS_INC(pmap_enter_nupdate);
1405
1406			/*
1407			 * Wiring change, just update stats.
1408			 */
1409			if (wired) {
1410				if ((otte.tte_data & TD_WIRED) == 0)
1411					pm->pm_stats.wired_count++;
1412			} else {
1413				if ((otte.tte_data & TD_WIRED) != 0)
1414					pm->pm_stats.wired_count--;
1415			}
1416
1417			if ((otte.tte_data & TD_CV) != 0)
1418				tte.tte_data |= TD_CV;
1419			if ((otte.tte_data & TD_REF) != 0)
1420				tte.tte_data |= TD_REF;
1421			if ((otte.tte_data & TD_PV) != 0) {
1422				KASSERT((m->flags &
1423				    (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1424				    ("pmap_enter: unmanaged pv page"));
1425				tte.tte_data |= TD_PV;
1426			}
1427			/*
1428			 * If we're turning off write protection, sense modify
1429			 * status and remove the old mapping.
1430			 */
1431			if ((prot & VM_PROT_WRITE) == 0 &&
1432			    (otte.tte_data & (TD_W | TD_SW)) != 0) {
1433				if ((otte.tte_data & TD_PV) != 0) {
1434					if (pmap_track_modified(pm, va))
1435						vm_page_dirty(m);
1436				}
1437				tlb_tte_demap(otte, pm);
1438			}
1439		} else {
1440			CTR0(KTR_PMAP, "pmap_enter: replace");
1441			PMAP_STATS_INC(pmap_enter_nreplace);
1442
1443			/*
1444			 * Mapping has changed, invalidate old range.
1445			 */
1446			if (!wired && (otte.tte_data & TD_WIRED) != 0)
1447				pm->pm_stats.wired_count--;
1448
1449			/*
1450			 * Enter on the pv list if part of our managed memory.
1451			 */
1452			if ((otte.tte_data & TD_PV) != 0) {
1453				KASSERT((m->flags &
1454				    (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1455				    ("pmap_enter: unmanaged pv page"));
1456				if ((otte.tte_data & TD_REF) != 0)
1457					vm_page_flag_set(om, PG_REFERENCED);
1458				if ((otte.tte_data & TD_W) != 0 &&
1459				    pmap_track_modified(pm, va))
1460					vm_page_dirty(om);
1461				pv_remove(pm, om, va);
1462				pv_insert(pm, m, va);
1463				tte.tte_data |= TD_PV;
1464				pmap_cache_remove(om, va);
1465				if (pmap_cache_enter(m, va) != 0)
1466					tte.tte_data |= TD_CV;
1467			}
1468			tlb_tte_demap(otte, pm);
1469		}
1470	} else {
1471		CTR0(KTR_PMAP, "pmap_enter: new");
1472		PMAP_STATS_INC(pmap_enter_nnew);
1473
1474		/*
1475		 * Enter on the pv list if part of our managed memory.
1476		 */
1477		if (pmap_initialized &&
1478		    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
1479			pv_insert(pm, m, va);
1480			tte.tte_data |= TD_PV;
1481			if (pmap_cache_enter(m, va) != 0)
1482				tte.tte_data |= TD_CV;
1483		}
1484
1485		/*
1486		 * Increment counters.
1487		 */
1488		if (wired)
1489			pm->pm_stats.wired_count++;
1490
1491	}
1492
1493	/*
1494	 * Now validate mapping with desired protection/wiring.
1495	 */
1496	if (wired) {
1497		tte.tte_data |= TD_REF | TD_WIRED;
1498		if ((prot & VM_PROT_WRITE) != 0)
1499			tte.tte_data |= TD_W;
1500	}
1501	if (pm->pm_context[PCPU_GET(cpuid)] == TLB_CTX_KERNEL)
1502		tte.tte_data |= TD_P;
1503	if (prot & VM_PROT_WRITE)
1504		tte.tte_data |= TD_SW;
1505	if (prot & VM_PROT_EXECUTE) {
1506		tte.tte_data |= TD_EXEC;
1507		PMAP_STATS_INC(pmap_niflush);
1508		icache_inval_phys(pa, pa + PAGE_SIZE - 1);
1509	}
1510
1511	if (tp != NULL)
1512		*tp = tte;
1513	else
1514		tsb_tte_enter(pm, m, va, tte);
1515}
1516
1517void
1518pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1519		    vm_pindex_t pindex, vm_size_t size, int limit)
1520{
1521	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
1522	    ("pmap_object_init_pt: non current pmap"));
1523	/* XXX */
1524}
1525
1526void
1527pmap_prefault(pmap_t pm, vm_offset_t va, vm_map_entry_t entry)
1528{
1529	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
1530	    ("pmap_prefault: non current pmap"));
1531	/* XXX */
1532}
1533
1534/*
1535 * Change the wiring attribute for a map/virtual-address pair.
1536 * The mapping must already exist in the pmap.
1537 */
1538void
1539pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1540{
1541	struct tte *tp;
1542
1543	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1544		if (wired) {
1545			if ((tp->tte_data & TD_WIRED) == 0)
1546				pm->pm_stats.wired_count++;
1547			tp->tte_data |= TD_WIRED;
1548		} else {
1549			if ((tp->tte_data & TD_WIRED) != 0)
1550				pm->pm_stats.wired_count--;
1551			tp->tte_data &= ~TD_WIRED;
1552		}
1553	}
1554}
1555
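/*
 * tsb_foreach() callback used by pmap_copy():  copy a single mapping into the
 * destination pmap, unless one already exists at the virtual address.  The
 * referenced, modified and writeable bits are stripped so that they will be
 * faulted in again on first use.
 */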
1556static int
1557pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp, vm_offset_t va)
1558{
1559	struct tte tte;
1560	vm_page_t m;
1561
1562	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1563		tte.tte_data = tp->tte_data &
1564		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1565		tte.tte_vpn = TV_VPN(va);
1566		m = PHYS_TO_VM_PAGE(TD_GET_PA(tp->tte_data));
1567		if ((tp->tte_data & TD_PV) != 0) {
1568			KASSERT((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1569			    ("pmap_copy_tte: unmanaged pv page"));
1570			pv_insert(dst_pmap, m, va);
1571			tte.tte_data |= TD_PV;
1572			if (pmap_cache_enter(m, va) != 0)
1573				tte.tte_data |= TD_CV;
1574		}
1575		tsb_tte_enter(dst_pmap, m, va, tte);
1576	}
1577	return (1);
1578}
1579
1580void
1581pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1582	  vm_size_t len, vm_offset_t src_addr)
1583{
1584	struct tte *tp;
1585	vm_offset_t va;
1586
1587	if (dst_addr != src_addr)
1588		return;
1589	if (len > PMAP_TSB_THRESH) {
1590		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1591		    pmap_copy_tte);
1592		tlb_context_demap(dst_pmap);
1593	} else {
1594		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE) {
1595			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1596				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1597		}
1598		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1599	}
1600}
1601
1602/*
1603 * Zero a page of physical memory by temporarily mapping it into the tlb.
1604 */
1605void
1606pmap_zero_page(vm_offset_t pa)
1607{
1608
1609	CTR1(KTR_PMAP, "pmap_zero_page: pa=%#lx", pa);
1610	dcache_inval_phys(pa, pa + PAGE_SIZE);
1611	aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1612}
1613
1614void
1615pmap_zero_page_area(vm_offset_t pa, int off, int size)
1616{
1617
1618	CTR3(KTR_PMAP, "pmap_zero_page_area: pa=%#lx off=%#x size=%#x",
1619	    pa, off, size);
1620	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1621	dcache_inval_phys(pa + off, pa + off + size);
1622	aszero(ASI_PHYS_USE_EC, pa + off, size);
1623}
1624
1625/*
1626 * Copy a page of physical memory by temporarily mapping it into the tlb.
1627 */
1628void
1629pmap_copy_page(vm_offset_t src, vm_offset_t dst)
1630{
1631
1632	CTR2(KTR_PMAP, "pmap_copy_page: src=%#lx dst=%#lx", src, dst);
1633	dcache_inval_phys(dst, dst + PAGE_SIZE);
1634	ascopy(ASI_PHYS_USE_EC, src, dst, PAGE_SIZE);
1635}
1636
1637/*
1638 * Make the specified page pageable (or not).  Unneeded.
1639 */
1640void
1641pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
1642	      boolean_t pageable)
1643{
1644}
1645
1646/*
1647 * Returns true if the pmap's pv is one of the first
1648 * 16 pvs linked to from this page.  This count may
1649 * be changed upwards or downwards in the future; it
1650 * is only necessary that true be returned for a small
1651 * subset of pmaps for proper page aging.
1652 */
1653boolean_t
1654pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1655{
1656
1657	if (m->flags & PG_FICTITIOUS)
1658		return (FALSE);
1659	return (pv_page_exists(pm, m));
1660}
1661
1662/*
1663 * Remove all pages from the specified address space; this aids process exit
1664 * speeds.  This is much faster than pmap_remove in the case of running down
1665 * an entire address space.  Only works for the current pmap.
1666 */
1667void
1668pmap_remove_pages(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1669{
1670	struct tte *tp;
1671	pv_entry_t npv;
1672	pv_entry_t pv;
1673	vm_page_t m;
1674
1675	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
1676	    ("pmap_remove_pages: non current pmap"));
1677	npv = NULL;
1678	for (pv = TAILQ_FIRST(&pm->pm_pvlist); pv != NULL; pv = npv) {
1679		npv = TAILQ_NEXT(pv, pv_plist);
1680		if (pv->pv_va >= eva || pv->pv_va < sva)
1681			continue;
1682		if ((tp = tsb_tte_lookup(pv->pv_pmap, pv->pv_va)) == NULL)
1683			continue;
1684
1685		/*
1686		 * We cannot remove wired pages at this time.
1687		 */
1688		if ((tp->tte_data & TD_WIRED) != 0)
1689			continue;
1690
1691		atomic_clear_long(&tp->tte_data, TD_V);
1692		tp->tte_vpn = 0;
1693		tp->tte_data = 0;
1694
1695		m = pv->pv_m;
1696
1697		pv->pv_pmap->pm_stats.resident_count--;
1698		m->md.pv_list_count--;
1699		pmap_cache_remove(m, pv->pv_va);
1700		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1701		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1702		if (TAILQ_EMPTY(&m->md.pv_list))
1703			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1704		pv_free(pv);
1705	}
1706	tlb_context_demap(pm);
1707}
1708
1709/*
1710 * Lower the permission for all mappings to a given page.
1711 */
1712void
1713pmap_page_protect(vm_page_t m, vm_prot_t prot)
1714{
1715
1716	if ((prot & VM_PROT_WRITE) == 0) {
1717		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
1718			pv_bit_clear(m, TD_W | TD_SW);
1719		else
1720			pv_remove_all(m);
1721	}
1722}
1723
1724vm_offset_t
1725pmap_phys_address(int ppn)
1726{
1727
1728	return (sparc64_ptob(ppn));
1729}
1730
1731/*
1732 *	pmap_ts_referenced:
1733 *
1734 *	Return a count of reference bits for a page, clearing those bits.
1735 *	It is not necessary for every reference bit to be cleared, but it
1736 *	is necessary that 0 only be returned when there are truly no
1737 *	reference bits set.
1738 *
1739 *	XXX: The exact number of bits to check and clear is a matter that
1740 *	should be tested and standardized at some point in the future for
1741 *	optimal aging of shared pages.
1742 */
1743
1744int
1745pmap_ts_referenced(vm_page_t m)
1746{
1747
1748	if (m->flags & PG_FICTITIOUS)
1749		return (0);
1750	return (pv_bit_count(m, TD_REF));
1751}
1752
1753boolean_t
1754pmap_is_modified(vm_page_t m)
1755{
1756
1757	if (m->flags & PG_FICTITIOUS)
1758		return (FALSE);
1759	return (pv_bit_test(m, TD_W));
1760}
1761
1762void
1763pmap_clear_modify(vm_page_t m)
1764{
1765
1766	if (m->flags & PG_FICTITIOUS)
1767		return;
1768	pv_bit_clear(m, TD_W);
1769}
1770
1771void
1772pmap_clear_reference(vm_page_t m)
1773{
1774
1775	if (m->flags & PG_FICTITIOUS)
1776		return;
1777	pv_bit_clear(m, TD_REF);
1778}
1779
1780int
1781pmap_mincore(pmap_t pm, vm_offset_t addr)
1782{
1783	TODO;
1784	return (0);
1785}
1786
1787/*
1788 * Activate a user pmap.  The pmap must be activated before its address space
1789 * can be accessed in any way.
1790 */
1791void
1792pmap_activate(struct thread *td)
1793{
1794	struct vmspace *vm;
1795	vm_offset_t tsb;
1796	u_long context;
1797	pmap_t pm;
1798
1799	/*
1800	 * Load all the data we need up front to encourage the compiler to
1801	 * not issue any loads while we have interrupts disabled below.
1802	 */
1803	vm = td->td_proc->p_vmspace;
1804	pm = &vm->vm_pmap;
1805	tsb = (vm_offset_t)pm->pm_tsb;
1806
1807	KASSERT(pm->pm_active == 0, ("pmap_activate: pmap already active?"));
1808	KASSERT(pm->pm_context[PCPU_GET(cpuid)] != 0,
1809	    ("pmap_activate: activating nucleus context?"));
1810
1811	mtx_lock_spin(&sched_lock);
1812	wrpr(pstate, 0, PSTATE_MMU);
1813	mov(tsb, TSB_REG);
1814	wrpr(pstate, 0, PSTATE_KERNEL);
1815	context = pmap_context_alloc();
1816	pm->pm_context[PCPU_GET(cpuid)] = context;
1817	pm->pm_active |= PCPU_GET(cpumask);
1818	PCPU_SET(vmspace, vm);
1819	stxa(AA_DMMU_PCXR, ASI_DMMU, context);
1820	membar(Sync);
1821	mtx_unlock_spin(&sched_lock);
1822}
1823
1824vm_offset_t
1825pmap_addr_hint(vm_object_t object, vm_offset_t va, vm_size_t size)
1826{
1827
1828	return (va);
1829}
1830