pmap.c revision 91403
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *      This product includes software developed by the University of
24 *      California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
42 * $FreeBSD: head/sys/sparc64/sparc64/pmap.c 91403 2002-02-27 18:03:02Z silby $
43 */
44
45/*
46 * Manages physical address maps.
47 *
48 * In addition to hardware address maps, this module is called upon to
49 * provide software-use-only maps which may or may not be stored in the
50 * same form as hardware maps.  These pseudo-maps are used to store
51 * intermediate results from copy operations to and from address spaces.
52 *
53 * Since the information managed by this module is also stored by the
54 * logical address mapping module, this module may throw away valid virtual
55 * to physical mappings at almost any time.  However, invalidations of
56 * mappings must be done as requested.
57 *
58 * In order to cope with hardware architectures which make virtual to
59 * physical map invalidates expensive, this module may delay invalidate
60 * reduced protection operations until such time as they are actually
61 * necessary.  This module is given full information as to which processors
62 * are currently using which maps, and to when physical maps must be made
63 * correct.
64 */
65
66#include "opt_msgbuf.h"
67#include "opt_pmap.h"
68
69#include <sys/param.h>
70#include <sys/kernel.h>
71#include <sys/ktr.h>
72#include <sys/lock.h>
73#include <sys/msgbuf.h>
74#include <sys/mutex.h>
75#include <sys/proc.h>
76#include <sys/sysctl.h>
77#include <sys/systm.h>
78#include <sys/vmmeter.h>
79
80#include <dev/ofw/openfirm.h>
81
82#include <vm/vm.h>
83#include <vm/vm_param.h>
84#include <vm/vm_kern.h>
85#include <vm/vm_page.h>
86#include <vm/vm_map.h>
87#include <vm/vm_object.h>
88#include <vm/vm_extern.h>
89#include <vm/vm_pageout.h>
90#include <vm/vm_pager.h>
91#include <vm/vm_zone.h>
92
93#include <machine/cache.h>
94#include <machine/frame.h>
95#include <machine/md_var.h>
96#include <machine/pv.h>
97#include <machine/tlb.h>
98#include <machine/tte.h>
99#include <machine/tsb.h>
100
101#define	PMAP_DEBUG
102
103#ifndef	PMAP_SHPGPERPROC
104#define	PMAP_SHPGPERPROC	200
105#endif
106
107struct mem_region {
108	vm_offset_t mr_start;
109	vm_offset_t mr_size;
110};
111
112struct ofw_map {
113	vm_offset_t om_start;
114	vm_offset_t om_size;
115	u_long	om_tte;
116};
117
118/*
119 * Virtual and physical address of message buffer.
120 */
121struct msgbuf *msgbufp;
122vm_offset_t msgbuf_phys;
123
124/*
125 * Physical addresses of first and last available physical page.
126 */
127vm_offset_t avail_start;
128vm_offset_t avail_end;
129
130int pmap_pagedaemon_waken;
131
132/*
133 * Map of physical memory regions.
134 */
135vm_offset_t phys_avail[128];
136static struct mem_region mra[128];
137static struct ofw_map translations[128];
138static int translations_size;
139
140/*
141 * First and last available kernel virtual addresses.
142 */
143vm_offset_t virtual_avail;
144vm_offset_t virtual_end;
145vm_offset_t kernel_vm_end;
146
147/*
148 * Physical address of the locked 4 meg page the kernel binary was loaded
149 * into. This will need to become a list later.
150 */
151vm_offset_t kernel_page;
152
153/*
154 * Kernel pmap.
155 */
156struct pmap kernel_pmap_store;
157
158/*
159 * Map of free and in use hardware contexts and index of first potentially
160 * free context.
161 */
162static char pmap_context_map[PMAP_CONTEXT_MAX];
163static u_int pmap_context_base;
164
165static boolean_t pmap_initialized = FALSE;
166
167/* Convert a tte data field into a page mask */
168static vm_offset_t pmap_page_masks[] = {
169	PAGE_MASK_8K,
170	PAGE_MASK_64K,
171	PAGE_MASK_512K,
172	PAGE_MASK_4M
173};
174
175#define	PMAP_TD_GET_MASK(d)	pmap_page_masks[TD_GET_SIZE((d))]
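
/*
 * For example, pmap_extract() and pmap_kextract() below reconstruct the full
 * physical address of a mapping by combining the page frame with the offset
 * bits selected by the mapping's page size:
 *
 *	pa = TD_GET_PA(d) | (va & PMAP_TD_GET_MASK(d));
 */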
176
177/*
178 * Allocate and free hardware context numbers.
179 */
180static u_int pmap_context_alloc(void);
181static void pmap_context_destroy(u_int i);
182
183/*
184 * Allocate physical memory for use in pmap_bootstrap.
185 */
186static vm_offset_t pmap_bootstrap_alloc(vm_size_t size);
187
188/*
189 * If a user pmap is processed with pmap_remove and the
190 * resident count drops to 0, there are no more pages to remove, so we
191 * need not continue.
192 */
193#define	PMAP_REMOVE_DONE(pm) \
194	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
195
196/*
197 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
198 * and pmap_protect() instead of trying each virtual address.
199 */
200#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
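
/*
 * Rationale: tsb_foreach() visits at most a fixed number of tsb entries,
 * while the per-page loops do one tsb_tte_lookup() per virtual page, so once
 * a range spans more pages than roughly half the tsb can hold, walking the
 * tsb is the cheaper option.  For illustration only (the real value comes
 * from TSB_SIZE): with a hypothetical tsb of 4096 entries and 8K pages the
 * threshold would be 2048 * 8K = 16MB.
 */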
201
202/* Callbacks for tsb_foreach. */
203static tsb_callback_t pmap_remove_tte;
204static tsb_callback_t pmap_protect_tte;
205
206#ifdef PMAP_STATS
207static long pmap_enter_nupdate;
208static long pmap_enter_nreplace;
209static long pmap_enter_nnew;
210static long pmap_ncache_enter;
211static long pmap_ncache_enter_nc;
212static long pmap_niflush;
213
214SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "Statistics");
215SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nupdate, CTLFLAG_RD,
216    &pmap_enter_nupdate, 0, "Number of pmap_enter() updates");
217SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nreplace, CTLFLAG_RD,
218    &pmap_enter_nreplace, 0, "Number of pmap_enter() replacements");
219SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_enter_nnew, CTLFLAG_RD,
220    &pmap_enter_nnew, 0, "Number of pmap_enter() additions");
221SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter, CTLFLAG_RD,
222    &pmap_ncache_enter, 0, "Number of pmap_cache_enter() calls");
223SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_ncache_enter_nc, CTLFLAG_RD,
224    &pmap_ncache_enter_nc, 0, "Number of pmap_cache_enter() nc");
225SYSCTL_LONG(_debug_pmap_stats, OID_AUTO, pmap_niflush, CTLFLAG_RD,
226    &pmap_niflush, 0, "Number of pmap I$ flushes");
227
228#define	PMAP_STATS_INC(var)	atomic_add_long(&var, 1)
229#else
230#define	PMAP_STATS_INC(var)
231#endif
232
233/*
234 * Quick sort callout for comparing memory regions.
235 */
236static int mr_cmp(const void *a, const void *b);
237static int om_cmp(const void *a, const void *b);
238static int
239mr_cmp(const void *a, const void *b)
240{
241	const struct mem_region *mra;
242	const struct mem_region *mrb;
243
244	mra = a;
245	mrb = b;
246	if (mra->mr_start < mrb->mr_start)
247		return (-1);
248	else if (mra->mr_start > mrb->mr_start)
249		return (1);
250	else
251		return (0);
252}
253static int
254om_cmp(const void *a, const void *b)
255{
256	const struct ofw_map *oma;
257	const struct ofw_map *omb;
258
259	oma = a;
260	omb = b;
261	if (oma->om_start < omb->om_start)
262		return (-1);
263	else if (oma->om_start > omb->om_start)
264		return (1);
265	else
266		return (0);
267}
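
/*
 * Both comparators order regions by ascending start address; pmap_bootstrap()
 * uses them with qsort() to sort the "available" and "translations"
 * properties obtained from the firmware before processing them.
 */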
268
269/*
270 * Bootstrap the system enough to run with virtual memory.
271 */
272void
273pmap_bootstrap(vm_offset_t ekva)
274{
275	struct pmap *pm;
276	struct tte tte;
277	struct tte *tp;
278	vm_offset_t off;
279	vm_offset_t pa;
280	vm_offset_t va;
281	vm_size_t physsz;
282	ihandle_t pmem;
283	ihandle_t vmem;
284	int sz;
285	int i;
286	int j;
287
288	/*
289	 * Set the start and end of kva.  The kernel is loaded at the first
290	 * available 4 meg super page, so round up to the end of the page.
291	 */
292	virtual_avail = roundup2(ekva, PAGE_SIZE_4M);
293	virtual_end = VM_MAX_KERNEL_ADDRESS;
294
295	/* Look up the page the kernel binary was loaded into. */
296	kernel_page = TD_GET_PA(ldxa(TLB_DAR_SLOT(TLB_SLOT_KERNEL),
297	    ASI_DTLB_DATA_ACCESS_REG));
298
299	/*
300	 * Find out what physical memory is available from the prom and
301	 * initialize the phys_avail array.  This must be done before
302	 * pmap_bootstrap_alloc is called.
303	 */
304	if ((pmem = OF_finddevice("/memory")) == -1)
305		panic("pmap_bootstrap: finddevice /memory");
306	if ((sz = OF_getproplen(pmem, "available")) == -1)
307		panic("pmap_bootstrap: getproplen /memory/available");
308	if (sizeof(phys_avail) < sz)
309		panic("pmap_bootstrap: phys_avail too small");
310	if (sizeof(mra) < sz)
311		panic("pmap_bootstrap: mra too small");
312	bzero(mra, sz);
313	if (OF_getprop(pmem, "available", mra, sz) == -1)
314		panic("pmap_bootstrap: getprop /memory/available");
315	sz /= sizeof(*mra);
316	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
317	qsort(mra, sz, sizeof (*mra), mr_cmp);
318	physsz = 0;
319	for (i = 0, j = 0; i < sz; i++, j += 2) {
320		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
321		    mra[i].mr_size);
322		phys_avail[j] = mra[i].mr_start;
323		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
324		physsz += mra[i].mr_size;
325	}
326	physmem = btoc(physsz);
327
328	/*
329	 * Allocate the kernel tsb and lock it in the tlb.
330	 */
331	pa = pmap_bootstrap_alloc(KVA_PAGES * PAGE_SIZE_4M);
332	if (pa & PAGE_MASK_4M)
333		panic("pmap_bootstrap: tsb unaligned\n");
334	tsb_kernel_phys = pa;
335	tsb_kernel = (struct tte *)virtual_avail;
336	virtual_avail += KVA_PAGES * PAGE_SIZE_4M;
337	pmap_map_tsb();
338	bzero(tsb_kernel, KVA_PAGES * PAGE_SIZE_4M);
339
340	/*
341	 * Allocate a kernel stack with guard page for thread0 and map it into
342	 * the kernel tsb.
343	 */
344	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE);
345	kstack0_phys = pa;
346	kstack0 = virtual_avail + (KSTACK_GUARD_PAGES * PAGE_SIZE);
347	virtual_avail += (KSTACK_PAGES + KSTACK_GUARD_PAGES) * PAGE_SIZE;
348	for (i = 0; i < KSTACK_PAGES; i++) {
349		pa = kstack0_phys + i * PAGE_SIZE;
350		va = kstack0 + i * PAGE_SIZE;
351		pmap_kenter(va, pa);
352		tlb_page_demap(TLB_DTLB, TLB_CTX_KERNEL, va);
353	}
354
355	/*
356	 * Allocate the message buffer.
357	 */
358	msgbuf_phys = pmap_bootstrap_alloc(MSGBUF_SIZE);
359
360	/*
361	 * Add the prom mappings to the kernel tsb.
362	 */
363	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
364		panic("pmap_bootstrap: finddevice /virtual-memory");
365	if ((sz = OF_getproplen(vmem, "translations")) == -1)
366		panic("pmap_bootstrap: getproplen translations");
367	if (sizeof(translations) < sz)
368		panic("pmap_bootstrap: translations too small");
369	bzero(translations, sz);
370	if (OF_getprop(vmem, "translations", translations, sz) == -1)
371		panic("pmap_bootstrap: getprop /virtual-memory/translations");
372	sz /= sizeof(*translations);
373	translations_size = sz;
374	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
375	qsort(translations, sz, sizeof (*translations), om_cmp);
376	for (i = 0; i < sz; i++) {
377		CTR4(KTR_PMAP,
378		    "translation: start=%#lx size=%#lx tte=%#lx pa=%#lx",
379		    translations[i].om_start, translations[i].om_size,
380		    translations[i].om_tte, TD_GET_PA(translations[i].om_tte));
381		if (translations[i].om_start < 0xf0000000)	/* XXX!!! */
382			continue;
383		for (off = 0; off < translations[i].om_size;
384		    off += PAGE_SIZE) {
385			va = translations[i].om_start + off;
386			tte.tte_data = translations[i].om_tte + off;
387			tte.tte_vpn = TV_VPN(va);
388			tp = tsb_kvtotte(va);
389			CTR4(KTR_PMAP,
390			    "mapping: va=%#lx tp=%p tte=%#lx pa=%#lx",
391			    va, tp, tte.tte_data, TD_GET_PA(tte.tte_data));
392			*tp = tte;
393		}
394	}
395
396	/*
397	 * Calculate the first and last available physical addresses.
398	 */
399	avail_start = phys_avail[0];
400	for (i = 0; phys_avail[i + 2] != 0; i += 2)
401		;
402	avail_end = phys_avail[i + 1];
403	Maxmem = sparc64_btop(avail_end);
404
405	/*
406	 * Allocate virtual address space for the message buffer.
407	 */
408	msgbufp = (struct msgbuf *)virtual_avail;
409	virtual_avail += round_page(MSGBUF_SIZE);
410
411	/*
412	 * Initialize the kernel pmap (which is statically allocated).
413	 */
414	pm = kernel_pmap;
415	for (i = 0; i < MAXCPU; i++)
416		pm->pm_context[i] = TLB_CTX_KERNEL;
417	pm->pm_active = ~0;
418	pm->pm_count = 1;
419	TAILQ_INIT(&pm->pm_pvlist);
420}
421
422void
423pmap_map_tsb(void)
424{
425	struct tte tte;
426	vm_offset_t va;
427	vm_offset_t pa;
428	int i;
429
430	/*
431	 * Map the 4mb tsb pages.
432	 */
433	for (i = 0; i < KVA_PAGES; i++) {
434		va = (vm_offset_t)tsb_kernel + i * PAGE_SIZE_4M;
435		pa = tsb_kernel_phys + i * PAGE_SIZE_4M;
436		tte.tte_vpn = TV_VPN(va);
437		tte.tte_data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP |
438		    TD_CV | TD_P | TD_W;
439		tlb_store_slot(TLB_DTLB, va, TLB_CTX_KERNEL, tte,
440		    TLB_SLOT_TSB_KERNEL_MIN + i);
441	}
442
443	/*
444	 * Load the tsb registers.
445	 */
446	stxa(AA_DMMU_TSB, ASI_DMMU, (vm_offset_t)tsb_kernel);
447	stxa(AA_IMMU_TSB, ASI_IMMU, (vm_offset_t)tsb_kernel);
448	membar(Sync);
449	flush(tsb_kernel);
450
451	/*
452	 * Set the secondary context to be the kernel context (needed for
453	 * fp block operations in the kernel and the cache code).
454	 */
455	stxa(AA_DMMU_SCXR, ASI_DMMU, TLB_CTX_KERNEL);
456	membar(Sync);
457}
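
/*
 * Note that the tsb pages are entered with TD_L set, i.e. as locked entries
 * in fixed dtlb slots, which keeps the kernel tsb itself permanently
 * resident in the tlb.
 */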
458
459/*
460 * Allocate physical memory directly from the phys_avail map.
461 * Can only be called from pmap_bootstrap before avail start and end are
462 * calculated.
463 */
464static vm_offset_t
465pmap_bootstrap_alloc(vm_size_t size)
466{
467	vm_offset_t pa;
468	int i;
469
470	size = round_page(size);
471	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
472		if (phys_avail[i + 1] - phys_avail[i] < size)
473			continue;
474		pa = phys_avail[i];
475		phys_avail[i] += size;
476		return (pa);
477	}
478	panic("pmap_bootstrap_alloc");
479}
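
/*
 * Memory handed out here is carved off the front of the first sufficiently
 * large phys_avail range, so it is excluded from the ranges the VM system
 * sees later.  Typical use, from pmap_bootstrap() above:
 *
 *	msgbuf_phys = pmap_bootstrap_alloc(MSGBUF_SIZE);
 */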
480
481/*
482 * Initialize the pmap module.
483 */
484void
485pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
486{
487	vm_offset_t addr;
488	vm_size_t size;
489	int result;
490	int i;
491
492	for (i = 0; i < vm_page_array_size; i++) {
493		vm_page_t m;
494
495		m = &vm_page_array[i];
496		TAILQ_INIT(&m->md.pv_list);
497		m->md.pv_list_count = 0;
498	}
499
500	for (i = 0; i < translations_size; i++) {
501		addr = translations[i].om_start;
502		size = translations[i].om_size;
503		if (addr < 0xf0000000)	/* XXX */
504			continue;
505		result = vm_map_find(kernel_map, NULL, 0, &addr, size, TRUE,
506		    VM_PROT_ALL, VM_PROT_ALL, 0);
507		if (result != KERN_SUCCESS || addr != translations[i].om_start)
508			panic("pmap_init: vm_map_find");
509	}
510
511	pvzone = &pvzone_store;
512	pvinit = (struct pv_entry *)kmem_alloc(kernel_map,
513	    vm_page_array_size * sizeof (struct pv_entry));
514	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
515	    vm_page_array_size);
516	pmap_initialized = TRUE;
517}
518
519/*
520 * Initialize the address space (zone) for the pv_entries.  Set a
521 * high water mark so that the system can recover from excessive
522 * numbers of pv entries.
523 */
524void
525pmap_init2(void)
526{
527	int shpgperproc;
528
529	shpgperproc = PMAP_SHPGPERPROC;
530	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
531	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
532	pv_entry_high_water = 9 * (pv_entry_max / 10);
533	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
534}
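
/*
 * For illustration only, with the default PMAP_SHPGPERPROC of 200 and
 * hypothetical values of maxproc = 1000 and vm_page_array_size = 100000:
 * pv_entry_max = 200 * 1000 + 100000 = 300000, and pv_entry_high_water =
 * 9 * (300000 / 10) = 270000.
 */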
535
536/*
537 * Extract the physical page address associated with the given
538 * map/virtual_address pair.
539 */
540vm_offset_t
541pmap_extract(pmap_t pm, vm_offset_t va)
542{
543	struct tte *tp;
544	u_long d;
545
546	if (pm == kernel_pmap)
547		return (pmap_kextract(va));
548	tp = tsb_tte_lookup(pm, va);
549	if (tp == NULL)
550		return (0);
551	else {
552		d = tp->tte_data;
553		return (TD_GET_PA(d) | (va & PMAP_TD_GET_MASK(d)));
554	}
555}
556
557/*
558 * Extract the physical page address associated with the given kernel virtual
559 * address.
560 */
561vm_offset_t
562pmap_kextract(vm_offset_t va)
563{
564	struct tte *tp;
565	u_long d;
566
567	if (va >= KERNBASE && va < KERNBASE + PAGE_SIZE_4M)
568		return (kernel_page + (va & PAGE_MASK_4M));
569	tp = tsb_kvtotte(va);
570	d = tp->tte_data;
571	if ((d & TD_V) == 0)
572		return (0);
573	return (TD_GET_PA(d) | (va & PMAP_TD_GET_MASK(d)));
574}
575
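/*
 * Account for a new mapping of a page at the given virtual address with
 * respect to data cache coloring.  Returns 1 if the mapping may be entered
 * cacheable.  If mappings with a different color already exist, all mappings
 * of the page are made uncacheable (TD_CV is cleared) and the page is
 * flushed from the data cache; this is presumably done to avoid illegal
 * aliases in the virtually indexed data cache.
 */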
576int
577pmap_cache_enter(vm_page_t m, vm_offset_t va)
578{
579	struct tte *tp;
580	vm_offset_t pa;
581	pv_entry_t pv;
582	int c;
583	int i;
584
585	CTR2(KTR_PMAP, "pmap_cache_enter: m=%p va=%#lx", m, va);
586	PMAP_STATS_INC(pmap_ncache_enter);
587	for (i = 0, c = 0; i < DCACHE_COLORS; i++) {
588		if (i != DCACHE_COLOR(va))
589			c += m->md.colors[i];
590	}
591	m->md.colors[DCACHE_COLOR(va)]++;
592	if (c == 0) {
593		CTR0(KTR_PMAP, "pmap_cache_enter: cacheable");
594		return (1);
595	}
596	PMAP_STATS_INC(pmap_ncache_enter_nc);
597	if (c != 1) {
598		CTR0(KTR_PMAP, "pmap_cache_enter: already uncacheable");
599		return (0);
600	}
601	CTR0(KTR_PMAP, "pmap_cache_enter: marking uncacheable");
602	if ((m->flags & PG_UNMANAGED) != 0)
603		panic("pmap_cache_enter: non-managed page");
604	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
605		if ((tp = tsb_tte_lookup(pv->pv_pmap, pv->pv_va)) != NULL) {
606			atomic_clear_long(&tp->tte_data, TD_CV);
607			tlb_page_demap(TLB_DTLB | TLB_ITLB,
608			    pv->pv_pmap->pm_context[PCPU_GET(cpuid)],
609			    pv->pv_va);
610		}
611	}
612	pa = VM_PAGE_TO_PHYS(m);
613	dcache_inval_phys(pa, pa + PAGE_SIZE - 1);
614	return (0);
615}
616
617void
618pmap_cache_remove(vm_page_t m, vm_offset_t va)
619{
620
621	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
622	    m->md.colors[DCACHE_COLOR(va)]);
623	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
624	    ("pmap_cache_remove: no mappings %d <= 0",
625	    m->md.colors[DCACHE_COLOR(va)]));
626	m->md.colors[DCACHE_COLOR(va)]--;
627}
628
629/*
630 * Map a wired page into kernel virtual address space.
631 */
632void
633pmap_kenter(vm_offset_t va, vm_offset_t pa)
634{
635	struct tte tte;
636	struct tte *tp;
637
638	tte.tte_vpn = TV_VPN(va);
639	tte.tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
640	    TD_CV | TD_P | TD_W;
641	tp = tsb_kvtotte(va);
642	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
643	    va, pa, tp, tp->tte_data);
644	if ((tp->tte_data & TD_V) != 0)
645		tlb_page_demap(TLB_DTLB, TLB_CTX_KERNEL, va);
646	*tp = tte;
647}
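
/*
 * A typical use, as in pmap_bootstrap() above, pairs pmap_kenter() with an
 * explicit demap of the virtual address in case a stale entry is still
 * cached in the tlb:
 *
 *	pmap_kenter(va, pa);
 *	tlb_page_demap(TLB_DTLB, TLB_CTX_KERNEL, va);
 *
 * The batched routines below (pmap_qenter() and friends) instead issue a
 * single tlb_range_demap() covering the whole range.
 */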
648
649/*
650 * Map a wired page into kernel virtual address space. This additionally
651 * takes a flag argument which is or'ed into the TTE data. This is used by
652 * bus_space_map().
653 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
654 * to flush entries that might still be in the cache, if applicable.
655 */
656void
657pmap_kenter_flags(vm_offset_t va, vm_offset_t pa, u_long flags)
658{
659	struct tte tte;
660	struct tte *tp;
661
662	tte.tte_vpn = TV_VPN(va);
663	tte.tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
664	tp = tsb_kvtotte(va);
665	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
666	    va, pa, tp, tp->tte_data);
667	if ((tp->tte_data & TD_V) != 0)
668		tlb_page_demap(TLB_DTLB, TLB_CTX_KERNEL, va);
669	*tp = tte;
670}
671
672/*
673 * Make a temporary mapping for a physical address.  This is only intended
674 * to be used for panic dumps.
675 */
676void *
677pmap_kenter_temporary(vm_offset_t pa, int i)
678{
679
680	TODO;
681}
682
683/*
684 * Remove a wired page from kernel virtual address space.
685 */
686void
687pmap_kremove(vm_offset_t va)
688{
689	struct tte *tp;
690
691	tp = tsb_kvtotte(va);
692	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
693	    tp->tte_data);
694	atomic_clear_long(&tp->tte_data, TD_V);
695	tp->tte_vpn = 0;
696	tp->tte_data = 0;
697	tlb_page_demap(TLB_DTLB, TLB_CTX_KERNEL, va);
698}
699
700/*
701 * Map a range of physical addresses into kernel virtual address space.
702 *
703 * The value passed in *virt is a suggested virtual address for the mapping.
704 * Architectures which can support a direct-mapped physical to virtual region
705 * can return the appropriate address within that region, leaving '*virt'
706 * unchanged.  We cannot and therefore do not; *virt is updated with the
707 * first usable address after the mapped region.
708 */
709vm_offset_t
710pmap_map(vm_offset_t *virt, vm_offset_t pa_start, vm_offset_t pa_end, int prot)
711{
712	vm_offset_t sva;
713	vm_offset_t va;
714	vm_offset_t pa;
715
716	pa = pa_start;
717	sva = *virt;
718	va = sva;
719	for (; pa < pa_end; pa += PAGE_SIZE, va += PAGE_SIZE)
720		pmap_kenter(va, pa);
721	tlb_range_demap(TLB_CTX_KERNEL, sva, sva + (pa_end - pa_start) - 1);
722	*virt = va;
723	return (sva);
724}
725
726/*
727 * Map a list of wired pages into kernel virtual address space.  This is
728 * intended for temporary mappings which do not need page modification or
729 * references recorded.  Existing mappings in the region are overwritten.
730 */
731void
732pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
733{
734	vm_offset_t va;
735	int i;
736
737	va = sva;
738	for (i = 0; i < count; i++, va += PAGE_SIZE)
739		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
740	tlb_range_demap(TLB_CTX_KERNEL, sva, sva + (count * PAGE_SIZE) - 1);
741}
742
743/*
744 * As above, but take an additional flags argument and call
745 * pmap_kenter_flags().
746 */
747void
748pmap_qenter_flags(vm_offset_t sva, vm_page_t *m, int count, u_long fl)
749{
750	vm_offset_t va;
751	int i;
752
753	va = sva;
754	for (i = 0; i < count; i++, va += PAGE_SIZE)
755		pmap_kenter_flags(va, VM_PAGE_TO_PHYS(m[i]), fl);
756	tlb_range_demap(TLB_CTX_KERNEL, sva, sva + (count * PAGE_SIZE) - 1);
757}
758
759/*
760 * Remove page mappings from kernel virtual address space.  Intended for
761 * temporary mappings entered by pmap_qenter.
762 */
763void
764pmap_qremove(vm_offset_t sva, int count)
765{
766	vm_offset_t va;
767	int i;
768
769	va = sva;
770	for (i = 0; i < count; i++, va += PAGE_SIZE)
771		pmap_kremove(va);
772	tlb_range_demap(TLB_CTX_KERNEL, sva, sva + (count * PAGE_SIZE) - 1);
773}
774
775/*
776 * Create the uarea for a new process.
777 * This routine directly affects the fork perf for a process.
778 */
779void
780pmap_new_proc(struct proc *p)
781{
782	vm_page_t ma[UAREA_PAGES];
783	vm_object_t upobj;
784	vm_offset_t up;
785	vm_page_t m;
786	u_int i;
787
788	/*
789	 * Allocate object for the upages.
790	 */
791	upobj = p->p_upages_obj;
792	if (upobj == NULL) {
793		upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES);
794		p->p_upages_obj = upobj;
795	}
796
797	/*
798	 * Get a kernel virtual address for the U area for this process.
799	 */
800	up = (vm_offset_t)p->p_uarea;
801	if (up == 0) {
802		up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE);
803		if (up == 0)
804			panic("pmap_new_proc: upage allocation failed");
805		p->p_uarea = (struct user *)up;
806	}
807
808	for (i = 0; i < UAREA_PAGES; i++) {
809		/*
810		 * Get a uarea page.
811		 */
812		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
813		ma[i] = m;
814
815		/*
816		 * Wire the page.
817		 */
818		m->wire_count++;
819		cnt.v_wire_count++;
820
821		vm_page_wakeup(m);
822		vm_page_flag_clear(m, PG_ZERO);
823		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
824		m->valid = VM_PAGE_BITS_ALL;
825	}
826
827	/*
828	 * Enter the pages into the kernel address space.
829	 */
830	pmap_qenter(up, ma, UAREA_PAGES);
831}
832
833/*
834 * Dispose the uarea for a process that has exited.
835 * This routine directly impacts the exit perf of a process.
836 */
837void
838pmap_dispose_proc(struct proc *p)
839{
840	vm_object_t upobj;
841	vm_offset_t up;
842	vm_page_t m;
843	int i;
844
845	upobj = p->p_upages_obj;
846	up = (vm_offset_t)p->p_uarea;
847	for (i = 0; i < UAREA_PAGES; i++) {
848		m = vm_page_lookup(upobj, i);
849		if (m == NULL)
850			panic("pmap_dispose_proc: upage already missing?");
851		vm_page_busy(m);
852		vm_page_unwire(m, 0);
853		vm_page_free(m);
854	}
855	pmap_qremove(up, UAREA_PAGES);
856}
857
858/*
859 * Allow the uarea for a process to be prejudicially paged out.
860 */
861void
862pmap_swapout_proc(struct proc *p)
863{
864	vm_object_t upobj;
865	vm_offset_t up;
866	vm_page_t m;
867	int i;
868
869	upobj = p->p_upages_obj;
870	up = (vm_offset_t)p->p_uarea;
871	for (i = 0; i < UAREA_PAGES; i++) {
872		m = vm_page_lookup(upobj, i);
873		if (m == NULL)
874			panic("pmap_swapout_proc: upage already missing?");
875		vm_page_dirty(m);
876		vm_page_unwire(m, 0);
877	}
878	pmap_qremove(up, UAREA_PAGES);
879}
880
881/*
882 * Bring the uarea for a specified process back in.
883 */
884void
885pmap_swapin_proc(struct proc *p)
886{
887	vm_page_t ma[UAREA_PAGES];
888	vm_object_t upobj;
889	vm_offset_t up;
890	vm_page_t m;
891	int rv;
892	int i;
893
894	upobj = p->p_upages_obj;
895	up = (vm_offset_t)p->p_uarea;
896	for (i = 0; i < UAREA_PAGES; i++) {
897		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
898		if (m->valid != VM_PAGE_BITS_ALL) {
899			rv = vm_pager_get_pages(upobj, &m, 1, 0);
900			if (rv != VM_PAGER_OK)
901				panic("pmap_swapin_proc: cannot get upage");
902			m = vm_page_lookup(upobj, i);
903			m->valid = VM_PAGE_BITS_ALL;
904		}
905		ma[i] = m;
906		vm_page_wire(m);
907		vm_page_wakeup(m);
908		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
909	}
910	pmap_qenter(up, ma, UAREA_PAGES);
911}
912
913/*
914 * Create the kernel stack and pcb for a new thread.
915 * This routine directly affects the fork perf for a process and
916 * thread creation performance.
917 */
918void
919pmap_new_thread(struct thread *td)
920{
921	vm_page_t ma[KSTACK_PAGES];
922	vm_object_t ksobj;
923	vm_offset_t ks;
924	vm_page_t m;
925	u_int i;
926
927	/*
928	 * Allocate an object for the kstack.
929	 */
930	ksobj = td->td_kstack_obj;
931	if (ksobj == NULL) {
932		ksobj = vm_object_allocate(OBJT_DEFAULT, KSTACK_PAGES);
933		td->td_kstack_obj = ksobj;
934	}
935
936	/*
937	 * Get a kernel virtual address for the kstack for this thread.
938	 */
939	ks = td->td_kstack;
940	if (ks == 0) {
941		ks = kmem_alloc_nofault(kernel_map,
942		   (KSTACK_PAGES + KSTACK_GUARD_PAGES) * PAGE_SIZE);
943		if (ks == 0)
944			panic("pmap_new_thread: kstack allocation failed");
945		tlb_page_demap(TLB_DTLB, TLB_CTX_KERNEL, ks);
946		ks += KSTACK_GUARD_PAGES * PAGE_SIZE;
947		td->td_kstack = ks;
948	}
949
950	for (i = 0; i < KSTACK_PAGES; i++) {
951		/*
952		 * Get a kernel stack page.
953		 */
954		m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
955		ma[i] = m;
956
957		/*
958		 * Wire the page.
959		 */
960		m->wire_count++;
961		cnt.v_wire_count++;
962
963		vm_page_wakeup(m);
964		vm_page_flag_clear(m, PG_ZERO);
965		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
966		m->valid = VM_PAGE_BITS_ALL;
967	}
968
969	/*
970	 * Enter the page into the kernel address space.
971	 */
972	pmap_qenter(ks, ma, KSTACK_PAGES);
973}
974
975/*
976 * Dispose the kernel stack for a thread that has exited.
977 * This routine directly impacts the exit perf of a process and thread.
978 */
979void
980pmap_dispose_thread(struct thread *td)
981{
982	vm_object_t ksobj;
983	vm_offset_t ks;
984	vm_page_t m;
985	int i;
986
987	ksobj = td->td_kstack_obj;
988	ks = td->td_kstack;
989	for (i = 0; i < KSTACK_PAGES; i++) {
990		m = vm_page_lookup(ksobj, i);
991		if (m == NULL)
992		panic("pmap_dispose_thread: kstack already missing?");
993		vm_page_busy(m);
994		vm_page_unwire(m, 0);
995		vm_page_free(m);
996	}
997	pmap_qremove(ks, KSTACK_PAGES);
998}
999
1000/*
1001 * Allow the kernel stack for a thread to be prejudicially paged out.
1002 */
1003void
1004pmap_swapout_thread(struct thread *td)
1005{
1006	vm_object_t ksobj;
1007	vm_offset_t ks;
1008	vm_page_t m;
1009	int i;
1010
1011	ksobj = td->td_kstack_obj;
1012	ks = (vm_offset_t)td->td_kstack;
1013	for (i = 0; i < KSTACK_PAGES; i++) {
1014		m = vm_page_lookup(ksobj, i);
1015		if (m == NULL)
1016			panic("pmap_swapout_thread: kstack already missing?");
1017		vm_page_dirty(m);
1018		vm_page_unwire(m, 0);
1019	}
1020	pmap_qremove(ks, KSTACK_PAGES);
1021}
1022
1023/*
1024 * Bring the kernel stack for a specified thread back in.
1025 */
1026void
1027pmap_swapin_thread(struct thread *td)
1028{
1029	vm_page_t ma[KSTACK_PAGES];
1030	vm_object_t ksobj;
1031	vm_offset_t ks;
1032	vm_page_t m;
1033	int rv;
1034	int i;
1035
1036	ksobj = td->td_kstack_obj;
1037	ks = td->td_kstack;
1038	for (i = 0; i < KSTACK_PAGES; i++) {
1039		m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1040		if (m->valid != VM_PAGE_BITS_ALL) {
1041			rv = vm_pager_get_pages(ksobj, &m, 1, 0);
1042			if (rv != VM_PAGER_OK)
1043				panic("pmap_swapin_thread: cannot get kstack");
1044			m = vm_page_lookup(ksobj, i);
1045			m->valid = VM_PAGE_BITS_ALL;
1046		}
1047		ma[i] = m;
1048		vm_page_wire(m);
1049		vm_page_wakeup(m);
1050		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
1051	}
1052	pmap_qenter(ks, ma, KSTACK_PAGES);
1053}
1054
1055/*
1056 * Initialize the pmap associated with process 0.
1057 */
1058void
1059pmap_pinit0(pmap_t pm)
1060{
1061
1062	pm->pm_context[PCPU_GET(cpuid)] = pmap_context_alloc();
1063	pm->pm_active = 0;
1064	pm->pm_count = 1;
1065	pm->pm_tsb = NULL;
1066	pm->pm_tsb_obj = NULL;
1067	TAILQ_INIT(&pm->pm_pvlist);
1068	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1069}
1070
1071/*
1072 * Initialize a preallocated and zeroed pmap structure, such as one in a
1073 * vmspace structure.
1074 */
1075void
1076pmap_pinit(pmap_t pm)
1077{
1078	vm_page_t ma[TSB_PAGES];
1079	vm_page_t m;
1080	int i;
1081
1082	/*
1083	 * Allocate kva space for the tsb.
1084	 */
1085	if (pm->pm_tsb == NULL) {
1086		pm->pm_tsb = (struct tte *)kmem_alloc_pageable(kernel_map,
1087		    TSB_BSIZE);
1088	}
1089
1090	/*
1091	 * Allocate an object for it.
1092	 */
1093	if (pm->pm_tsb_obj == NULL)
1094		pm->pm_tsb_obj = vm_object_allocate(OBJT_DEFAULT, TSB_PAGES);
1095
1096	for (i = 0; i < TSB_PAGES; i++) {
1097		m = vm_page_grab(pm->pm_tsb_obj, i,
1098		    VM_ALLOC_RETRY | VM_ALLOC_ZERO);
1099		if ((m->flags & PG_ZERO) == 0)
1100			pmap_zero_page(VM_PAGE_TO_PHYS(m));
1101
1102		m->wire_count++;
1103		cnt.v_wire_count++;
1104
1105		vm_page_flag_clear(m, PG_MAPPED | PG_BUSY);
1106		m->valid = VM_PAGE_BITS_ALL;
1107
1108		ma[i] = m;
1109	}
1110	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1111
1112	pm->pm_active = 0;
1113	pm->pm_context[PCPU_GET(cpuid)] = pmap_context_alloc();
1114	pm->pm_count = 1;
1115	TAILQ_INIT(&pm->pm_pvlist);
1116	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1117}
1118
1119void
1120pmap_pinit2(pmap_t pmap)
1121{
1122	/* XXX: Remove this stub when no longer called */
1123}
1124
1125/*
1126 * Release any resources held by the given physical map.
1127 * Called when a pmap initialized by pmap_pinit is being released.
1128 * Should only be called if the map contains no valid mappings.
1129 */
1130void
1131pmap_release(pmap_t pm)
1132{
1133	vm_object_t obj;
1134	vm_page_t m;
1135
1136	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1137	    pm->pm_context[PCPU_GET(cpuid)], pm->pm_tsb);
1138	obj = pm->pm_tsb_obj;
1139	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1140	KASSERT(TAILQ_EMPTY(&pm->pm_pvlist),
1141	    ("pmap_release: leaking pv entries"));
1142	KASSERT(pmap_resident_count(pm) == 0,
1143	    ("pmap_release: resident pages %ld != 0",
1144	    pmap_resident_count(pm)));
1145	pmap_context_destroy(pm->pm_context[PCPU_GET(cpuid)]);
1146	TAILQ_FOREACH(m, &obj->memq, listq) {
1147		if (vm_page_sleep_busy(m, FALSE, "pmaprl"))
1148			continue;
1149		vm_page_busy(m);
1150		KASSERT(m->hold_count == 0,
1151		    ("pmap_release: freeing held tsb page"));
1152		m->wire_count--;
1153		cnt.v_wire_count--;
1154		vm_page_free_zero(m);
1155	}
1156	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1157}
1158
1159/*
1160 * Grow the number of kernel page table entries.  Unneeded.
1161 */
1162void
1163pmap_growkernel(vm_offset_t addr)
1164{
1165}
1166
1167/*
1168 * Retire the given physical map from service.  Pmaps are always allocated
1169 * as part of a larger structure, so this never happens.
1170 */
1171void
1172pmap_destroy(pmap_t pm)
1173{
1174	panic("pmap_destroy: unimplemented");
1175}
1176
1177/*
1178 * Add a reference to the specified pmap.
1179 */
1180void
1181pmap_reference(pmap_t pm)
1182{
1183	if (pm != NULL)
1184		pm->pm_count++;
1185}
1186
1187/*
1188 * This routine is very drastic, but can save the system
1189 * in a pinch.
1190 */
1191void
1192pmap_collect(void)
1193{
1194	static int warningdone;
1195	vm_page_t m;
1196	int i;
1197
1198	if (pmap_pagedaemon_waken == 0)
1199		return;
1200	if (warningdone++ < 5)
1201		printf("pmap_collect: collecting pv entries -- suggest "
1202		    "increasing PMAP_SHPGPERPROC\n");
1203	for (i = 0; i < vm_page_array_size; i++) {
1204		m = &vm_page_array[i];
1205		if (m->wire_count || m->hold_count || m->busy ||
1206		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1207			continue;
1208		pv_remove_all(m);
1209	}
1210	pmap_pagedaemon_waken = 0;
1211}
1212
1213static int
1214pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1215		vm_offset_t va)
1216{
1217	vm_page_t m;
1218
1219	m = PHYS_TO_VM_PAGE(TD_GET_PA(tp->tte_data));
1220	if ((tp->tte_data & TD_PV) != 0) {
1221		if ((tp->tte_data & TD_W) != 0 &&
1222		    pmap_track_modified(pm, va))
1223			vm_page_dirty(m);
1224		if ((tp->tte_data & TD_REF) != 0)
1225			vm_page_flag_set(m, PG_REFERENCED);
1226		pv_remove(pm, m, va);
1227		pmap_cache_remove(m, va);
1228	}
1229	atomic_clear_long(&tp->tte_data, TD_V);
1230	tp->tte_vpn = 0;
1231	tp->tte_data = 0;
1232	if (PMAP_REMOVE_DONE(pm))
1233		return (0);
1234	return (1);
1235}
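
/*
 * pmap_remove() below stops its per-page loop as soon as this callback
 * returns 0, which happens once PMAP_REMOVE_DONE() reports that the pmap has
 * no resident pages left to remove.
 */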
1236
1237/*
1238 * Remove the given range of addresses from the specified map.
1239 */
1240void
1241pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1242{
1243	struct tte *tp;
1244	vm_offset_t va;
1245
1246	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1247	    pm->pm_context[PCPU_GET(cpuid)], start, end);
1248	if (PMAP_REMOVE_DONE(pm))
1249		return;
1250	if (end - start > PMAP_TSB_THRESH) {
1251		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1252		tlb_context_demap(pm->pm_context[PCPU_GET(cpuid)]);
1253	} else {
1254		for (va = start; va < end; va += PAGE_SIZE) {
1255			if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1256				if (!pmap_remove_tte(pm, NULL, tp, va))
1257					break;
1258			}
1259		}
1260		tlb_range_demap(pm->pm_context[PCPU_GET(cpuid)],
1261		    start, end - 1);
1262	}
1263}
1264
1265static int
1266pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1267		 vm_offset_t va)
1268{
1269	vm_page_t m;
1270	u_long data;
1271
1272	data = tp->tte_data;
1273	if ((data & TD_PV) != 0) {
1274		m = PHYS_TO_VM_PAGE(TD_GET_PA(data));
1275		if ((data & TD_REF) != 0) {
1276			vm_page_flag_set(m, PG_REFERENCED);
1277			data &= ~TD_REF;
1278		}
1279		if ((data & TD_W) != 0 &&
1280		    pmap_track_modified(pm, va)) {
1281			vm_page_dirty(m);
1282		}
1283	}
1284
1285	data &= ~(TD_W | TD_SW);
1286	CTR2(KTR_PMAP, "pmap_protect: new=%#lx old=%#lx",
1287	    data, tp->tte_data);
1288	tp->tte_data = data;
1289	return (0);
1290}
1291
1292/*
1293 * Set the physical protection on the specified range of this map as requested.
1294 */
1295void
1296pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1297{
1298	vm_offset_t va;
1299	struct tte *tp;
1300
1301	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1302	    pm->pm_context[PCPU_GET(cpuid)], sva, eva, prot);
1303
1304	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
1305	    ("pmap_protect: non current pmap"));
1306
1307	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1308		pmap_remove(pm, sva, eva);
1309		return;
1310	}
1311
1312	if (prot & VM_PROT_WRITE)
1313		return;
1314
1315	if (eva - sva > PMAP_TSB_THRESH) {
1316		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1317		tlb_context_demap(pm->pm_context[PCPU_GET(cpuid)]);
1318	} else {
1319		for (va = sva; va < eva; va += PAGE_SIZE) {
1320			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1321				pmap_protect_tte(pm, NULL, tp, va);
1322		}
1323		tlb_range_demap(pm->pm_context[PCPU_GET(cpuid)], sva, eva - 1);
1324	}
1325}
1326
1327/*
1328 * Map the given physical page at the specified virtual address in the
1329 * target pmap with the protection requested.  If specified the page
1330 * will be wired down.
1331 */
1332void
1333pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1334	   boolean_t wired)
1335{
1336	struct tte otte;
1337	struct tte tte;
1338	struct tte *tp;
1339	vm_offset_t pa;
1340	vm_page_t om;
1341
1342	pa = VM_PAGE_TO_PHYS(m);
1343	CTR6(KTR_PMAP,
1344	    "pmap_enter: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1345	    pm->pm_context[PCPU_GET(cpuid)], m, va, pa, prot, wired);
1346
1347	tte.tte_vpn = TV_VPN(va);
1348	tte.tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP;
1349
1350	/*
1351	 * If there is an existing mapping, and the physical address has not
1352	 * changed, it must be a protection or wiring change.
1353	 */
1354	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1355		otte = *tp;
1356		om = PHYS_TO_VM_PAGE(TD_GET_PA(otte.tte_data));
1357
1358		if (TD_GET_PA(otte.tte_data) == pa) {
1359			CTR0(KTR_PMAP, "pmap_enter: update");
1360			PMAP_STATS_INC(pmap_enter_nupdate);
1361
1362			/*
1363			 * Wiring change, just update stats.
1364			 */
1365			if (wired) {
1366				if ((otte.tte_data & TD_WIRED) == 0)
1367					pm->pm_stats.wired_count++;
1368			} else {
1369				if ((otte.tte_data & TD_WIRED) != 0)
1370					pm->pm_stats.wired_count--;
1371			}
1372
1373			if ((otte.tte_data & TD_CV) != 0)
1374				tte.tte_data |= TD_CV;
1375			if ((otte.tte_data & TD_REF) != 0)
1376				tte.tte_data |= TD_REF;
1377			if ((otte.tte_data & TD_PV) != 0) {
1378				KASSERT((m->flags &
1379				    (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1380				    ("pmap_enter: unmanaged pv page"));
1381				tte.tte_data |= TD_PV;
1382			}
1383			/*
1384			 * If we're turning off write protection, sense modify
1385			 * status and remove the old mapping.
1386			 */
1387			if ((prot & VM_PROT_WRITE) == 0 &&
1388			    (otte.tte_data & (TD_W | TD_SW)) != 0) {
1389				if ((otte.tte_data & TD_PV) != 0) {
1390					if (pmap_track_modified(pm, va))
1391						vm_page_dirty(m);
1392				}
1393				tlb_tte_demap(otte,
1394				    pm->pm_context[PCPU_GET(cpuid)]);
1395			}
1396		} else {
1397			CTR0(KTR_PMAP, "pmap_enter: replace");
1398			PMAP_STATS_INC(pmap_enter_nreplace);
1399
1400			/*
1401			 * Mapping has changed, invalidate old range.
1402			 */
1403			if (!wired && (otte.tte_data & TD_WIRED) != 0)
1404				pm->pm_stats.wired_count--;
1405
1406			/*
1407			 * Enter on the pv list if part of our managed memory.
1408			 */
1409			if ((otte.tte_data & TD_PV) != 0) {
1410				KASSERT((m->flags &
1411				    (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1412				    ("pmap_enter: unmanaged pv page"));
1413				if ((otte.tte_data & TD_REF) != 0)
1414					vm_page_flag_set(om, PG_REFERENCED);
1415				if ((otte.tte_data & TD_W) != 0 &&
1416				    pmap_track_modified(pm, va))
1417					vm_page_dirty(om);
1418				pv_remove(pm, om, va);
1419				pv_insert(pm, m, va);
1420				tte.tte_data |= TD_PV;
1421				pmap_cache_remove(om, va);
1422				if (pmap_cache_enter(m, va) != 0)
1423					tte.tte_data |= TD_CV;
1424			}
1425			tlb_tte_demap(otte, pm->pm_context[PCPU_GET(cpuid)]);
1426		}
1427	} else {
1428		CTR0(KTR_PMAP, "pmap_enter: new");
1429		PMAP_STATS_INC(pmap_enter_nnew);
1430
1431		/*
1432		 * Enter on the pv list if part of our managed memory.
1433		 */
1434		if (pmap_initialized &&
1435		    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
1436			pv_insert(pm, m, va);
1437			tte.tte_data |= TD_PV;
1438			if (pmap_cache_enter(m, va) != 0)
1439				tte.tte_data |= TD_CV;
1440		}
1441
1442		/*
1443		 * Increment counters.
1444		 */
1445		if (wired)
1446			pm->pm_stats.wired_count++;
1447
1448	}
1449
1450	/*
1451	 * Now validate mapping with desired protection/wiring.
1452	 */
1453	if (wired) {
1454		tte.tte_data |= TD_REF | TD_WIRED;
1455		if ((prot & VM_PROT_WRITE) != 0)
1456			tte.tte_data |= TD_W;
1457	}
1458	if (pm->pm_context[PCPU_GET(cpuid)] == TLB_CTX_KERNEL)
1459		tte.tte_data |= TD_P;
1460	if (prot & VM_PROT_WRITE)
1461		tte.tte_data |= TD_SW;
1462	if (prot & VM_PROT_EXECUTE) {
1463		tte.tte_data |= TD_EXEC;
1464		PMAP_STATS_INC(pmap_niflush);
1465		icache_inval_phys(pa, pa + PAGE_SIZE - 1);
1466	}
1467
1468	if (tp != NULL)
1469		*tp = tte;
1470	else
1471		tsb_tte_enter(pm, m, va, tte);
1472}
1473
1474void
1475pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1476		    vm_pindex_t pindex, vm_size_t size, int limit)
1477{
1478	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
1479	    ("pmap_object_init_pt: non current pmap"));
1480	/* XXX */
1481}
1482
1483void
1484pmap_prefault(pmap_t pm, vm_offset_t va, vm_map_entry_t entry)
1485{
1486	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
1487	    ("pmap_prefault: non current pmap"));
1488	/* XXX */
1489}
1490
1491/*
1492 * Change the wiring attribute for a map/virtual-address pair.
1493 * The mapping must already exist in the pmap.
1494 */
1495void
1496pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1497{
1498	struct tte *tp;
1499
1500	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1501		if (wired) {
1502			if ((tp->tte_data & TD_WIRED) == 0)
1503				pm->pm_stats.wired_count++;
1504			tp->tte_data |= TD_WIRED;
1505		} else {
1506			if ((tp->tte_data & TD_WIRED) != 0)
1507				pm->pm_stats.wired_count--;
1508			tp->tte_data &= ~TD_WIRED;
1509		}
1510	}
1511}
1512
1513static int
1514pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp, vm_offset_t va)
1515{
1516	struct tte tte;
1517	vm_page_t m;
1518
1519	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1520		tte.tte_data = tp->tte_data &
1521		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1522		tte.tte_vpn = TV_VPN(va);
1523		m = PHYS_TO_VM_PAGE(TD_GET_PA(tp->tte_data));
1524		if ((tp->tte_data & TD_PV) != 0) {
1525			KASSERT((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0,
1526			    ("pmap_enter: unmanaged pv page"));
1527			pv_insert(dst_pmap, m, va);
1528			tte.tte_data |= TD_PV;
1529			if (pmap_cache_enter(m, va) != 0)
1530				tte.tte_data |= TD_CV;
1531		}
1532		tsb_tte_enter(dst_pmap, m, va, tte);
1533	}
1534	return (1);
1535}
1536
1537void
1538pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1539	  vm_size_t len, vm_offset_t src_addr)
1540{
1541	struct tte *tp;
1542	vm_offset_t va;
1543
1544	if (dst_addr != src_addr)
1545		return;
1546	if (len > PMAP_TSB_THRESH) {
1547		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1548		    pmap_copy_tte);
1549		tlb_context_demap(dst_pmap->pm_context[PCPU_GET(cpuid)]);
1550	} else {
1551		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE) {
1552			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1553				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1554		}
1555		tlb_range_demap(dst_pmap->pm_context[PCPU_GET(cpuid)],
1556		    src_addr, src_addr + len - 1);
1557	}
1558}
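
/*
 * Note that pmap_copy() is a no-op unless the destination range uses the same
 * virtual addresses as the source, and that copied mappings are entered with
 * the writable and referenced bits (TD_W, TD_SW, TD_REF) cleared, so the
 * first access through the destination pmap faults and is accounted for
 * normally.
 */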
1559
1560/*
1561 * Zero a page of physical memory using a physical address ASI.
1562 */
1563void
1564pmap_zero_page(vm_offset_t pa)
1565{
1566
1567	CTR1(KTR_PMAP, "pmap_zero_page: pa=%#lx", pa);
1568	dcache_inval_phys(pa, pa + PAGE_SIZE);
1569	aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1570}
1571
1572void
1573pmap_zero_page_area(vm_offset_t pa, int off, int size)
1574{
1575
1576	CTR3(KTR_PMAP, "pmap_zero_page_area: pa=%#lx off=%#x size=%#x",
1577	    pa, off, size);
1578	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1579	dcache_inval_phys(pa + off, pa + off + size);
1580	aszero(ASI_PHYS_USE_EC, pa + off, size);
1581}
1582
1583/*
1584 * Copy a page of physical memory using a physical address ASI.
1585 */
1586void
1587pmap_copy_page(vm_offset_t src, vm_offset_t dst)
1588{
1589
1590	CTR2(KTR_PMAP, "pmap_copy_page: src=%#lx dst=%#lx", src, dst);
1591	dcache_inval_phys(dst, dst + PAGE_SIZE);
1592	ascopy(ASI_PHYS_USE_EC, src, dst, PAGE_SIZE);
1593}
1594
1595/*
1596 * Make the specified page pageable (or not).  Unneeded.
1597 */
1598void
1599pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
1600	      boolean_t pageable)
1601{
1602}
1603
1604/*
1605 * Returns true if the pmap's pv is one of the first
1606 * 16 pvs linked to from this page.  This count may
1607 * be changed upwards or downwards in the future; it
1608 * is only necessary that true be returned for a small
1609 * subset of pmaps for proper page aging.
1610 */
1611boolean_t
1612pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1613{
1614
1615	if (m->flags & PG_FICTITIOUS)
1616		return (FALSE);
1617	return (pv_page_exists(pm, m));
1618}
1619
1620/*
1621 * Remove all pages from the specified address space; this aids process exit
1622 * speeds.  This is much faster than pmap_remove in the case of running down
1623 * an entire address space.  Only works for the current pmap.
1624 */
1625void
1626pmap_remove_pages(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1627{
1628	struct tte *tp;
1629	pv_entry_t npv;
1630	pv_entry_t pv;
1631	vm_page_t m;
1632
1633	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
1634	    ("pmap_remove_pages: non current pmap"));
1635	npv = NULL;
1636	for (pv = TAILQ_FIRST(&pm->pm_pvlist); pv != NULL; pv = npv) {
1637		npv = TAILQ_NEXT(pv, pv_plist);
1638		if (pv->pv_va >= eva || pv->pv_va < sva)
1639			continue;
1640		if ((tp = tsb_tte_lookup(pv->pv_pmap, pv->pv_va)) == NULL)
1641			continue;
1642
1643		/*
1644		 * We cannot remove wired pages at this time.
1645		 */
1646		if ((tp->tte_data & TD_WIRED) != 0)
1647			continue;
1648
1649		atomic_clear_long(&tp->tte_data, TD_V);
1650		tp->tte_vpn = 0;
1651		tp->tte_data = 0;
1652
1653		m = pv->pv_m;
1654
1655		pv->pv_pmap->pm_stats.resident_count--;
1656		m->md.pv_list_count--;
1657		pmap_cache_remove(m, pv->pv_va);
1658		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1659		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1660		if (TAILQ_EMPTY(&m->md.pv_list))
1661			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1662		pv_free(pv);
1663	}
1664	tlb_context_demap(pm->pm_context[PCPU_GET(cpuid)]);
1665}
1666
1667/*
1668 * Lower the permission for all mappings to a given page.
1669 */
1670void
1671pmap_page_protect(vm_page_t m, vm_prot_t prot)
1672{
1673
1674	if ((prot & VM_PROT_WRITE) == 0) {
1675		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
1676			pv_bit_clear(m, TD_W | TD_SW);
1677		else
1678			pv_remove_all(m);
1679	}
1680}
1681
1682vm_offset_t
1683pmap_phys_address(int ppn)
1684{
1685
1686	return (sparc64_ptob(ppn));
1687}
1688
1689/*
1690 *	pmap_ts_referenced:
1691 *
1692 *	Return a count of reference bits for a page, clearing those bits.
1693 *	It is not necessary for every reference bit to be cleared, but it
1694 *	is necessary that 0 only be returned when there are truly no
1695 *	reference bits set.
1696 *
1697 *	XXX: The exact number of bits to check and clear is a matter that
1698 *	should be tested and standardized at some point in the future for
1699 *	optimal aging of shared pages.
1700 */
1701
1702int
1703pmap_ts_referenced(vm_page_t m)
1704{
1705
1706	if (m->flags & PG_FICTITIOUS)
1707		return (0);
1708	return (pv_bit_count(m, TD_REF));
1709}
1710
1711boolean_t
1712pmap_is_modified(vm_page_t m)
1713{
1714
1715	if (m->flags & PG_FICTITIOUS)
1716		return (FALSE);
1717	return (pv_bit_test(m, TD_W));
1718}
1719
1720void
1721pmap_clear_modify(vm_page_t m)
1722{
1723
1724	if (m->flags & PG_FICTITIOUS)
1725		return;
1726	pv_bit_clear(m, TD_W);
1727}
1728
1729void
1730pmap_clear_reference(vm_page_t m)
1731{
1732
1733	if (m->flags & PG_FICTITIOUS)
1734		return;
1735	pv_bit_clear(m, TD_REF);
1736}
1737
1738int
1739pmap_mincore(pmap_t pm, vm_offset_t addr)
1740{
1741	TODO;
1742	return (0);
1743}
1744
1745/*
1746 * Activate a user pmap.  The pmap must be activated before its address space
1747 * can be accessed in any way.
1748 */
1749void
1750pmap_activate(struct thread *td)
1751{
1752	vm_offset_t tsb;
1753	u_long context;
1754	pmap_t pm;
1755
1756	/*
1757	 * Load all the data we need up front to encourage the compiler to
1758	 * not issue any loads while we have interrupts disabled below.
1759	 */
1760	pm = &td->td_proc->p_vmspace->vm_pmap;
1761	context = pm->pm_context[PCPU_GET(cpuid)];
1762	tsb = (vm_offset_t)pm->pm_tsb;
1763
1764	KASSERT(context != 0, ("pmap_activate: activating nucleus context"));
1765	KASSERT(context != -1, ("pmap_activate: steal context"));
1766	KASSERT(pm->pm_active == 0, ("pmap_activate: pmap already active?"));
1767
1768	wrpr(pstate, 0, PSTATE_MMU);
1769	mov(tsb, TSB_REG);
1770	wrpr(pstate, 0, PSTATE_NORMAL);
1771	pm->pm_active |= 1 << PCPU_GET(cpuid);
1772	stxa(AA_DMMU_PCXR, ASI_DMMU, context);
1773	membar(Sync);
1774	wrpr(pstate, 0, PSTATE_KERNEL);
1775}
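
/*
 * Roughly: the pmap's tsb address is installed in TSB_REG with PSTATE_MMU in
 * effect, the cpu is marked active in pm_active, and the primary context
 * register (AA_DMMU_PCXR) is switched to the pmap's context so that user
 * accesses are translated using its mappings.
 */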
1776
1777vm_offset_t
1778pmap_addr_hint(vm_object_t object, vm_offset_t va, vm_size_t size)
1779{
1780
1781	return (va);
1782}
1783
1784/*
1785 * Allocate a hardware context number from the context map.
1786 */
1787static u_int
1788pmap_context_alloc(void)
1789{
1790	u_int i;
1791
1792	i = pmap_context_base;
1793	do {
1794		if (pmap_context_map[i] == 0) {
1795			pmap_context_map[i] = 1;
1796			pmap_context_base = (i + 1) & (PMAP_CONTEXT_MAX - 1);
1797			return (i);
1798		}
1799	} while ((i = (i + 1) & (PMAP_CONTEXT_MAX - 1)) != pmap_context_base);
1800	panic("pmap_context_alloc");
1801}
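
/*
 * The context map is one byte per context.  Allocation scans forward from
 * pmap_context_base, wrapping modulo PMAP_CONTEXT_MAX, and panics if every
 * context is in use; the base is advanced past the slot just handed out so
 * that successive allocations spread over the map rather than reusing the
 * same low-numbered contexts.
 */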
1802
1803/*
1804 * Free a hardware context number back to the context map.
1805 */
1806static void
1807pmap_context_destroy(u_int i)
1808{
1809
1810	pmap_context_map[i] = 0;
1811}
1812