pmap.c revision 242534
1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
38 */
39
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/pmap.c 242534 2012-11-03 23:03:14Z attilio $");
42
43/*
44 * Manages physical address maps.
45 *
46 * Since the information managed by this module is also stored by the
47 * logical address mapping module, this module may throw away valid virtual
48 * to physical mappings at almost any time.  However, invalidations of
49 * mappings must be done as requested.
50 *
51 * In order to cope with hardware architectures which make virtual to
52 * physical map invalidates expensive, this module may delay invalidate
53 * or reduced protection operations until such time as they are actually
54 * necessary.  This module is given full information as to which processors
55 * are currently using which maps, and to when physical maps must be made
56 * correct.
57 */
58
59#include "opt_kstack_pages.h"
60#include "opt_pmap.h"
61
62#include <sys/param.h>
63#include <sys/kernel.h>
64#include <sys/ktr.h>
65#include <sys/lock.h>
66#include <sys/msgbuf.h>
67#include <sys/mutex.h>
68#include <sys/proc.h>
69#include <sys/rwlock.h>
70#include <sys/smp.h>
71#include <sys/sysctl.h>
72#include <sys/systm.h>
73#include <sys/vmmeter.h>
74
75#include <dev/ofw/openfirm.h>
76
77#include <vm/vm.h>
78#include <vm/vm_param.h>
79#include <vm/vm_kern.h>
80#include <vm/vm_page.h>
81#include <vm/vm_map.h>
82#include <vm/vm_object.h>
83#include <vm/vm_extern.h>
84#include <vm/vm_pageout.h>
85#include <vm/vm_pager.h>
86
87#include <machine/cache.h>
88#include <machine/frame.h>
89#include <machine/instr.h>
90#include <machine/md_var.h>
91#include <machine/metadata.h>
92#include <machine/ofw_mem.h>
93#include <machine/smp.h>
94#include <machine/tlb.h>
95#include <machine/tte.h>
96#include <machine/tsb.h>
97#include <machine/ver.h>
98
99/*
100 * Virtual address of message buffer
101 */
102struct msgbuf *msgbufp;
103
104/*
105 * Map of physical memory regions
106 */
107vm_paddr_t phys_avail[128];
108static struct ofw_mem_region mra[128];
109struct ofw_mem_region sparc64_memreg[128];
110int sparc64_nmemreg;
111static struct ofw_map translations[128];
112static int translations_size;
113
114static vm_offset_t pmap_idle_map;
115static vm_offset_t pmap_temp_map_1;
116static vm_offset_t pmap_temp_map_2;
117
118/*
119 * First and last available kernel virtual addresses
120 */
121vm_offset_t virtual_avail;
122vm_offset_t virtual_end;
123vm_offset_t kernel_vm_end;
124
125vm_offset_t vm_max_kernel_address;
126
127/*
128 * Kernel pmap
129 */
130struct pmap kernel_pmap_store;
131
132struct rwlock_padalign tte_list_global_lock;
133
134/*
135 * Allocate physical memory for use in pmap_bootstrap.
136 */
137static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
138
139static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
140static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
141static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
142    struct tte *tp, vm_offset_t va);
143
144/*
145 * Map the given physical page at the specified virtual address in the
146 * target pmap with the protection requested.  If specified, the page
147 * will be wired down.
148 *
149 * The page queues and pmap must be locked.
150 */
151static void pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
152    vm_prot_t prot, boolean_t wired);
153
154extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
155extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
156extern int tl1_dmmu_miss_patch_asi_1[];
157extern int tl1_dmmu_miss_patch_quad_ldd_1[];
158extern int tl1_dmmu_miss_patch_tsb_1[];
159extern int tl1_dmmu_miss_patch_tsb_2[];
160extern int tl1_dmmu_miss_patch_tsb_mask_1[];
161extern int tl1_dmmu_miss_patch_tsb_mask_2[];
162extern int tl1_dmmu_prot_patch_asi_1[];
163extern int tl1_dmmu_prot_patch_quad_ldd_1[];
164extern int tl1_dmmu_prot_patch_tsb_1[];
165extern int tl1_dmmu_prot_patch_tsb_2[];
166extern int tl1_dmmu_prot_patch_tsb_mask_1[];
167extern int tl1_dmmu_prot_patch_tsb_mask_2[];
168extern int tl1_immu_miss_patch_asi_1[];
169extern int tl1_immu_miss_patch_quad_ldd_1[];
170extern int tl1_immu_miss_patch_tsb_1[];
171extern int tl1_immu_miss_patch_tsb_2[];
172extern int tl1_immu_miss_patch_tsb_mask_1[];
173extern int tl1_immu_miss_patch_tsb_mask_2[];
174
175/*
176 * If a user pmap is processed with pmap_remove and the
177 * resident count drops to 0, there are no more pages to remove, so we
178 * need not continue.
179 */
180#define	PMAP_REMOVE_DONE(pm) \
181	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
182
183/*
184 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
185 * and pmap_protect() instead of trying each virtual address.
186 */
187#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
188
189SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
190
191PMAP_STATS_VAR(pmap_nenter);
192PMAP_STATS_VAR(pmap_nenter_update);
193PMAP_STATS_VAR(pmap_nenter_replace);
194PMAP_STATS_VAR(pmap_nenter_new);
195PMAP_STATS_VAR(pmap_nkenter);
196PMAP_STATS_VAR(pmap_nkenter_oc);
197PMAP_STATS_VAR(pmap_nkenter_stupid);
198PMAP_STATS_VAR(pmap_nkremove);
199PMAP_STATS_VAR(pmap_nqenter);
200PMAP_STATS_VAR(pmap_nqremove);
201PMAP_STATS_VAR(pmap_ncache_enter);
202PMAP_STATS_VAR(pmap_ncache_enter_c);
203PMAP_STATS_VAR(pmap_ncache_enter_oc);
204PMAP_STATS_VAR(pmap_ncache_enter_cc);
205PMAP_STATS_VAR(pmap_ncache_enter_coc);
206PMAP_STATS_VAR(pmap_ncache_enter_nc);
207PMAP_STATS_VAR(pmap_ncache_enter_cnc);
208PMAP_STATS_VAR(pmap_ncache_remove);
209PMAP_STATS_VAR(pmap_ncache_remove_c);
210PMAP_STATS_VAR(pmap_ncache_remove_oc);
211PMAP_STATS_VAR(pmap_ncache_remove_cc);
212PMAP_STATS_VAR(pmap_ncache_remove_coc);
213PMAP_STATS_VAR(pmap_ncache_remove_nc);
214PMAP_STATS_VAR(pmap_nzero_page);
215PMAP_STATS_VAR(pmap_nzero_page_c);
216PMAP_STATS_VAR(pmap_nzero_page_oc);
217PMAP_STATS_VAR(pmap_nzero_page_nc);
218PMAP_STATS_VAR(pmap_nzero_page_area);
219PMAP_STATS_VAR(pmap_nzero_page_area_c);
220PMAP_STATS_VAR(pmap_nzero_page_area_oc);
221PMAP_STATS_VAR(pmap_nzero_page_area_nc);
222PMAP_STATS_VAR(pmap_nzero_page_idle);
223PMAP_STATS_VAR(pmap_nzero_page_idle_c);
224PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
225PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
226PMAP_STATS_VAR(pmap_ncopy_page);
227PMAP_STATS_VAR(pmap_ncopy_page_c);
228PMAP_STATS_VAR(pmap_ncopy_page_oc);
229PMAP_STATS_VAR(pmap_ncopy_page_nc);
230PMAP_STATS_VAR(pmap_ncopy_page_dc);
231PMAP_STATS_VAR(pmap_ncopy_page_doc);
232PMAP_STATS_VAR(pmap_ncopy_page_sc);
233PMAP_STATS_VAR(pmap_ncopy_page_soc);
234
235PMAP_STATS_VAR(pmap_nnew_thread);
236PMAP_STATS_VAR(pmap_nnew_thread_oc);
237
238static inline u_long dtlb_get_data(u_int tlb, u_int slot);
239
240/*
241 * Quick sort callbacks for comparing memory regions and PROM mappings
242 */
243static int mr_cmp(const void *a, const void *b);
244static int om_cmp(const void *a, const void *b);
245
246static int
247mr_cmp(const void *a, const void *b)
248{
249	const struct ofw_mem_region *mra;
250	const struct ofw_mem_region *mrb;
251
252	mra = a;
253	mrb = b;
254	if (mra->mr_start < mrb->mr_start)
255		return (-1);
256	else if (mra->mr_start > mrb->mr_start)
257		return (1);
258	else
259		return (0);
260}
261
262static int
263om_cmp(const void *a, const void *b)
264{
265	const struct ofw_map *oma;
266	const struct ofw_map *omb;
267
268	oma = a;
269	omb = b;
270	if (oma->om_start < omb->om_start)
271		return (-1);
272	else if (oma->om_start > omb->om_start)
273		return (1);
274	else
275		return (0);
276}
277
278static inline u_long
279dtlb_get_data(u_int tlb, u_int slot)
280{
281	u_long data;
282	register_t s;
283
284	slot = TLB_DAR_SLOT(tlb, slot);
285	/*
286	 * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
287	 * work around errata of USIII and beyond.
288	 */
289	s = intr_disable();
290	(void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
291	data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
292	intr_restore(s);
293	return (data);
294}
295
296/*
297 * Bootstrap the system enough to run with virtual memory.
298 */
299void
300pmap_bootstrap(u_int cpu_impl)
301{
302	struct pmap *pm;
303	struct tte *tp;
304	vm_offset_t off;
305	vm_offset_t va;
306	vm_paddr_t pa;
307	vm_size_t physsz;
308	vm_size_t virtsz;
309	u_long data;
310	u_long vpn;
311	phandle_t pmem;
312	phandle_t vmem;
313	u_int dtlb_slots_avail;
314	int i;
315	int j;
316	int sz;
317	uint32_t asi;
318	uint32_t colors;
319	uint32_t ldd;
320
321	/*
322	 * Set the kernel context.
323	 */
324	pmap_set_kctx();
325
326	colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
327
328	/*
329	 * Find out what physical memory is available from the PROM and
330	 * initialize the phys_avail array.  This must be done before
331	 * pmap_bootstrap_alloc is called.
332	 */
333	if ((pmem = OF_finddevice("/memory")) == -1)
334		OF_panic("%s: finddevice /memory", __func__);
335	if ((sz = OF_getproplen(pmem, "available")) == -1)
336		OF_panic("%s: getproplen /memory/available", __func__);
337	if (sizeof(phys_avail) < sz)
338		OF_panic("%s: phys_avail too small", __func__);
339	if (sizeof(mra) < sz)
340		OF_panic("%s: mra too small", __func__);
341	bzero(mra, sz);
342	if (OF_getprop(pmem, "available", mra, sz) == -1)
343		OF_panic("%s: getprop /memory/available", __func__);
344	sz /= sizeof(*mra);
345	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
346	qsort(mra, sz, sizeof (*mra), mr_cmp);
347	physsz = 0;
348	getenv_quad("hw.physmem", &physmem);
349	physmem = btoc(physmem);
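	/*
	 * Walk the sorted regions and build phys_avail as (start, end)
	 * pairs.  If the hw.physmem loader tunable is set, truncate the
	 * last region used so that no more than the requested amount of
	 * physical memory is made available.
	 */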
350	for (i = 0, j = 0; i < sz; i++, j += 2) {
351		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
352		    mra[i].mr_size);
353		if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
354			if (btoc(physsz) < physmem) {
355				phys_avail[j] = mra[i].mr_start;
356				phys_avail[j + 1] = mra[i].mr_start +
357				    (ctob(physmem) - physsz);
358				physsz = ctob(physmem);
359			}
360			break;
361		}
362		phys_avail[j] = mra[i].mr_start;
363		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
364		physsz += mra[i].mr_size;
365	}
366	physmem = btoc(physsz);
367
368	/*
369	 * Calculate the size of kernel virtual memory, and the size and mask
370 * for the kernel TSB based on the physical memory size but limited
371 * by the number of dTLB slots available for locked entries if we have
372	 * to lock the TSB in the TLB (given that for spitfire-class CPUs all
373	 * of the dt64 slots can hold locked entries but there is no large
374	 * dTLB for unlocked ones, we don't use more than half of it for the
375	 * TSB).
376	 * Note that for reasons unknown OpenSolaris doesn't take advantage of
377	 * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
378	 * public documentation is available for these, the latter just might
379	 * not support it, yet.
380	 */
381	if (cpu_impl == CPU_IMPL_SPARC64V ||
382	    cpu_impl >= CPU_IMPL_ULTRASPARCIIIp) {
383		tsb_kernel_ldd_phys = 1;
384		virtsz = roundup(physsz * 5 / 3, PAGE_SIZE_4M <<
385		    (PAGE_SHIFT - TTE_SHIFT));
386	} else {
387		dtlb_slots_avail = 0;
388		for (i = 0; i < dtlb_slots; i++) {
389			data = dtlb_get_data(cpu_impl ==
390			    CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
391			    TLB_DAR_T32, i);
392			if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
393				dtlb_slots_avail++;
394		}
395#ifdef SMP
396		dtlb_slots_avail -= PCPU_PAGES;
397#endif
398		if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
399		    cpu_impl < CPU_IMPL_ULTRASPARCIII)
400			dtlb_slots_avail /= 2;
401		virtsz = roundup(physsz, PAGE_SIZE_4M <<
402		    (PAGE_SHIFT - TTE_SHIFT));
403		virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
404		    (PAGE_SHIFT - TTE_SHIFT));
405	}
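	/*
	 * The kernel TSB holds one TTE per page of KVA, so with 8 KB pages
	 * and 16-byte TTEs the TSB size below is virtsz / 512.  Rounding
	 * virtsz to PAGE_SIZE_4M << (PAGE_SHIFT - TTE_SHIFT) above keeps the
	 * TSB an exact multiple of the 4 MB pages used to map (and possibly
	 * lock) it; tsb_kernel_mask is the matching TTE index mask.
	 */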
406	vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
407	tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
408	tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
409
410	/*
411	 * Allocate the kernel TSB and lock it in the TLB if necessary.
412	 */
413	pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
414	if (pa & PAGE_MASK_4M)
415		OF_panic("%s: TSB unaligned", __func__);
416	tsb_kernel_phys = pa;
417	if (tsb_kernel_ldd_phys == 0) {
418		tsb_kernel =
419		    (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
420		pmap_map_tsb();
421		bzero(tsb_kernel, tsb_kernel_size);
422	} else {
423		tsb_kernel =
424		    (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
425		aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
426	}
427
428	/*
429	 * Allocate and map the dynamic per-CPU area for the BSP.
430	 */
431	pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
432	dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
433
434	/*
435	 * Allocate and map the message buffer.
436	 */
437	pa = pmap_bootstrap_alloc(msgbufsize, colors);
438	msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
439
440	/*
441	 * Patch the TSB addresses and mask as well as the ASIs used to load
442	 * it into the trap table.
443	 */
444
445#define	LDDA_R_I_R(rd, imm_asi, rs1, rs2)				\
446	(EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |	\
447	    EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |	\
448	    EIF_F3_RS2(rs2))
449#define	OR_R_I_R(rd, imm13, rs1)					\
450	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |	\
451	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
452#define	SETHI(rd, imm22)						\
453	(EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |	\
454	    EIF_IMM((imm22) >> 10, 22))
455#define	WR_R_I(rd, imm13, rs1)						\
456	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |	\
457	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
458
459#define	PATCH_ASI(addr, asi) do {					\
460	if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,			\
461	    IF_F3_RS1(addr[0])))					\
462		OF_panic("%s: patched instructions have changed",	\
463		    __func__);						\
464	addr[0] |= EIF_IMM((asi), 13);					\
465	flush(addr);							\
466} while (0)
467
468#define	PATCH_LDD(addr, asi) do {					\
469	if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,		\
470	    IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))			\
471		OF_panic("%s: patched instructions have changed",	\
472		    __func__);						\
473	addr[0] |= EIF_F3_IMM_ASI(asi);					\
474	flush(addr);							\
475} while (0)
476
477#define	PATCH_TSB(addr, val) do {					\
478	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
479	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
480	    IF_F3_RS1(addr[1]))	||					\
481	    addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))			\
482		OF_panic("%s: patched instructions have changed",	\
483		    __func__);						\
484	addr[0] |= EIF_IMM((val) >> 42, 22);				\
485	addr[1] |= EIF_IMM((val) >> 32, 10);				\
486	addr[3] |= EIF_IMM((val) >> 10, 22);				\
487	flush(addr);							\
488	flush(addr + 1);						\
489	flush(addr + 3);						\
490} while (0)
491
492#define	PATCH_TSB_MASK(addr, val) do {					\
493	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
494	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
495	    IF_F3_RS1(addr[1])))					\
496		OF_panic("%s: patched instructions have changed",	\
497		    __func__);						\
498	addr[0] |= EIF_IMM((val) >> 10, 22);				\
499	addr[1] |= EIF_IMM((val), 10);					\
500	flush(addr);							\
501	flush(addr + 1);						\
502} while (0)
503
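	/*
	 * The trap table is assembled with placeholder sethi/or/wr/ldda
	 * instructions; the macros above check that each placeholder still
	 * looks as expected and then OR the real value into its immediate
	 * field.  PATCH_TSB() splits a 64-bit address whose low 10 bits are
	 * zero across a sethi/or pair (bits 63-32) and a second sethi
	 * (bits 31-10), while PATCH_ASI() and PATCH_LDD() simply insert the
	 * ASI number.  flush() makes each patched instruction visible to
	 * instruction fetch.
	 */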
504	if (tsb_kernel_ldd_phys == 0) {
505		asi = ASI_N;
506		ldd = ASI_NUCLEUS_QUAD_LDD;
507		off = (vm_offset_t)tsb_kernel;
508	} else {
509		asi = ASI_PHYS_USE_EC;
510		ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
511		off = (vm_offset_t)tsb_kernel_phys;
512	}
513	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
514	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
515	    tsb_kernel_phys + tsb_kernel_size - 1);
516	PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
517	PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
518	PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
519	PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
520	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
521	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
522	PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
523	PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
524	PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
525	PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
526	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
527	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
528	PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
529	PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
530	PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
531	PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
532	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
533	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
534
535	/*
536	 * Enter fake 8k pages for the 4MB kernel pages, so that
537	 * pmap_kextract() will work for them.
538	 */
539	for (i = 0; i < kernel_tlb_slots; i++) {
540		pa = kernel_tlbs[i].te_pa;
541		va = kernel_tlbs[i].te_va;
542		for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
543			tp = tsb_kvtotte(va + off);
544			vpn = TV_VPN(va + off, TS_8K);
545			data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
546			    TD_SW | TD_CP | TD_CV | TD_P | TD_W;
547			pmap_bootstrap_set_tte(tp, vpn, data);
548		}
549	}
550
551	/*
552	 * Set the start and end of KVA.  The kernel is loaded starting
553	 * at the first available 4MB super page, so we advance to the
554	 * end of the last one used for it.
555	 */
556	virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
557	virtual_end = vm_max_kernel_address;
558	kernel_vm_end = vm_max_kernel_address;
559
560	/*
561	 * Allocate kva space for temporary mappings.
562	 */
563	pmap_idle_map = virtual_avail;
564	virtual_avail += PAGE_SIZE * colors;
565	pmap_temp_map_1 = virtual_avail;
566	virtual_avail += PAGE_SIZE * colors;
567	pmap_temp_map_2 = virtual_avail;
568	virtual_avail += PAGE_SIZE * colors;
569
570	/*
571	 * Allocate a kernel stack with guard page for thread0 and map it
572	 * into the kernel TSB.  We must ensure that the virtual address is
573	 * colored properly for corresponding CPUs, since we're allocating
574	 * from phys_avail so the memory won't have an associated vm_page_t.
575	 */
576	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
577	kstack0_phys = pa;
578	virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
579	kstack0 = virtual_avail;
580	virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
581	if (dcache_color_ignore == 0)
582		KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
583		    ("pmap_bootstrap: kstack0 miscolored"));
584	for (i = 0; i < KSTACK_PAGES; i++) {
585		pa = kstack0_phys + i * PAGE_SIZE;
586		va = kstack0 + i * PAGE_SIZE;
587		tp = tsb_kvtotte(va);
588		vpn = TV_VPN(va, TS_8K);
589		data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
590		    TD_CV | TD_P | TD_W;
591		pmap_bootstrap_set_tte(tp, vpn, data);
592	}
593
594	/*
595	 * Calculate the last available physical address.
596	 */
597	for (i = 0; phys_avail[i + 2] != 0; i += 2)
598		;
599	Maxmem = sparc64_btop(phys_avail[i + 1]);
600
601	/*
602	 * Add the PROM mappings to the kernel TSB.
603	 */
604	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
605		OF_panic("%s: finddevice /virtual-memory", __func__);
606	if ((sz = OF_getproplen(vmem, "translations")) == -1)
607		OF_panic("%s: getproplen translations", __func__);
608	if (sizeof(translations) < sz)
609		OF_panic("%s: translations too small", __func__);
610	bzero(translations, sz);
611	if (OF_getprop(vmem, "translations", translations, sz) == -1)
612		OF_panic("%s: getprop /virtual-memory/translations",
613		    __func__);
614	sz /= sizeof(*translations);
615	translations_size = sz;
616	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
617	qsort(translations, sz, sizeof (*translations), om_cmp);
618	for (i = 0; i < sz; i++) {
619		CTR3(KTR_PMAP,
620		    "translation: start=%#lx size=%#lx tte=%#lx",
621		    translations[i].om_start, translations[i].om_size,
622		    translations[i].om_tte);
623		if ((translations[i].om_tte & TD_V) == 0)
624			continue;
625		if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
626		    translations[i].om_start > VM_MAX_PROM_ADDRESS)
627			continue;
628		for (off = 0; off < translations[i].om_size;
629		    off += PAGE_SIZE) {
630			va = translations[i].om_start + off;
631			tp = tsb_kvtotte(va);
632			vpn = TV_VPN(va, TS_8K);
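			/*
			 * Reuse the PROM's TTE, but strip its software and
			 * diagnostic/reserved bits (which differ between
			 * spitfire- and cheetah-class CPUs), add the page
			 * offset to the PA and set TD_EXEC so that firmware
			 * code remains executable.
			 */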
633			data = ((translations[i].om_tte &
634			    ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
635			    (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
636			    cpu_impl < CPU_IMPL_ULTRASPARCIII ?
637			    (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
638			    (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
639			    (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
640			    off;
641			pmap_bootstrap_set_tte(tp, vpn, data);
642		}
643	}
644
645	/*
646	 * Get the available physical memory ranges from /memory/reg.  These
647	 * are only used for kernel dumps, but it may not be wise to do PROM
648	 * calls in that situation.
649	 */
650	if ((sz = OF_getproplen(pmem, "reg")) == -1)
651		OF_panic("%s: getproplen /memory/reg", __func__);
652	if (sizeof(sparc64_memreg) < sz)
653		OF_panic("%s: sparc64_memreg too small", __func__);
654	if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
655		OF_panic("%s: getprop /memory/reg", __func__);
656	sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
657
658	/*
659	 * Initialize the kernel pmap (which is statically allocated).
660	 */
661	pm = kernel_pmap;
662	PMAP_LOCK_INIT(pm);
663	for (i = 0; i < MAXCPU; i++)
664		pm->pm_context[i] = TLB_CTX_KERNEL;
665	CPU_FILL(&pm->pm_active);
666
667	/*
668	 * Initialize the global tte list lock, which is more commonly
669	 * known as the pmap pv global lock.
670	 */
671	rw_init(&tte_list_global_lock, "pmap pv global");
672
673	/*
674	 * Flush all non-locked TLB entries possibly left over by the
675	 * firmware.
676	 */
677	tlb_flush_nonlocked();
678}
679
680/*
681 * Map the 4MB kernel TSB pages.
682 */
683void
684pmap_map_tsb(void)
685{
686	vm_offset_t va;
687	vm_paddr_t pa;
688	u_long data;
689	int i;
690
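	/*
	 * Enter a locked (TD_L) 4 MB dTLB entry for each superpage of the
	 * TSB so that kernel TSB lookups can never themselves miss in the
	 * TLB.
	 */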
691	for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
692		va = (vm_offset_t)tsb_kernel + i;
693		pa = tsb_kernel_phys + i;
694		data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
695		    TD_P | TD_W;
696		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
697		    TLB_TAR_CTX(TLB_CTX_KERNEL));
698		stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
699	}
700}
701
702/*
703 * Set the secondary context to be the kernel context (needed for FP block
704 * operations in the kernel).
705 */
706void
707pmap_set_kctx(void)
708{
709
710	stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
711	    TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
712	flush(KERNBASE);
713}
714
715/*
716 * Allocate a physical page of memory directly from the phys_avail map.
717 * Can only be called from pmap_bootstrap before avail start and end are
718 * calculated.
719 */
720static vm_paddr_t
721pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
722{
723	vm_paddr_t pa;
724	int i;
725
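	/*
	 * Round the size up to a multiple of PAGE_SIZE * colors so that
	 * carving this allocation off the front of a region leaves the
	 * next allocation starting at the same dcache color.
	 */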
726	size = roundup(size, PAGE_SIZE * colors);
727	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
728		if (phys_avail[i + 1] - phys_avail[i] < size)
729			continue;
730		pa = phys_avail[i];
731		phys_avail[i] += size;
732		return (pa);
733	}
734	OF_panic("%s: no suitable region found", __func__);
735}
736
737/*
738 * Set a TTE.  This function is intended as a helper when tsb_kernel is
739 * direct-mapped but we haven't taken over the trap table yet, as is the
740 * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
741 * the kernel TSB.
742 */
743void
744pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
745{
746
747	if (tsb_kernel_ldd_phys == 0) {
748		tp->tte_vpn = vpn;
749		tp->tte_data = data;
750	} else {
751		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
752		    ASI_PHYS_USE_EC, vpn);
753		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
754		    ASI_PHYS_USE_EC, data);
755	}
756}
757
758/*
759 * Initialize a vm_page's machine-dependent fields.
760 */
761void
762pmap_page_init(vm_page_t m)
763{
764
765	TAILQ_INIT(&m->md.tte_list);
766	m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
767	m->md.flags = 0;
768	m->md.pmap = NULL;
769}
770
771/*
772 * Initialize the pmap module.
773 */
774void
775pmap_init(void)
776{
777	vm_offset_t addr;
778	vm_size_t size;
779	int result;
780	int i;
781
782	for (i = 0; i < translations_size; i++) {
783		addr = translations[i].om_start;
784		size = translations[i].om_size;
785		if ((translations[i].om_tte & TD_V) == 0)
786			continue;
787		if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
788			continue;
789		result = vm_map_find(kernel_map, NULL, 0, &addr, size,
790		    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
791		if (result != KERN_SUCCESS || addr != translations[i].om_start)
792			panic("pmap_init: vm_map_find");
793	}
794}
795
796/*
797 * Extract the physical page address associated with the given
798 * map/virtual_address pair.
799 */
800vm_paddr_t
801pmap_extract(pmap_t pm, vm_offset_t va)
802{
803	struct tte *tp;
804	vm_paddr_t pa;
805
806	if (pm == kernel_pmap)
807		return (pmap_kextract(va));
808	PMAP_LOCK(pm);
809	tp = tsb_tte_lookup(pm, va);
810	if (tp == NULL)
811		pa = 0;
812	else
813		pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
814	PMAP_UNLOCK(pm);
815	return (pa);
816}
817
818/*
819 * Atomically extract and hold the physical page with the given
820 * pmap and virtual address pair if that mapping permits the given
821 * protection.
822 */
823vm_page_t
824pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
825{
826	struct tte *tp;
827	vm_page_t m;
828	vm_paddr_t pa;
829
830	m = NULL;
831	pa = 0;
832	PMAP_LOCK(pm);
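	/*
	 * vm_page_pa_tryrelock() may have to drop the pmap lock in order
	 * to switch to the page lock for a new physical address; when that
	 * happens the lookup below is stale and must be redone.
	 */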
833retry:
834	if (pm == kernel_pmap) {
835		if (va >= VM_MIN_DIRECT_ADDRESS) {
836			tp = NULL;
837			m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
838			(void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
839			    &pa);
840			vm_page_hold(m);
841		} else {
842			tp = tsb_kvtotte(va);
843			if ((tp->tte_data & TD_V) == 0)
844				tp = NULL;
845		}
846	} else
847		tp = tsb_tte_lookup(pm, va);
848	if (tp != NULL && ((tp->tte_data & TD_SW) ||
849	    (prot & VM_PROT_WRITE) == 0)) {
850		if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
851			goto retry;
852		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
853		vm_page_hold(m);
854	}
855	PA_UNLOCK_COND(pa);
856	PMAP_UNLOCK(pm);
857	return (m);
858}
859
860/*
861 * Extract the physical page address associated with the given kernel virtual
862 * address.
863 */
864vm_paddr_t
865pmap_kextract(vm_offset_t va)
866{
867	struct tte *tp;
868
869	if (va >= VM_MIN_DIRECT_ADDRESS)
870		return (TLB_DIRECT_TO_PHYS(va));
871	tp = tsb_kvtotte(va);
872	if ((tp->tte_data & TD_V) == 0)
873		return (0);
874	return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
875}
876
877int
878pmap_cache_enter(vm_page_t m, vm_offset_t va)
879{
880	struct tte *tp;
881	int color;
882
883	rw_assert(&tte_list_global_lock, RA_WLOCKED);
884	KASSERT((m->flags & PG_FICTITIOUS) == 0,
885	    ("pmap_cache_enter: fake page"));
886	PMAP_STATS_INC(pmap_ncache_enter);
887
888	if (dcache_color_ignore != 0)
889		return (1);
890
891	/*
892	 * Find the color for this virtual address and note the added mapping.
893	 */
894	color = DCACHE_COLOR(va);
895	m->md.colors[color]++;
896
897	/*
898	 * If all existing mappings have the same color, the mapping is
899	 * cacheable.
900	 */
901	if (m->md.color == color) {
902		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
903		    ("pmap_cache_enter: cacheable, mappings of other color"));
904		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
905			PMAP_STATS_INC(pmap_ncache_enter_c);
906		else
907			PMAP_STATS_INC(pmap_ncache_enter_oc);
908		return (1);
909	}
910
911	/*
912	 * If there are no mappings of the other color, and the page still has
913	 * the wrong color, this must be a new mapping.  Change the color to
914	 * match the new mapping, which is cacheable.  We must flush the page
915	 * from the cache now.
916	 */
917	if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
918		KASSERT(m->md.colors[color] == 1,
919		    ("pmap_cache_enter: changing color, not new mapping"));
920		dcache_page_inval(VM_PAGE_TO_PHYS(m));
921		m->md.color = color;
922		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
923			PMAP_STATS_INC(pmap_ncache_enter_cc);
924		else
925			PMAP_STATS_INC(pmap_ncache_enter_coc);
926		return (1);
927	}
928
929	/*
930	 * If the mapping is already non-cacheable, just return.
931	 */
932	if (m->md.color == -1) {
933		PMAP_STATS_INC(pmap_ncache_enter_nc);
934		return (0);
935	}
936
937	PMAP_STATS_INC(pmap_ncache_enter_cnc);
938
939	/*
940	 * Mark all mappings as uncacheable, flush any lines with the other
941	 * color out of the dcache, and set the color to none (-1).
942	 */
943	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
944		atomic_clear_long(&tp->tte_data, TD_CV);
945		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
946	}
947	dcache_page_inval(VM_PAGE_TO_PHYS(m));
948	m->md.color = -1;
949	return (0);
950}
951
952static void
953pmap_cache_remove(vm_page_t m, vm_offset_t va)
954{
955	struct tte *tp;
956	int color;
957
958	rw_assert(&tte_list_global_lock, RA_WLOCKED);
959	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
960	    m->md.colors[DCACHE_COLOR(va)]);
961	KASSERT((m->flags & PG_FICTITIOUS) == 0,
962	    ("pmap_cache_remove: fake page"));
963	PMAP_STATS_INC(pmap_ncache_remove);
964
965	if (dcache_color_ignore != 0)
966		return;
967
968	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
969	    ("pmap_cache_remove: no mappings %d <= 0",
970	    m->md.colors[DCACHE_COLOR(va)]));
971
972	/*
973	 * Find the color for this virtual address and note the removal of
974	 * the mapping.
975	 */
976	color = DCACHE_COLOR(va);
977	m->md.colors[color]--;
978
979	/*
980	 * If the page is cacheable, just return and keep the same color, even
981	 * if there are no longer any mappings.
982	 */
983	if (m->md.color != -1) {
984		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
985			PMAP_STATS_INC(pmap_ncache_remove_c);
986		else
987			PMAP_STATS_INC(pmap_ncache_remove_oc);
988		return;
989	}
990
991	KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
992	    ("pmap_cache_remove: uncacheable, no mappings of other color"));
993
994	/*
995	 * If the page is not cacheable (color is -1), and the number of
996	 * mappings for this color is not zero, just return.  There are
997	 * mappings of the other color still, so remain non-cacheable.
998	 */
999	if (m->md.colors[color] != 0) {
1000		PMAP_STATS_INC(pmap_ncache_remove_nc);
1001		return;
1002	}
1003
1004	/*
1005	 * The number of mappings for this color is now zero.  Recache the
1006	 * other colored mappings, and change the page color to the other
1007	 * color.  There should be no lines in the data cache for this page,
1008	 * so flushing should not be needed.
1009	 */
1010	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1011		atomic_set_long(&tp->tte_data, TD_CV);
1012		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1013	}
1014	m->md.color = DCACHE_OTHER_COLOR(color);
1015
1016	if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1017		PMAP_STATS_INC(pmap_ncache_remove_cc);
1018	else
1019		PMAP_STATS_INC(pmap_ncache_remove_coc);
1020}
1021
1022/*
1023 * Map a wired page into kernel virtual address space.
1024 */
1025void
1026pmap_kenter(vm_offset_t va, vm_page_t m)
1027{
1028	vm_offset_t ova;
1029	struct tte *tp;
1030	vm_page_t om;
1031	u_long data;
1032
1033	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1034	PMAP_STATS_INC(pmap_nkenter);
1035	tp = tsb_kvtotte(va);
1036	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
1037	    va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
1038	if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
1039		CTR5(KTR_SPARE2,
1040	"pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
1041		    va, VM_PAGE_TO_PHYS(m), m->object,
1042		    m->object ? m->object->type : -1,
1043		    m->pindex);
1044		PMAP_STATS_INC(pmap_nkenter_oc);
1045	}
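	/*
	 * If the TSB slot already holds a valid mapping, detach it from
	 * the old page's TTE list first; re-entering the same page at the
	 * same virtual address is a no-op.
	 */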
1046	if ((tp->tte_data & TD_V) != 0) {
1047		om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1048		ova = TTE_GET_VA(tp);
1049		if (m == om && va == ova) {
1050			PMAP_STATS_INC(pmap_nkenter_stupid);
1051			return;
1052		}
1053		TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
1054		pmap_cache_remove(om, ova);
1055		if (va != ova)
1056			tlb_page_demap(kernel_pmap, ova);
1057	}
1058	data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
1059	    TD_P | TD_W;
1060	if (pmap_cache_enter(m, va) != 0)
1061		data |= TD_CV;
1062	tp->tte_vpn = TV_VPN(va, TS_8K);
1063	tp->tte_data = data;
1064	TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1065}
1066
1067/*
1068 * Map a wired page into kernel virtual address space.  This additionally
1069 * takes a flag argument which is or'ed to the TTE data.  This is used by
1070 * sparc64_bus_mem_map().
1071 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1072 * to flush entries that might still be in the cache, if applicable.
1073 */
1074void
1075pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1076{
1077	struct tte *tp;
1078
1079	tp = tsb_kvtotte(va);
1080	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1081	    va, pa, tp, tp->tte_data);
1082	tp->tte_vpn = TV_VPN(va, TS_8K);
1083	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1084}
1085
1086/*
1087 * Remove a wired page from kernel virtual address space.
1088 */
1089void
1090pmap_kremove(vm_offset_t va)
1091{
1092	struct tte *tp;
1093	vm_page_t m;
1094
1095	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1096	PMAP_STATS_INC(pmap_nkremove);
1097	tp = tsb_kvtotte(va);
1098	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1099	    tp->tte_data);
1100	if ((tp->tte_data & TD_V) == 0)
1101		return;
1102	m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1103	TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1104	pmap_cache_remove(m, va);
1105	TTE_ZERO(tp);
1106}
1107
1108/*
1109 * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1110 */
1111void
1112pmap_kremove_flags(vm_offset_t va)
1113{
1114	struct tte *tp;
1115
1116	tp = tsb_kvtotte(va);
1117	CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1118	    tp->tte_data);
1119	TTE_ZERO(tp);
1120}
1121
1122/*
1123 * Map a range of physical addresses into kernel virtual address space.
1124 *
1125 * The value passed in *virt is a suggested virtual address for the mapping.
1126 * Architectures which can support a direct-mapped physical to virtual region
1127 * can return the appropriate address within that region, leaving '*virt'
1128 * unchanged.
1129 */
1130vm_offset_t
1131pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1132{
1133
1134	return (TLB_PHYS_TO_DIRECT(start));
1135}
1136
1137/*
1138 * Map a list of wired pages into kernel virtual address space.  This is
1139 * intended for temporary mappings which do not need page modification or
1140 * references recorded.  Existing mappings in the region are overwritten.
1141 */
1142void
1143pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1144{
1145	vm_offset_t va;
1146
1147	PMAP_STATS_INC(pmap_nqenter);
1148	va = sva;
1149	rw_wlock(&tte_list_global_lock);
1150	while (count-- > 0) {
1151		pmap_kenter(va, *m);
1152		va += PAGE_SIZE;
1153		m++;
1154	}
1155	rw_wunlock(&tte_list_global_lock);
1156	tlb_range_demap(kernel_pmap, sva, va);
1157}
1158
1159/*
1160 * Remove page mappings from kernel virtual address space.  Intended for
1161 * temporary mappings entered by pmap_qenter.
1162 */
1163void
1164pmap_qremove(vm_offset_t sva, int count)
1165{
1166	vm_offset_t va;
1167
1168	PMAP_STATS_INC(pmap_nqremove);
1169	va = sva;
1170	rw_wlock(&tte_list_global_lock);
1171	while (count-- > 0) {
1172		pmap_kremove(va);
1173		va += PAGE_SIZE;
1174	}
1175	rw_wunlock(&tte_list_global_lock);
1176	tlb_range_demap(kernel_pmap, sva, va);
1177}
1178
1179/*
1180 * Initialize the pmap associated with process 0.
1181 */
1182void
1183pmap_pinit0(pmap_t pm)
1184{
1185	int i;
1186
1187	PMAP_LOCK_INIT(pm);
1188	for (i = 0; i < MAXCPU; i++)
1189		pm->pm_context[i] = TLB_CTX_KERNEL;
1190	CPU_ZERO(&pm->pm_active);
1191	pm->pm_tsb = NULL;
1192	pm->pm_tsb_obj = NULL;
1193	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1194}
1195
1196/*
1197 * Initialize a preallocated and zeroed pmap structure, such as one in a
1198 * vmspace structure.
1199 */
1200int
1201pmap_pinit(pmap_t pm)
1202{
1203	vm_page_t ma[TSB_PAGES];
1204	vm_page_t m;
1205	int i;
1206
1207	PMAP_LOCK_INIT(pm);
1208
1209	/*
1210	 * Allocate KVA space for the TSB.
1211	 */
1212	if (pm->pm_tsb == NULL) {
1213		pm->pm_tsb = (struct tte *)kmem_alloc_nofault(kernel_map,
1214		    TSB_BSIZE);
1215		if (pm->pm_tsb == NULL) {
1216			PMAP_LOCK_DESTROY(pm);
1217			return (0);
1218		}
1219	}
1220
1221	/*
1222	 * Allocate an object for it.
1223	 */
1224	if (pm->pm_tsb_obj == NULL)
1225		pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
1226
1227	for (i = 0; i < MAXCPU; i++)
1228		pm->pm_context[i] = -1;
1229	CPU_ZERO(&pm->pm_active);
1230
1231	VM_OBJECT_LOCK(pm->pm_tsb_obj);
1232	for (i = 0; i < TSB_PAGES; i++) {
1233		m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY |
1234		    VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1235		m->valid = VM_PAGE_BITS_ALL;
1236		m->md.pmap = pm;
1237		ma[i] = m;
1238	}
1239	VM_OBJECT_UNLOCK(pm->pm_tsb_obj);
1240	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1241
1242	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1243	return (1);
1244}
1245
1246/*
1247 * Release any resources held by the given physical map.
1248 * Called when a pmap initialized by pmap_pinit is being released.
1249 * Should only be called if the map contains no valid mappings.
1250 */
1251void
1252pmap_release(pmap_t pm)
1253{
1254	vm_object_t obj;
1255	vm_page_t m;
1256#ifdef SMP
1257	struct pcpu *pc;
1258#endif
1259
1260	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1261	    pm->pm_context[curcpu], pm->pm_tsb);
1262	KASSERT(pmap_resident_count(pm) == 0,
1263	    ("pmap_release: resident pages %ld != 0",
1264	    pmap_resident_count(pm)));
1265
1266	/*
1267	 * After the pmap was freed, it might be reallocated to a new process.
1268	 * When switching, this might lead us to wrongly assume that we need
1269 * not switch contexts because the old and new pmap pointers are equal.
1270	 * Therefore, make sure that this pmap is not referenced by any PCPU
1271	 * pointer any more.  This could happen in two cases:
1272	 * - A process that referenced the pmap is currently exiting on a CPU.
1273	 *   However, it is guaranteed to not switch in any more after setting
1274	 *   its state to PRS_ZOMBIE.
1275	 * - A process that referenced this pmap ran on a CPU, but we switched
1276	 *   to a kernel thread, leaving the pmap pointer unchanged.
1277	 */
1278#ifdef SMP
1279	sched_pin();
1280	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
1281		atomic_cmpset_rel_ptr((uintptr_t *)&pc->pc_pmap,
1282		    (uintptr_t)pm, (uintptr_t)NULL);
1283	sched_unpin();
1284#else
1285	critical_enter();
1286	if (PCPU_GET(pmap) == pm)
1287		PCPU_SET(pmap, NULL);
1288	critical_exit();
1289#endif
1290
1291	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1292	obj = pm->pm_tsb_obj;
1293	VM_OBJECT_LOCK(obj);
1294	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1295	while (!TAILQ_EMPTY(&obj->memq)) {
1296		m = TAILQ_FIRST(&obj->memq);
1297		m->md.pmap = NULL;
1298		m->wire_count--;
1299		atomic_subtract_int(&cnt.v_wire_count, 1);
1300		vm_page_free_zero(m);
1301	}
1302	VM_OBJECT_UNLOCK(obj);
1303	PMAP_LOCK_DESTROY(pm);
1304}
1305
1306/*
1307 * Grow the number of kernel page table entries.  Unneeded.
1308 */
1309void
1310pmap_growkernel(vm_offset_t addr)
1311{
1312
1313	panic("pmap_growkernel: can't grow kernel");
1314}
1315
1316int
1317pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1318    vm_offset_t va)
1319{
1320	vm_page_t m;
1321	u_long data;
1322
1323	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1324	data = atomic_readandclear_long(&tp->tte_data);
1325	if ((data & TD_FAKE) == 0) {
1326		m = PHYS_TO_VM_PAGE(TD_PA(data));
1327		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1328		if ((data & TD_WIRED) != 0)
1329			pm->pm_stats.wired_count--;
1330		if ((data & TD_PV) != 0) {
1331			if ((data & TD_W) != 0)
1332				vm_page_dirty(m);
1333			if ((data & TD_REF) != 0)
1334				vm_page_aflag_set(m, PGA_REFERENCED);
1335			if (TAILQ_EMPTY(&m->md.tte_list))
1336				vm_page_aflag_clear(m, PGA_WRITEABLE);
1337			pm->pm_stats.resident_count--;
1338		}
1339		pmap_cache_remove(m, va);
1340	}
1341	TTE_ZERO(tp);
1342	if (PMAP_REMOVE_DONE(pm))
1343		return (0);
1344	return (1);
1345}
1346
1347/*
1348 * Remove the given range of addresses from the specified map.
1349 */
1350void
1351pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1352{
1353	struct tte *tp;
1354	vm_offset_t va;
1355
1356	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1357	    pm->pm_context[curcpu], start, end);
1358	if (PMAP_REMOVE_DONE(pm))
1359		return;
1360	rw_wlock(&tte_list_global_lock);
1361	PMAP_LOCK(pm);
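	/*
	 * For large ranges it is cheaper to scan the TSB once using
	 * tsb_foreach() than to look up every page in the range; for small
	 * ranges the per-page lookups win.  PMAP_TSB_THRESH marks the
	 * crossover.
	 */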
1362	if (end - start > PMAP_TSB_THRESH) {
1363		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1364		tlb_context_demap(pm);
1365	} else {
1366		for (va = start; va < end; va += PAGE_SIZE)
1367			if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1368			    !pmap_remove_tte(pm, NULL, tp, va))
1369				break;
1370		tlb_range_demap(pm, start, end - 1);
1371	}
1372	PMAP_UNLOCK(pm);
1373	rw_wunlock(&tte_list_global_lock);
1374}
1375
1376void
1377pmap_remove_all(vm_page_t m)
1378{
1379	struct pmap *pm;
1380	struct tte *tpn;
1381	struct tte *tp;
1382	vm_offset_t va;
1383
1384	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1385	    ("pmap_remove_all: page %p is not managed", m));
1386	rw_wlock(&tte_list_global_lock);
1387	for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1388		tpn = TAILQ_NEXT(tp, tte_link);
1389		if ((tp->tte_data & TD_PV) == 0)
1390			continue;
1391		pm = TTE_GET_PMAP(tp);
1392		va = TTE_GET_VA(tp);
1393		PMAP_LOCK(pm);
1394		if ((tp->tte_data & TD_WIRED) != 0)
1395			pm->pm_stats.wired_count--;
1396		if ((tp->tte_data & TD_REF) != 0)
1397			vm_page_aflag_set(m, PGA_REFERENCED);
1398		if ((tp->tte_data & TD_W) != 0)
1399			vm_page_dirty(m);
1400		tp->tte_data &= ~TD_V;
1401		tlb_page_demap(pm, va);
1402		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1403		pm->pm_stats.resident_count--;
1404		pmap_cache_remove(m, va);
1405		TTE_ZERO(tp);
1406		PMAP_UNLOCK(pm);
1407	}
1408	vm_page_aflag_clear(m, PGA_WRITEABLE);
1409	rw_wunlock(&tte_list_global_lock);
1410}
1411
1412static int
1413pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1414    vm_offset_t va)
1415{
1416	u_long data;
1417	vm_page_t m;
1418
1419	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1420	data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
1421	if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) {
1422		m = PHYS_TO_VM_PAGE(TD_PA(data));
1423		vm_page_dirty(m);
1424	}
1425	return (1);
1426}
1427
1428/*
1429 * Set the physical protection on the specified range of this map as requested.
1430 */
1431void
1432pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1433{
1434	vm_offset_t va;
1435	struct tte *tp;
1436
1437	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1438	    pm->pm_context[curcpu], sva, eva, prot);
1439
1440	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1441		pmap_remove(pm, sva, eva);
1442		return;
1443	}
1444
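	/*
	 * If write access is being granted rather than revoked, there is
	 * nothing to do here; upgrades are handled lazily by vm_fault().
	 */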
1445	if (prot & VM_PROT_WRITE)
1446		return;
1447
1448	PMAP_LOCK(pm);
1449	if (eva - sva > PMAP_TSB_THRESH) {
1450		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1451		tlb_context_demap(pm);
1452	} else {
1453		for (va = sva; va < eva; va += PAGE_SIZE)
1454			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1455				pmap_protect_tte(pm, NULL, tp, va);
1456		tlb_range_demap(pm, sva, eva - 1);
1457	}
1458	PMAP_UNLOCK(pm);
1459}
1460
1461/*
1462 * Map the given physical page at the specified virtual address in the
1463 * target pmap with the protection requested.  If specified, the page
1464 * will be wired down.
1465 */
1466void
1467pmap_enter(pmap_t pm, vm_offset_t va, vm_prot_t access, vm_page_t m,
1468    vm_prot_t prot, boolean_t wired)
1469{
1470
1471	rw_wlock(&tte_list_global_lock);
1472	PMAP_LOCK(pm);
1473	pmap_enter_locked(pm, va, m, prot, wired);
1474	rw_wunlock(&tte_list_global_lock);
1475	PMAP_UNLOCK(pm);
1476}
1477
1478/*
1479 * Map the given physical page at the specified virtual address in the
1480 * target pmap with the protection requested.  If specified, the page
1481 * will be wired down.
1482 *
1483 * The page queues and pmap must be locked.
1484 */
1485static void
1486pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1487    boolean_t wired)
1488{
1489	struct tte *tp;
1490	vm_paddr_t pa;
1491	vm_page_t real;
1492	u_long data;
1493
1494	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1495	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1496	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
1497	    VM_OBJECT_LOCKED(m->object),
1498	    ("pmap_enter_locked: page %p is not busy", m));
1499	PMAP_STATS_INC(pmap_nenter);
1500	pa = VM_PAGE_TO_PHYS(m);
1501
1502	/*
1503	 * If this is a fake page from the device_pager, but it covers actual
1504	 * physical memory, convert to the real backing page.
1505	 */
1506	if ((m->flags & PG_FICTITIOUS) != 0) {
1507		real = vm_phys_paddr_to_vm_page(pa);
1508		if (real != NULL)
1509			m = real;
1510	}
1511
1512	CTR6(KTR_PMAP,
1513	    "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1514	    pm->pm_context[curcpu], m, va, pa, prot, wired);
1515
1516	/*
1517	 * If there is an existing mapping, and the physical address has not
1518	 * changed, must be protection or wiring change.
1519	 */
1520	if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1521		CTR0(KTR_PMAP, "pmap_enter_locked: update");
1522		PMAP_STATS_INC(pmap_nenter_update);
1523
1524		/*
1525		 * Wiring change, just update stats.
1526		 */
1527		if (wired) {
1528			if ((tp->tte_data & TD_WIRED) == 0) {
1529				tp->tte_data |= TD_WIRED;
1530				pm->pm_stats.wired_count++;
1531			}
1532		} else {
1533			if ((tp->tte_data & TD_WIRED) != 0) {
1534				tp->tte_data &= ~TD_WIRED;
1535				pm->pm_stats.wired_count--;
1536			}
1537		}
1538
1539		/*
1540		 * Save the old bits and clear the ones we're interested in.
1541		 */
1542		data = tp->tte_data;
1543		tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1544
1545		/*
1546		 * Grant write access if requested; otherwise sense the modify status.
1547		 */
1548		if ((prot & VM_PROT_WRITE) != 0) {
1549			tp->tte_data |= TD_SW;
1550			if (wired)
1551				tp->tte_data |= TD_W;
1552			if ((m->oflags & VPO_UNMANAGED) == 0)
1553				vm_page_aflag_set(m, PGA_WRITEABLE);
1554		} else if ((data & TD_W) != 0)
1555			vm_page_dirty(m);
1556
1557		/*
1558		 * If we're turning on execute permissions, flush the icache.
1559		 */
1560		if ((prot & VM_PROT_EXECUTE) != 0) {
1561			if ((data & TD_EXEC) == 0)
1562				icache_page_inval(pa);
1563			tp->tte_data |= TD_EXEC;
1564		}
1565
1566		/*
1567		 * Delete the old mapping.
1568		 */
1569		tlb_page_demap(pm, TTE_GET_VA(tp));
1570	} else {
1571		/*
1572		 * If there is an existing mapping, but it's for a different
1573		 * physical address, delete the old mapping.
1574		 */
1575		if (tp != NULL) {
1576			CTR0(KTR_PMAP, "pmap_enter_locked: replace");
1577			PMAP_STATS_INC(pmap_nenter_replace);
1578			pmap_remove_tte(pm, NULL, tp, va);
1579			tlb_page_demap(pm, va);
1580		} else {
1581			CTR0(KTR_PMAP, "pmap_enter_locked: new");
1582			PMAP_STATS_INC(pmap_nenter_new);
1583		}
1584
1585		/*
1586		 * Now set up the data and install the new mapping.
1587		 */
1588		data = TD_V | TD_8K | TD_PA(pa);
1589		if (pm == kernel_pmap)
1590			data |= TD_P;
1591		if ((prot & VM_PROT_WRITE) != 0) {
1592			data |= TD_SW;
1593			if ((m->oflags & VPO_UNMANAGED) == 0)
1594				vm_page_aflag_set(m, PGA_WRITEABLE);
1595		}
1596		if (prot & VM_PROT_EXECUTE) {
1597			data |= TD_EXEC;
1598			icache_page_inval(pa);
1599		}
1600
1601		/*
1602		 * If it's wired, update stats.  We also don't need reference or
1603		 * modify tracking for wired mappings, so set the bits now.
1604		 */
1605		if (wired) {
1606			pm->pm_stats.wired_count++;
1607			data |= TD_REF | TD_WIRED;
1608			if ((prot & VM_PROT_WRITE) != 0)
1609				data |= TD_W;
1610		}
1611
1612		tsb_tte_enter(pm, m, va, TS_8K, data);
1613	}
1614}
1615
1616/*
1617 * Maps a sequence of resident pages belonging to the same object.
1618 * The sequence begins with the given page m_start.  This page is
1619 * mapped at the given virtual address start.  Each subsequent page is
1620 * mapped at a virtual address that is offset from start by the same
1621 * amount as the page is offset from m_start within the object.  The
1622 * last page in the sequence is the page with the largest offset from
1623 * m_start that can be mapped at a virtual address less than the given
1624 * virtual address end.  Not every virtual page between start and end
1625 * is mapped; only those for which a resident page exists with the
1626 * corresponding offset from m_start are mapped.
1627 */
1628void
1629pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1630    vm_page_t m_start, vm_prot_t prot)
1631{
1632	vm_page_t m;
1633	vm_pindex_t diff, psize;
1634
1635	psize = atop(end - start);
1636	m = m_start;
1637	rw_wlock(&tte_list_global_lock);
1638	PMAP_LOCK(pm);
1639	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1640		pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1641		    (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1642		m = TAILQ_NEXT(m, listq);
1643	}
1644	rw_wunlock(&tte_list_global_lock);
1645	PMAP_UNLOCK(pm);
1646}
1647
1648void
1649pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1650{
1651
1652	rw_wlock(&tte_list_global_lock);
1653	PMAP_LOCK(pm);
1654	pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1655	    FALSE);
1656	rw_wunlock(&tte_list_global_lock);
1657	PMAP_UNLOCK(pm);
1658}
1659
1660void
1661pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1662    vm_pindex_t pindex, vm_size_t size)
1663{
1664
1665	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1666	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1667	    ("pmap_object_init_pt: non-device object"));
1668}
1669
1670/*
1671 * Change the wiring attribute for a map/virtual-address pair.
1672 * The mapping must already exist in the pmap.
1673 */
1674void
1675pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1676{
1677	struct tte *tp;
1678	u_long data;
1679
1680	PMAP_LOCK(pm);
1681	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1682		if (wired) {
1683			data = atomic_set_long(&tp->tte_data, TD_WIRED);
1684			if ((data & TD_WIRED) == 0)
1685				pm->pm_stats.wired_count++;
1686		} else {
1687			data = atomic_clear_long(&tp->tte_data, TD_WIRED);
1688			if ((data & TD_WIRED) != 0)
1689				pm->pm_stats.wired_count--;
1690		}
1691	}
1692	PMAP_UNLOCK(pm);
1693}
1694
1695static int
1696pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1697    vm_offset_t va)
1698{
1699	vm_page_t m;
1700	u_long data;
1701
1702	if ((tp->tte_data & TD_FAKE) != 0)
1703		return (1);
1704	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1705		data = tp->tte_data &
1706		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1707		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1708		tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1709	}
1710	return (1);
1711}
1712
1713void
1714pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1715    vm_size_t len, vm_offset_t src_addr)
1716{
1717	struct tte *tp;
1718	vm_offset_t va;
1719
1720	if (dst_addr != src_addr)
1721		return;
1722	rw_wlock(&tte_list_global_lock);
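	/*
	 * Lock the two pmaps in a consistent order (by address) to avoid
	 * deadlocking against a pmap_copy() running in the opposite
	 * direction.
	 */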
1723	if (dst_pmap < src_pmap) {
1724		PMAP_LOCK(dst_pmap);
1725		PMAP_LOCK(src_pmap);
1726	} else {
1727		PMAP_LOCK(src_pmap);
1728		PMAP_LOCK(dst_pmap);
1729	}
1730	if (len > PMAP_TSB_THRESH) {
1731		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1732		    pmap_copy_tte);
1733		tlb_context_demap(dst_pmap);
1734	} else {
1735		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1736			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1737				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1738		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1739	}
1740	rw_wunlock(&tte_list_global_lock);
1741	PMAP_UNLOCK(src_pmap);
1742	PMAP_UNLOCK(dst_pmap);
1743}
1744
1745void
1746pmap_zero_page(vm_page_t m)
1747{
1748	struct tte *tp;
1749	vm_offset_t va;
1750	vm_paddr_t pa;
1751
1752	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1753	    ("pmap_zero_page: fake page"));
1754	PMAP_STATS_INC(pmap_nzero_page);
1755	pa = VM_PAGE_TO_PHYS(m);
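	/*
	 * Zero via the direct map when the page's virtual color matches its
	 * physical color (or coloring is ignored), bypass the dcache with
	 * ASI_PHYS_USE_EC when the page is currently uncacheable, and
	 * otherwise zero through a correctly colored temporary mapping.
	 */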
1756	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1757		PMAP_STATS_INC(pmap_nzero_page_c);
1758		va = TLB_PHYS_TO_DIRECT(pa);
1759		cpu_block_zero((void *)va, PAGE_SIZE);
1760	} else if (m->md.color == -1) {
1761		PMAP_STATS_INC(pmap_nzero_page_nc);
1762		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1763	} else {
1764		PMAP_STATS_INC(pmap_nzero_page_oc);
1765		PMAP_LOCK(kernel_pmap);
1766		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1767		tp = tsb_kvtotte(va);
1768		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1769		tp->tte_vpn = TV_VPN(va, TS_8K);
1770		cpu_block_zero((void *)va, PAGE_SIZE);
1771		tlb_page_demap(kernel_pmap, va);
1772		PMAP_UNLOCK(kernel_pmap);
1773	}
1774}
1775
1776void
1777pmap_zero_page_area(vm_page_t m, int off, int size)
1778{
1779	struct tte *tp;
1780	vm_offset_t va;
1781	vm_paddr_t pa;
1782
1783	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1784	    ("pmap_zero_page_area: fake page"));
1785	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1786	PMAP_STATS_INC(pmap_nzero_page_area);
1787	pa = VM_PAGE_TO_PHYS(m);
1788	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1789		PMAP_STATS_INC(pmap_nzero_page_area_c);
1790		va = TLB_PHYS_TO_DIRECT(pa);
1791		bzero((void *)(va + off), size);
1792	} else if (m->md.color == -1) {
1793		PMAP_STATS_INC(pmap_nzero_page_area_nc);
1794		aszero(ASI_PHYS_USE_EC, pa + off, size);
1795	} else {
1796		PMAP_STATS_INC(pmap_nzero_page_area_oc);
1797		PMAP_LOCK(kernel_pmap);
1798		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1799		tp = tsb_kvtotte(va);
1800		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1801		tp->tte_vpn = TV_VPN(va, TS_8K);
1802		bzero((void *)(va + off), size);
1803		tlb_page_demap(kernel_pmap, va);
1804		PMAP_UNLOCK(kernel_pmap);
1805	}
1806}
1807
1808void
1809pmap_zero_page_idle(vm_page_t m)
1810{
1811	struct tte *tp;
1812	vm_offset_t va;
1813	vm_paddr_t pa;
1814
1815	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1816	    ("pmap_zero_page_idle: fake page"));
1817	PMAP_STATS_INC(pmap_nzero_page_idle);
1818	pa = VM_PAGE_TO_PHYS(m);
1819	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1820		PMAP_STATS_INC(pmap_nzero_page_idle_c);
1821		va = TLB_PHYS_TO_DIRECT(pa);
1822		cpu_block_zero((void *)va, PAGE_SIZE);
1823	} else if (m->md.color == -1) {
1824		PMAP_STATS_INC(pmap_nzero_page_idle_nc);
1825		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1826	} else {
1827		PMAP_STATS_INC(pmap_nzero_page_idle_oc);
1828		va = pmap_idle_map + (m->md.color * PAGE_SIZE);
1829		tp = tsb_kvtotte(va);
1830		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1831		tp->tte_vpn = TV_VPN(va, TS_8K);
1832		cpu_block_zero((void *)va, PAGE_SIZE);
1833		tlb_page_demap(kernel_pmap, va);
1834	}
1835}
1836
1837void
1838pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1839{
1840	vm_offset_t vdst;
1841	vm_offset_t vsrc;
1842	vm_paddr_t pdst;
1843	vm_paddr_t psrc;
1844	struct tte *tp;
1845
1846	KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
1847	    ("pmap_copy_page: fake dst page"));
1848	KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
1849	    ("pmap_copy_page: fake src page"));
1850	PMAP_STATS_INC(pmap_ncopy_page);
1851	pdst = VM_PAGE_TO_PHYS(mdst);
1852	psrc = VM_PAGE_TO_PHYS(msrc);
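	/*
	 * As in pmap_zero_page(), respect the virtually indexed data cache:
	 * copy through the direct map when both pages are correctly colored
	 * (or coloring is ignored), copy physically when both are
	 * uncacheable, and route whichever side cannot be touched directly
	 * through a temporary mapping of the proper color.
	 */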
1853	if (dcache_color_ignore != 0 ||
1854	    (msrc->md.color == DCACHE_COLOR(psrc) &&
1855	    mdst->md.color == DCACHE_COLOR(pdst))) {
1856		PMAP_STATS_INC(pmap_ncopy_page_c);
1857		vdst = TLB_PHYS_TO_DIRECT(pdst);
1858		vsrc = TLB_PHYS_TO_DIRECT(psrc);
1859		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1860	} else if (msrc->md.color == -1 && mdst->md.color == -1) {
1861		PMAP_STATS_INC(pmap_ncopy_page_nc);
1862		ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
1863	} else if (msrc->md.color == -1) {
1864		if (mdst->md.color == DCACHE_COLOR(pdst)) {
1865			PMAP_STATS_INC(pmap_ncopy_page_dc);
1866			vdst = TLB_PHYS_TO_DIRECT(pdst);
1867			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1868			    PAGE_SIZE);
1869		} else {
1870			PMAP_STATS_INC(pmap_ncopy_page_doc);
1871			PMAP_LOCK(kernel_pmap);
1872			vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1873			tp = tsb_kvtotte(vdst);
1874			tp->tte_data =
1875			    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1876			tp->tte_vpn = TV_VPN(vdst, TS_8K);
1877			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1878			    PAGE_SIZE);
1879			tlb_page_demap(kernel_pmap, vdst);
1880			PMAP_UNLOCK(kernel_pmap);
1881		}
1882	} else if (mdst->md.color == -1) {
1883		if (msrc->md.color == DCACHE_COLOR(psrc)) {
1884			PMAP_STATS_INC(pmap_ncopy_page_sc);
1885			vsrc = TLB_PHYS_TO_DIRECT(psrc);
1886			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1887			    PAGE_SIZE);
1888		} else {
1889			PMAP_STATS_INC(pmap_ncopy_page_soc);
1890			PMAP_LOCK(kernel_pmap);
1891			vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
1892			tp = tsb_kvtotte(vsrc);
1893			tp->tte_data =
1894			    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1895			tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1896			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1897			    PAGE_SIZE);
1898			tlb_page_demap(kernel_pmap, vsrc);
1899			PMAP_UNLOCK(kernel_pmap);
1900		}
1901	} else {
1902		PMAP_STATS_INC(pmap_ncopy_page_oc);
1903		PMAP_LOCK(kernel_pmap);
1904		vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1905		tp = tsb_kvtotte(vdst);
1906		tp->tte_data =
1907		    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1908		tp->tte_vpn = TV_VPN(vdst, TS_8K);
1909		vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
1910		tp = tsb_kvtotte(vsrc);
1911		tp->tte_data =
1912		    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1913		tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1914		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1915		tlb_page_demap(kernel_pmap, vdst);
1916		tlb_page_demap(kernel_pmap, vsrc);
1917		PMAP_UNLOCK(kernel_pmap);
1918	}
1919}
1920
1921/*
1922 * Returns true if the pmap's pv is one of the first
1923 * 16 pvs linked to this page.  This count may
1924 * be raised or lowered in the future; it
1925 * is only necessary that true be returned for a small
1926 * subset of pmaps for proper page aging.
1927 */
1928boolean_t
1929pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1930{
1931	struct tte *tp;
1932	int loops;
1933	boolean_t rv;
1934
1935	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1936	    ("pmap_page_exists_quick: page %p is not managed", m));
1937	loops = 0;
1938	rv = FALSE;
1939	rw_wlock(&tte_list_global_lock);
1940	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1941		if ((tp->tte_data & TD_PV) == 0)
1942			continue;
1943		if (TTE_GET_PMAP(tp) == pm) {
1944			rv = TRUE;
1945			break;
1946		}
1947		if (++loops >= 16)
1948			break;
1949	}
1950	rw_wunlock(&tte_list_global_lock);
1951	return (rv);
1952}
1953
1954/*
1955 * Return the number of managed mappings to the given physical page
1956 * that are wired.
1957 */
1958int
1959pmap_page_wired_mappings(vm_page_t m)
1960{
1961	struct tte *tp;
1962	int count;
1963
1964	count = 0;
1965	if ((m->oflags & VPO_UNMANAGED) != 0)
1966		return (count);
1967	rw_wlock(&tte_list_global_lock);
1968	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1969		if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
1970			count++;
1971	rw_wunlock(&tte_list_global_lock);
1972	return (count);
1973}
1974
1975/*
1976 * Remove all pages from the specified address space; this aids process
1977 * exit speed.  This is much faster than pmap_remove() when running down
1978 * an entire address space.  Only works for the current pmap.
1979 */
1980void
1981pmap_remove_pages(pmap_t pm)
1982{
1983
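	/*
	 * Left empty here; the mappings are instead reclaimed by the
	 * ordinary pmap_remove() calls made while the address space is
	 * torn down.
	 */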
1984}
1985
1986/*
1987 * Returns TRUE if the given page has a managed mapping.
1988 */
1989boolean_t
1990pmap_page_is_mapped(vm_page_t m)
1991{
1992	struct tte *tp;
1993	boolean_t rv;
1994
1995	rv = FALSE;
1996	if ((m->oflags & VPO_UNMANAGED) != 0)
1997		return (rv);
1998	rw_wlock(&tte_list_global_lock);
1999	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
2000		if ((tp->tte_data & TD_PV) != 0) {
2001			rv = TRUE;
2002			break;
2003		}
2004	rw_wunlock(&tte_list_global_lock);
2005	return (rv);
2006}
2007
2008/*
2009 * Return a count of reference bits for a page, clearing those bits.
2010 * It is not necessary for every reference bit to be cleared, but it
2011 * is necessary that 0 only be returned when there are truly no
2012 * reference bits set.
2013 *
2014 * XXX: The exact number of bits to check and clear is a matter that
2015 * should be tested and standardized at some point in the future for
2016 * optimal aging of shared pages.
2017 */
2018int
2019pmap_ts_referenced(vm_page_t m)
2020{
2021	struct tte *tpf;
2022	struct tte *tpn;
2023	struct tte *tp;
2024	u_long data;
2025	int count;
2026
2027	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2028	    ("pmap_ts_referenced: page %p is not managed", m));
2029	count = 0;
2030	rw_wlock(&tte_list_global_lock);
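	/*
	 * Rotate each examined TTE to the tail of the list so that
	 * successive calls start with different mappings, clear TD_REF and
	 * stop early once more than four referenced mappings have been
	 * found.
	 */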
2031	if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
2032		tpf = tp;
2033		do {
2034			tpn = TAILQ_NEXT(tp, tte_link);
2035			TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
2036			TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
2037			if ((tp->tte_data & TD_PV) == 0)
2038				continue;
2039			data = atomic_clear_long(&tp->tte_data, TD_REF);
2040			if ((data & TD_REF) != 0 && ++count > 4)
2041				break;
2042		} while ((tp = tpn) != NULL && tp != tpf);
2043	}
2044	rw_wunlock(&tte_list_global_lock);
2045	return (count);
2046}
2047
2048boolean_t
2049pmap_is_modified(vm_page_t m)
2050{
2051	struct tte *tp;
2052	boolean_t rv;
2053
2054	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2055	    ("pmap_is_modified: page %p is not managed", m));
2056	rv = FALSE;
2057
2058	/*
2059	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
2060	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2061	 * is clear, no TTEs can have TD_W set.
2062	 */
2063	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2064	if ((m->oflags & VPO_BUSY) == 0 &&
2065	    (m->aflags & PGA_WRITEABLE) == 0)
2066		return (rv);
2067	rw_wlock(&tte_list_global_lock);
2068	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2069		if ((tp->tte_data & TD_PV) == 0)
2070			continue;
2071		if ((tp->tte_data & TD_W) != 0) {
2072			rv = TRUE;
2073			break;
2074		}
2075	}
2076	rw_wunlock(&tte_list_global_lock);
2077	return (rv);
2078}
2079
2080/*
2081 *	pmap_is_prefaultable:
2082 *
2083 *	Return whether or not the specified virtual address is eligible
2084 *	for prefault.
2085 */
2086boolean_t
2087pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2088{
2089	boolean_t rv;
2090
2091	PMAP_LOCK(pmap);
2092	rv = tsb_tte_lookup(pmap, addr) == NULL;
2093	PMAP_UNLOCK(pmap);
2094	return (rv);
2095}
2096
2097/*
2098 * Return whether or not the specified physical page was referenced
2099 * in any physical maps.
2100 */
2101boolean_t
2102pmap_is_referenced(vm_page_t m)
2103{
2104	struct tte *tp;
2105	boolean_t rv;
2106
2107	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2108	    ("pmap_is_referenced: page %p is not managed", m));
2109	rv = FALSE;
2110	rw_wlock(&tte_list_global_lock);
2111	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2112		if ((tp->tte_data & TD_PV) == 0)
2113			continue;
2114		if ((tp->tte_data & TD_REF) != 0) {
2115			rv = TRUE;
2116			break;
2117		}
2118	}
2119	rw_wunlock(&tte_list_global_lock);
2120	return (rv);
2121}
2122
2123void
2124pmap_clear_modify(vm_page_t m)
2125{
2126	struct tte *tp;
2127	u_long data;
2128
2129	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2130	    ("pmap_clear_modify: page %p is not managed", m));
2131	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2132	KASSERT((m->oflags & VPO_BUSY) == 0,
2133	    ("pmap_clear_modify: page %p is busy", m));
2134
2135	/*
2136	 * If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set.
2137	 * If the object containing the page is locked and the page is not
2138	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
2139	 */
2140	if ((m->aflags & PGA_WRITEABLE) == 0)
2141		return;
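	/*
	 * Clear TD_W from every managed mapping and demap those that were
	 * actually dirty so that the next write triggers a new fault.
	 */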
2142	rw_wlock(&tte_list_global_lock);
2143	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2144		if ((tp->tte_data & TD_PV) == 0)
2145			continue;
2146		data = atomic_clear_long(&tp->tte_data, TD_W);
2147		if ((data & TD_W) != 0)
2148			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2149	}
2150	rw_wunlock(&tte_list_global_lock);
2151}
2152
2153void
2154pmap_clear_reference(vm_page_t m)
2155{
2156	struct tte *tp;
2157	u_long data;
2158
2159	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2160	    ("pmap_clear_reference: page %p is not managed", m));
2161	rw_wlock(&tte_list_global_lock);
2162	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2163		if ((tp->tte_data & TD_PV) == 0)
2164			continue;
2165		data = atomic_clear_long(&tp->tte_data, TD_REF);
2166		if ((data & TD_REF) != 0)
2167			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2168	}
2169	rw_wunlock(&tte_list_global_lock);
2170}
2171
2172void
2173pmap_remove_write(vm_page_t m)
2174{
2175	struct tte *tp;
2176	u_long data;
2177
2178	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2179	    ("pmap_remove_write: page %p is not managed", m));
2180
2181	/*
2182	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
2183	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
2184	 * is clear, no page table entries need updating.
2185	 */
2186	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2187	if ((m->oflags & VPO_BUSY) == 0 &&
2188	    (m->aflags & PGA_WRITEABLE) == 0)
2189		return;
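	/*
	 * Strip both the software-writable (TD_SW) and hardware write
	 * (TD_W) bits from every managed mapping; a mapping that still had
	 * TD_W set had been written to, so transfer that state with
	 * vm_page_dirty() and demap the stale TLB entries.
	 */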
2190	rw_wlock(&tte_list_global_lock);
2191	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2192		if ((tp->tte_data & TD_PV) == 0)
2193			continue;
2194		data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
2195		if ((data & TD_W) != 0) {
2196			vm_page_dirty(m);
2197			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2198		}
2199	}
2200	vm_page_aflag_clear(m, PGA_WRITEABLE);
2201	rw_wunlock(&tte_list_global_lock);
2202}
2203
2204int
2205pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa)
2206{
2207
2208	/* TODO */
2209	return (0);
2210}
2211
2212/*
2213 * Activate a user pmap.  The pmap must be activated before its address space
2214 * can be accessed in any way.
2215 */
2216void
2217pmap_activate(struct thread *td)
2218{
2219	struct vmspace *vm;
2220	struct pmap *pm;
2221	int context;
2222
2223	critical_enter();
2224	vm = td->td_proc->p_vmspace;
2225	pm = vmspace_pmap(vm);
2226
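	/*
	 * Allocate the next free TLB context number on this CPU; once the
	 * per-CPU range is exhausted, flush all user TLB entries and start
	 * over again at tlb_ctx_min.
	 */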
2227	context = PCPU_GET(tlb_ctx);
2228	if (context == PCPU_GET(tlb_ctx_max)) {
2229		tlb_flush_user();
2230		context = PCPU_GET(tlb_ctx_min);
2231	}
2232	PCPU_SET(tlb_ctx, context + 1);
2233
2234	pm->pm_context[curcpu] = context;
2235#ifdef SMP
2236	CPU_SET_ATOMIC(PCPU_GET(cpuid), &pm->pm_active);
2237	atomic_store_ptr((uintptr_t *)PCPU_PTR(pmap), (uintptr_t)pm);
2238#else
2239	CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
2240	PCPU_SET(pmap, pm);
2241#endif
2242
2243	stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
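	/*
	 * Point both MMUs at this pmap's TSB and install the new context
	 * number in the primary context register, preserving the page-size
	 * fields; the flush presumably serves to synchronize the ASI stores
	 * before user mappings are used again.
	 */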
2244	stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
2245	stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
2246	    TLB_CXR_PGSZ_MASK) | context);
2247	flush(KERNBASE);
2248	critical_exit();
2249}
2250
2251void
2252pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2253{
2254
2255}
2256
2257/*
2258 * Increase the starting virtual address of the given mapping if a
2259 * different alignment might result in more superpage mappings.
2260 */
2261void
2262pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2263    vm_offset_t *addr, vm_size_t size)
2264{
2265
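	/*
	 * Nothing to do: this pmap does not appear to promote user mappings
	 * to superpages, so no particular alignment is preferred.
	 */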
2266}
2267