1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
38 */
39
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/pmap.c 220939 2011-04-22 09:31:40Z marius $");
42
43/*
44 * Manages physical address maps.
45 *
46 * In addition to hardware address maps, this module is called upon to
47 * provide software-use-only maps which may or may not be stored in the
48 * same form as hardware maps.  These pseudo-maps are used to store
49 * intermediate results from copy operations to and from address spaces.
50 *
51 * Since the information managed by this module is also stored by the
52 * logical address mapping module, this module may throw away valid virtual
53 * to physical mappings at almost any time.  However, invalidations of
54 * mappings must be done as requested.
55 *
56 * In order to cope with hardware architectures which make virtual to
57 * physical map invalidates expensive, this module may delay invalidate or
58 * reduced protection operations until such time as they are actually
59 * necessary.  This module is given full information as to which processors
60 * are currently using which maps, and to when physical maps must be made
61 * correct.
62 */
63
64#include "opt_kstack_pages.h"
65#include "opt_pmap.h"
66
67#include <sys/param.h>
68#include <sys/kernel.h>
69#include <sys/ktr.h>
70#include <sys/lock.h>
71#include <sys/msgbuf.h>
72#include <sys/mutex.h>
73#include <sys/proc.h>
74#include <sys/smp.h>
75#include <sys/sysctl.h>
76#include <sys/systm.h>
77#include <sys/vmmeter.h>
78
79#include <dev/ofw/openfirm.h>
80
81#include <vm/vm.h>
82#include <vm/vm_param.h>
83#include <vm/vm_kern.h>
84#include <vm/vm_page.h>
85#include <vm/vm_map.h>
86#include <vm/vm_object.h>
87#include <vm/vm_extern.h>
88#include <vm/vm_pageout.h>
89#include <vm/vm_pager.h>
90
91#include <machine/cache.h>
92#include <machine/frame.h>
93#include <machine/instr.h>
94#include <machine/md_var.h>
95#include <machine/metadata.h>
96#include <machine/ofw_mem.h>
97#include <machine/smp.h>
98#include <machine/tlb.h>
99#include <machine/tte.h>
100#include <machine/tsb.h>
101#include <machine/ver.h>
102
103#define	PMAP_DEBUG
104
105#ifndef	PMAP_SHPGPERPROC
106#define	PMAP_SHPGPERPROC	200
107#endif
108
109/* XXX */
110#include "opt_sched.h"
111#ifndef SCHED_4BSD
112#error "sparc64 only works with SCHED_4BSD which uses a global scheduler lock."
113#endif
114extern struct mtx sched_lock;
115
116/*
117 * Virtual address of message buffer
118 */
119struct msgbuf *msgbufp;
120
121/*
122 * Map of physical memory regions
123 */
124vm_paddr_t phys_avail[128];
125static struct ofw_mem_region mra[128];
126struct ofw_mem_region sparc64_memreg[128];
127int sparc64_nmemreg;
128static struct ofw_map translations[128];
129static int translations_size;
130
131static vm_offset_t pmap_idle_map;
132static vm_offset_t pmap_temp_map_1;
133static vm_offset_t pmap_temp_map_2;
134
135/*
136 * First and last available kernel virtual addresses
137 */
138vm_offset_t virtual_avail;
139vm_offset_t virtual_end;
140vm_offset_t kernel_vm_end;
141
142vm_offset_t vm_max_kernel_address;
143
144/*
145 * Kernel pmap
146 */
147struct pmap kernel_pmap_store;
148
149/*
150 * Allocate physical memory for use in pmap_bootstrap.
151 */
152static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
153
154static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
155
156/*
157 * Map the given physical page at the specified virtual address in the
158 * target pmap with the protection requested.  If specified the page
159 * will be wired down.
160 *
161 * The page queues and pmap must be locked.
162 */
163static void pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
164    vm_prot_t prot, boolean_t wired);
165
166extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
167extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
168extern int tl1_dmmu_miss_patch_asi_1[];
169extern int tl1_dmmu_miss_patch_quad_ldd_1[];
170extern int tl1_dmmu_miss_patch_tsb_1[];
171extern int tl1_dmmu_miss_patch_tsb_2[];
172extern int tl1_dmmu_miss_patch_tsb_mask_1[];
173extern int tl1_dmmu_miss_patch_tsb_mask_2[];
174extern int tl1_dmmu_prot_patch_asi_1[];
175extern int tl1_dmmu_prot_patch_quad_ldd_1[];
176extern int tl1_dmmu_prot_patch_tsb_1[];
177extern int tl1_dmmu_prot_patch_tsb_2[];
178extern int tl1_dmmu_prot_patch_tsb_mask_1[];
179extern int tl1_dmmu_prot_patch_tsb_mask_2[];
180extern int tl1_immu_miss_patch_asi_1[];
181extern int tl1_immu_miss_patch_quad_ldd_1[];
182extern int tl1_immu_miss_patch_tsb_1[];
183extern int tl1_immu_miss_patch_tsb_2[];
184extern int tl1_immu_miss_patch_tsb_mask_1[];
185extern int tl1_immu_miss_patch_tsb_mask_2[];
186
187/*
188 * If a user pmap is processed with pmap_remove and the resident count
189 * drops to 0, there are no more pages to remove, so we need not
190 * continue.
191 */
192#define	PMAP_REMOVE_DONE(pm) \
193	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
194
195/*
196 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
197 * and pmap_protect() instead of trying each virtual address.
198 */
199#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
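/*
 * Illustrative note (not part of the original source): pmap_remove() and
 * pmap_protect() below switch to tsb_foreach() once the requested range
 * exceeds this threshold.  As a hypothetical worked example, with 8 KB
 * pages and a TSB_SIZE of 8192 entries the threshold would be
 * (8192 / 2) * 8 KB = 32 MB, so a 64 MB pmap_remove() walks the TSB once
 * instead of probing every page in the range.
 */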
200
201SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
202
203PMAP_STATS_VAR(pmap_nenter);
204PMAP_STATS_VAR(pmap_nenter_update);
205PMAP_STATS_VAR(pmap_nenter_replace);
206PMAP_STATS_VAR(pmap_nenter_new);
207PMAP_STATS_VAR(pmap_nkenter);
208PMAP_STATS_VAR(pmap_nkenter_oc);
209PMAP_STATS_VAR(pmap_nkenter_stupid);
210PMAP_STATS_VAR(pmap_nkremove);
211PMAP_STATS_VAR(pmap_nqenter);
212PMAP_STATS_VAR(pmap_nqremove);
213PMAP_STATS_VAR(pmap_ncache_enter);
214PMAP_STATS_VAR(pmap_ncache_enter_c);
215PMAP_STATS_VAR(pmap_ncache_enter_oc);
216PMAP_STATS_VAR(pmap_ncache_enter_cc);
217PMAP_STATS_VAR(pmap_ncache_enter_coc);
218PMAP_STATS_VAR(pmap_ncache_enter_nc);
219PMAP_STATS_VAR(pmap_ncache_enter_cnc);
220PMAP_STATS_VAR(pmap_ncache_remove);
221PMAP_STATS_VAR(pmap_ncache_remove_c);
222PMAP_STATS_VAR(pmap_ncache_remove_oc);
223PMAP_STATS_VAR(pmap_ncache_remove_cc);
224PMAP_STATS_VAR(pmap_ncache_remove_coc);
225PMAP_STATS_VAR(pmap_ncache_remove_nc);
226PMAP_STATS_VAR(pmap_nzero_page);
227PMAP_STATS_VAR(pmap_nzero_page_c);
228PMAP_STATS_VAR(pmap_nzero_page_oc);
229PMAP_STATS_VAR(pmap_nzero_page_nc);
230PMAP_STATS_VAR(pmap_nzero_page_area);
231PMAP_STATS_VAR(pmap_nzero_page_area_c);
232PMAP_STATS_VAR(pmap_nzero_page_area_oc);
233PMAP_STATS_VAR(pmap_nzero_page_area_nc);
234PMAP_STATS_VAR(pmap_nzero_page_idle);
235PMAP_STATS_VAR(pmap_nzero_page_idle_c);
236PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
237PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
238PMAP_STATS_VAR(pmap_ncopy_page);
239PMAP_STATS_VAR(pmap_ncopy_page_c);
240PMAP_STATS_VAR(pmap_ncopy_page_oc);
241PMAP_STATS_VAR(pmap_ncopy_page_nc);
242PMAP_STATS_VAR(pmap_ncopy_page_dc);
243PMAP_STATS_VAR(pmap_ncopy_page_doc);
244PMAP_STATS_VAR(pmap_ncopy_page_sc);
245PMAP_STATS_VAR(pmap_ncopy_page_soc);
246
247PMAP_STATS_VAR(pmap_nnew_thread);
248PMAP_STATS_VAR(pmap_nnew_thread_oc);
249
250static inline u_long dtlb_get_data(u_int slot);
251
252/*
253 * Quick sort callout for comparing memory regions
254 */
255static int mr_cmp(const void *a, const void *b);
256static int om_cmp(const void *a, const void *b);
257
258static int
259mr_cmp(const void *a, const void *b)
260{
261	const struct ofw_mem_region *mra;
262	const struct ofw_mem_region *mrb;
263
264	mra = a;
265	mrb = b;
266	if (mra->mr_start < mrb->mr_start)
267		return (-1);
268	else if (mra->mr_start > mrb->mr_start)
269		return (1);
270	else
271		return (0);
272}
273
274static int
275om_cmp(const void *a, const void *b)
276{
277	const struct ofw_map *oma;
278	const struct ofw_map *omb;
279
280	oma = a;
281	omb = b;
282	if (oma->om_start < omb->om_start)
283		return (-1);
284	else if (oma->om_start > omb->om_start)
285		return (1);
286	else
287		return (0);
288}
289
290static inline u_long
291dtlb_get_data(u_int slot)
292{
293
294	/*
295	 * We read ASI_DTLB_DATA_ACCESS_REG twice in order to work
296	 * around errata of USIII and beyond.
297	 */
298	(void)ldxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_ACCESS_REG);
299	return (ldxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_ACCESS_REG));
300}
301
302/*
303 * Bootstrap the system enough to run with virtual memory.
304 */
305void
306pmap_bootstrap(u_int cpu_impl)
307{
308	struct pmap *pm;
309	struct tte *tp;
310	vm_offset_t off;
311	vm_offset_t va;
312	vm_paddr_t pa;
313	vm_size_t physsz;
314	vm_size_t virtsz;
315	u_long data;
316	u_long vpn;
317	phandle_t pmem;
318	phandle_t vmem;
319	u_int dtlb_slots_avail;
320	int i;
321	int j;
322	int sz;
323	uint32_t asi;
324	uint32_t colors;
325	uint32_t ldd;
326
327	/*
328	 * Set the kernel context.
329	 */
330	pmap_set_kctx();
331
332	colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
333
334	/*
335	 * Find out what physical memory is available from the PROM and
336	 * initialize the phys_avail array.  This must be done before
337	 * pmap_bootstrap_alloc is called.
338	 */
339	if ((pmem = OF_finddevice("/memory")) == -1)
340		panic("pmap_bootstrap: finddevice /memory");
341	if ((sz = OF_getproplen(pmem, "available")) == -1)
342		panic("pmap_bootstrap: getproplen /memory/available");
343	if (sizeof(phys_avail) < sz)
344		panic("pmap_bootstrap: phys_avail too small");
345	if (sizeof(mra) < sz)
346		panic("pmap_bootstrap: mra too small");
347	bzero(mra, sz);
348	if (OF_getprop(pmem, "available", mra, sz) == -1)
349		panic("pmap_bootstrap: getprop /memory/available");
350	sz /= sizeof(*mra);
351	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
352	qsort(mra, sz, sizeof (*mra), mr_cmp);
353	physsz = 0;
354	getenv_quad("hw.physmem", &physmem);
355	physmem = btoc(physmem);
356	for (i = 0, j = 0; i < sz; i++, j += 2) {
357		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
358		    mra[i].mr_size);
359		if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
360			if (btoc(physsz) < physmem) {
361				phys_avail[j] = mra[i].mr_start;
362				phys_avail[j + 1] = mra[i].mr_start +
363				    (ctob(physmem) - physsz);
364				physsz = ctob(physmem);
365			}
366			break;
367		}
368		phys_avail[j] = mra[i].mr_start;
369		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
370		physsz += mra[i].mr_size;
371	}
372	physmem = btoc(physsz);
373
374	/*
375	 * Calculate the size of kernel virtual memory, and the size and mask
376 * for the kernel TSB based on the physical memory size but limited
377 * by the number of dTLB slots available for locked entries if we have
378	 * to lock the TSB in the TLB (given that for spitfire-class CPUs all
379	 * of the dt64 slots can hold locked entries but there is no large
380	 * dTLB for unlocked ones, we don't use more than half of it for the
381	 * TSB).
382	 * Note that for reasons unknown OpenSolaris doesn't take advantage of
383	 * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
384	 * public documentation is available for these, the latter just might
385	 * not support it, yet.
386	 */
387	virtsz = roundup(physsz, PAGE_SIZE_4M << (PAGE_SHIFT - TTE_SHIFT));
388	if (cpu_impl == CPU_IMPL_SPARC64V ||
389	    cpu_impl >= CPU_IMPL_ULTRASPARCIIIp)
390		tsb_kernel_ldd_phys = 1;
391	else {
392		dtlb_slots_avail = 0;
393		for (i = 0; i < dtlb_slots; i++) {
394			data = dtlb_get_data(i);
395			if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
396				dtlb_slots_avail++;
397		}
398#ifdef SMP
399		dtlb_slots_avail -= PCPU_PAGES;
400#endif
401		if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
402		    cpu_impl < CPU_IMPL_ULTRASPARCIII)
403			dtlb_slots_avail /= 2;
404		virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
405		    (PAGE_SHIFT - TTE_SHIFT));
406	}
407	vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
408	tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
409	tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
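	/*
	 * Worked example (illustrative; assumes 8 KB base pages, i.e.
	 * PAGE_SHIFT of 13, and 16-byte TTEs, i.e. TTE_SHIFT of 4): each
	 * 8 KB of KVA needs one kernel TSB entry, so the TSB occupies
	 * virtsz >> 9 bytes.  A hypothetical virtsz of 4 GB thus yields an
	 * 8 MB TSB and a tsb_kernel_mask covering its 512K entry indices.
	 */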
410
411	/*
412	 * Allocate the kernel TSB and lock it in the TLB if necessary.
413	 */
414	pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
415	if (pa & PAGE_MASK_4M)
416		panic("pmap_bootstrap: TSB unaligned\n");
417	tsb_kernel_phys = pa;
418	if (tsb_kernel_ldd_phys == 0) {
419		tsb_kernel =
420		    (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
421		pmap_map_tsb();
422		bzero(tsb_kernel, tsb_kernel_size);
423	} else {
424		tsb_kernel =
425		    (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
426		aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
427	}
428
429	/*
430	 * Allocate and map the dynamic per-CPU area for the BSP.
431	 */
432	pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
433	dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
434
435	/*
436	 * Allocate and map the message buffer.
437	 */
438	pa = pmap_bootstrap_alloc(msgbufsize, colors);
439	msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
440
441	/*
442	 * Patch the TSB addresses and mask as well as the ASIs used to load
443	 * it into the trap table.
444	 */
445
446#define	LDDA_R_I_R(rd, imm_asi, rs1, rs2)				\
447	(EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |	\
448	    EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |	\
449	    EIF_F3_RS2(rs2))
450#define	OR_R_I_R(rd, imm13, rs1)					\
451	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |	\
452	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
453#define	SETHI(rd, imm22)						\
454	(EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |	\
455	    EIF_IMM((imm22) >> 10, 22))
456#define	WR_R_I(rd, imm13, rs1)						\
457	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |	\
458	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
459
460#define	PATCH_ASI(addr, asi) do {					\
461	if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,			\
462	    IF_F3_RS1(addr[0])))					\
463		panic("%s: patched instructions have changed",		\
464		    __func__);						\
465	addr[0] |= EIF_IMM((asi), 13);					\
466	flush(addr);							\
467} while (0)
468
469#define	PATCH_LDD(addr, asi) do {					\
470	if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,		\
471	    IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))			\
472		panic("%s: patched instructions have changed",		\
473		    __func__);						\
474	addr[0] |= EIF_F3_IMM_ASI(asi);					\
475	flush(addr);							\
476} while (0)
477
478#define	PATCH_TSB(addr, val) do {					\
479	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
480	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
481	    IF_F3_RS1(addr[1]))	||					\
482	    addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))			\
483		panic("%s: patched instructions have changed",		\
484		    __func__);						\
485	addr[0] |= EIF_IMM((val) >> 42, 22);				\
486	addr[1] |= EIF_IMM((val) >> 32, 10);				\
487	addr[3] |= EIF_IMM((val) >> 10, 22);				\
488	flush(addr);							\
489	flush(addr + 1);						\
490	flush(addr + 3);						\
491} while (0)
492
493#define	PATCH_TSB_MASK(addr, val) do {					\
494	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
495	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
496	    IF_F3_RS1(addr[1])))					\
497		panic("%s: patched instructions have changed",		\
498		    __func__);						\
499	addr[0] |= EIF_IMM((val) >> 10, 22);				\
500	addr[1] |= EIF_IMM((val), 10);					\
501	flush(addr);							\
502	flush(addr + 1);						\
503} while (0)
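/*
 * Illustrative note (not in the original source): PATCH_TSB() above fills
 * in the immediates of a sethi/or/.../sethi sequence with a 64-bit value
 * whose low 10 bits are zero (the TSB addresses patched in are page
 * aligned): addr[0] receives bits 63-42, addr[1] bits 41-32 and addr[3]
 * bits 31-10, matching the shifts in the macro.  The instruction at
 * addr[2] is left unpatched.
 */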
504
505	if (tsb_kernel_ldd_phys == 0) {
506		asi = ASI_N;
507		ldd = ASI_NUCLEUS_QUAD_LDD;
508		off = (vm_offset_t)tsb_kernel;
509	} else {
510		asi = ASI_PHYS_USE_EC;
511		ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
512		off = (vm_offset_t)tsb_kernel_phys;
513	}
514	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
515	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
516	    tsb_kernel_phys + tsb_kernel_size - 1);
517	PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
518	PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
519	PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
520	PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
521	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
522	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
523	PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
524	PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
525	PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
526	PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
527	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
528	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
529	PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
530	PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
531	PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
532	PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
533	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
534	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
535
536	/*
537	 * Enter fake 8k pages for the 4MB kernel pages, so that
538	 * pmap_kextract() will work for them.
539	 */
540	for (i = 0; i < kernel_tlb_slots; i++) {
541		pa = kernel_tlbs[i].te_pa;
542		va = kernel_tlbs[i].te_va;
543		for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
544			tp = tsb_kvtotte(va + off);
545			vpn = TV_VPN(va + off, TS_8K);
546			data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
547			    TD_SW | TD_CP | TD_CV | TD_P | TD_W;
548			pmap_bootstrap_set_tte(tp, vpn, data);
549		}
550	}
551
552	/*
553	 * Set the start and end of KVA.  The kernel is loaded starting
554	 * at the first available 4MB super page, so we advance to the
555	 * end of the last one used for it.
556	 */
557	virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
558	virtual_end = vm_max_kernel_address;
559	kernel_vm_end = vm_max_kernel_address;
560
561	/*
562	 * Allocate kva space for temporary mappings.
563	 */
564	pmap_idle_map = virtual_avail;
565	virtual_avail += PAGE_SIZE * colors;
566	pmap_temp_map_1 = virtual_avail;
567	virtual_avail += PAGE_SIZE * colors;
568	pmap_temp_map_2 = virtual_avail;
569	virtual_avail += PAGE_SIZE * colors;
570
571	/*
572	 * Allocate a kernel stack with guard page for thread0 and map it
573	 * into the kernel TSB.  We must ensure that the virtual address is
574 * colored properly, since we're allocating from phys_avail, so the
575 * memory won't have an associated vm_page_t.
576	 */
577	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
578	kstack0_phys = pa;
579	virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
580	kstack0 = virtual_avail;
581	virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
582	if (dcache_color_ignore == 0)
583		KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
584		    ("pmap_bootstrap: kstack0 miscolored"));
585	for (i = 0; i < KSTACK_PAGES; i++) {
586		pa = kstack0_phys + i * PAGE_SIZE;
587		va = kstack0 + i * PAGE_SIZE;
588		tp = tsb_kvtotte(va);
589		vpn = TV_VPN(va, TS_8K);
590		data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
591		    TD_CV | TD_P | TD_W;
592		pmap_bootstrap_set_tte(tp, vpn, data);
593	}
594
595	/*
596	 * Calculate the last available physical address.
597	 */
598	for (i = 0; phys_avail[i + 2] != 0; i += 2)
599		;
600	Maxmem = sparc64_btop(phys_avail[i + 1]);
601
602	/*
603	 * Add the PROM mappings to the kernel TSB.
604	 */
605	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
606		panic("pmap_bootstrap: finddevice /virtual-memory");
607	if ((sz = OF_getproplen(vmem, "translations")) == -1)
608		panic("pmap_bootstrap: getproplen translations");
609	if (sizeof(translations) < sz)
610		panic("pmap_bootstrap: translations too small");
611	bzero(translations, sz);
612	if (OF_getprop(vmem, "translations", translations, sz) == -1)
613		panic("pmap_bootstrap: getprop /virtual-memory/translations");
614	sz /= sizeof(*translations);
615	translations_size = sz;
616	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
617	qsort(translations, sz, sizeof (*translations), om_cmp);
618	for (i = 0; i < sz; i++) {
619		CTR3(KTR_PMAP,
620		    "translation: start=%#lx size=%#lx tte=%#lx",
621		    translations[i].om_start, translations[i].om_size,
622		    translations[i].om_tte);
623		if ((translations[i].om_tte & TD_V) == 0)
624			continue;
625		if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
626		    translations[i].om_start > VM_MAX_PROM_ADDRESS)
627			continue;
628		for (off = 0; off < translations[i].om_size;
629		    off += PAGE_SIZE) {
630			va = translations[i].om_start + off;
631			tp = tsb_kvtotte(va);
632			vpn = TV_VPN(va, TS_8K);
633			data = ((translations[i].om_tte &
634			    ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
635			    (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
636			    cpu_impl < CPU_IMPL_ULTRASPARCIII ?
637			    (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
638			    (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
639			    (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
640			    off;
641			pmap_bootstrap_set_tte(tp, vpn, data);
642		}
643	}
644
645	/*
646	 * Get the available physical memory ranges from /memory/reg.  These
647	 * are only used for kernel dumps, but it may not be wise to do PROM
648	 * calls in that situation.
649	 */
650	if ((sz = OF_getproplen(pmem, "reg")) == -1)
651		panic("pmap_bootstrap: getproplen /memory/reg");
652	if (sizeof(sparc64_memreg) < sz)
653		panic("pmap_bootstrap: sparc64_memreg too small");
654	if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
655		panic("pmap_bootstrap: getprop /memory/reg");
656	sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
657
658	/*
659	 * Initialize the kernel pmap (which is statically allocated).
660	 * NOTE: PMAP_LOCK_INIT() is needed as part of the initialization
661 * but sparc64 startup is not ready to initialize mutexes yet.
662	 * It is called in machdep.c.
663	 */
664	pm = kernel_pmap;
665	for (i = 0; i < MAXCPU; i++)
666		pm->pm_context[i] = TLB_CTX_KERNEL;
667	pm->pm_active = ~0;
668
669	/*
670	 * Flush all non-locked TLB entries possibly left over by the
671	 * firmware.
672	 */
673	tlb_flush_nonlocked();
674}
675
676/*
677 * Map the 4MB kernel TSB pages.
678 */
679void
680pmap_map_tsb(void)
681{
682	vm_offset_t va;
683	vm_paddr_t pa;
684	u_long data;
685	int i;
686
687	for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
688		va = (vm_offset_t)tsb_kernel + i;
689		pa = tsb_kernel_phys + i;
690		data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
691		    TD_P | TD_W;
692		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
693		    TLB_TAR_CTX(TLB_CTX_KERNEL));
694		stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
695	}
696}
697
698/*
699 * Set the secondary context to be the kernel context (needed for FP block
700 * operations in the kernel).
701 */
702void
703pmap_set_kctx(void)
704{
705
706	stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
707	    TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
708	flush(KERNBASE);
709}
710
711/*
712 * Allocate a physical page of memory directly from the phys_avail map.
713 * Can only be called from pmap_bootstrap before avail start and end are
714 * calculated.
715 */
716static vm_paddr_t
717pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
718{
719	vm_paddr_t pa;
720	int i;
721
722	size = roundup(size, PAGE_SIZE * colors);
723	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
724		if (phys_avail[i + 1] - phys_avail[i] < size)
725			continue;
726		pa = phys_avail[i];
727		phys_avail[i] += size;
728		return (pa);
729	}
730	panic("pmap_bootstrap_alloc");
731}
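/*
 * Worked example for the allocator above (illustrative, hypothetical
 * values): with 8 KB pages and two D-cache colors, a 24 KB request is
 * rounded up to 32 KB, i.e. a whole number of color strides, so that
 * phys_avail[i] always advances by a multiple of the color stride:
 *
 *	pa = pmap_bootstrap_alloc(3 * PAGE_SIZE, 2);
 */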
732
733/*
734 * Set a TTE.  This function is intended as a helper when tsb_kernel is
735 * direct-mapped but we haven't taken over the trap table yet, as is the
736 * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
737 * the kernel TSB.
738 */
739void
740pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
741{
742
743	if (tsb_kernel_ldd_phys == 0) {
744		tp->tte_vpn = vpn;
745		tp->tte_data = data;
746	} else {
747		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
748		    ASI_PHYS_USE_EC, vpn);
749		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
750		    ASI_PHYS_USE_EC, data);
751	}
752}
753
754/*
755 * Initialize a vm_page's machine-dependent fields.
756 */
757void
758pmap_page_init(vm_page_t m)
759{
760
761	TAILQ_INIT(&m->md.tte_list);
762	m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
763	m->md.flags = 0;
764	m->md.pmap = NULL;
765}
766
767/*
768 * Initialize the pmap module.
769 */
770void
771pmap_init(void)
772{
773	vm_offset_t addr;
774	vm_size_t size;
775	int result;
776	int i;
777
778	for (i = 0; i < translations_size; i++) {
779		addr = translations[i].om_start;
780		size = translations[i].om_size;
781		if ((translations[i].om_tte & TD_V) == 0)
782			continue;
783		if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
784			continue;
785		result = vm_map_find(kernel_map, NULL, 0, &addr, size,
786		    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
787		if (result != KERN_SUCCESS || addr != translations[i].om_start)
788			panic("pmap_init: vm_map_find");
789	}
790}
791
792/*
793 * Extract the physical page address associated with the given
794 * map/virtual_address pair.
795 */
796vm_paddr_t
797pmap_extract(pmap_t pm, vm_offset_t va)
798{
799	struct tte *tp;
800	vm_paddr_t pa;
801
802	if (pm == kernel_pmap)
803		return (pmap_kextract(va));
804	PMAP_LOCK(pm);
805	tp = tsb_tte_lookup(pm, va);
806	if (tp == NULL)
807		pa = 0;
808	else
809		pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
810	PMAP_UNLOCK(pm);
811	return (pa);
812}
813
814/*
815 * Atomically extract and hold the physical page with the given
816 * pmap and virtual address pair if that mapping permits the given
817 * protection.
818 */
819vm_page_t
820pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
821{
822	struct tte *tp;
823	vm_page_t m;
824	vm_paddr_t pa;
825
826	m = NULL;
827	pa = 0;
828	PMAP_LOCK(pm);
829retry:
830	if (pm == kernel_pmap) {
831		if (va >= VM_MIN_DIRECT_ADDRESS) {
832			tp = NULL;
833			m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
834			(void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
835			    &pa);
836			vm_page_hold(m);
837		} else {
838			tp = tsb_kvtotte(va);
839			if ((tp->tte_data & TD_V) == 0)
840				tp = NULL;
841		}
842	} else
843		tp = tsb_tte_lookup(pm, va);
844	if (tp != NULL && ((tp->tte_data & TD_SW) ||
845	    (prot & VM_PROT_WRITE) == 0)) {
846		if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
847			goto retry;
848		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
849		vm_page_hold(m);
850	}
851	PA_UNLOCK_COND(pa);
852	PMAP_UNLOCK(pm);
853	return (m);
854}
855
856/*
857 * Extract the physical page address associated with the given kernel virtual
858 * address.
859 */
860vm_paddr_t
861pmap_kextract(vm_offset_t va)
862{
863	struct tte *tp;
864
865	if (va >= VM_MIN_DIRECT_ADDRESS)
866		return (TLB_DIRECT_TO_PHYS(va));
867	tp = tsb_kvtotte(va);
868	if ((tp->tte_data & TD_V) == 0)
869		return (0);
870	return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
871}
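/*
 * Usage sketch (hypothetical caller, not part of this file): obtaining the
 * physical address of a wired kernel buffer.  Direct-mapped addresses are
 * translated arithmetically; anything else is looked up in the kernel TSB
 * above, with 0 returned for an unmapped address.
 *
 *	vm_paddr_t pa;
 *
 *	pa = pmap_kextract((vm_offset_t)buf);
 *	if (pa == 0)
 *		panic("buffer not mapped");
 */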
872
873int
874pmap_cache_enter(vm_page_t m, vm_offset_t va)
875{
876	struct tte *tp;
877	int color;
878
879	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
880	KASSERT((m->flags & PG_FICTITIOUS) == 0,
881	    ("pmap_cache_enter: fake page"));
882	PMAP_STATS_INC(pmap_ncache_enter);
883
884	if (dcache_color_ignore != 0)
885		return (1);
886
887	/*
888	 * Find the color for this virtual address and note the added mapping.
889	 */
890	color = DCACHE_COLOR(va);
891	m->md.colors[color]++;
892
893	/*
894	 * If all existing mappings have the same color, the mapping is
895	 * cacheable.
896	 */
897	if (m->md.color == color) {
898		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
899		    ("pmap_cache_enter: cacheable, mappings of other color"));
900		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
901			PMAP_STATS_INC(pmap_ncache_enter_c);
902		else
903			PMAP_STATS_INC(pmap_ncache_enter_oc);
904		return (1);
905	}
906
907	/*
908	 * If there are no mappings of the other color, and the page still has
909	 * the wrong color, this must be a new mapping.  Change the color to
910	 * match the new mapping, which is cacheable.  We must flush the page
911	 * from the cache now.
912	 */
913	if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
914		KASSERT(m->md.colors[color] == 1,
915		    ("pmap_cache_enter: changing color, not new mapping"));
916		dcache_page_inval(VM_PAGE_TO_PHYS(m));
917		m->md.color = color;
918		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
919			PMAP_STATS_INC(pmap_ncache_enter_cc);
920		else
921			PMAP_STATS_INC(pmap_ncache_enter_coc);
922		return (1);
923	}
924
925	/*
926	 * If the mapping is already non-cacheable, just return.
927	 */
928	if (m->md.color == -1) {
929		PMAP_STATS_INC(pmap_ncache_enter_nc);
930		return (0);
931	}
932
933	PMAP_STATS_INC(pmap_ncache_enter_cnc);
934
935	/*
936	 * Mark all mappings as uncacheable, flush any lines with the other
937	 * color out of the dcache, and set the color to none (-1).
938	 */
939	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
940		atomic_clear_long(&tp->tte_data, TD_CV);
941		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
942	}
943	dcache_page_inval(VM_PAGE_TO_PHYS(m));
944	m->md.color = -1;
945	return (0);
946}
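/*
 * Worked example for the coloring logic above (illustrative): assume two
 * D-cache colors and a page whose physical color is 0.  A first mapping
 * at a virtual address of color 0 is cacheable (pmap_ncache_enter_c).
 * A second mapping at a color 1 address finds existing mappings of the
 * other color, so every mapping is downgraded to uncacheable, the page
 * is flushed from the D-cache and md.color becomes -1
 * (pmap_ncache_enter_cnc).  Once the color 0 mapping is removed again,
 * pmap_cache_remove() below re-enables caching for the remaining color.
 */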
947
948void
949pmap_cache_remove(vm_page_t m, vm_offset_t va)
950{
951	struct tte *tp;
952	int color;
953
954	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
955	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
956	    m->md.colors[DCACHE_COLOR(va)]);
957	KASSERT((m->flags & PG_FICTITIOUS) == 0,
958	    ("pmap_cache_remove: fake page"));
959	PMAP_STATS_INC(pmap_ncache_remove);
960
961	if (dcache_color_ignore != 0)
962		return;
963
964	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
965	    ("pmap_cache_remove: no mappings %d <= 0",
966	    m->md.colors[DCACHE_COLOR(va)]));
967
968	/*
969	 * Find the color for this virtual address and note the removal of
970	 * the mapping.
971	 */
972	color = DCACHE_COLOR(va);
973	m->md.colors[color]--;
974
975	/*
976	 * If the page is cacheable, just return and keep the same color, even
977	 * if there are no longer any mappings.
978	 */
979	if (m->md.color != -1) {
980		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
981			PMAP_STATS_INC(pmap_ncache_remove_c);
982		else
983			PMAP_STATS_INC(pmap_ncache_remove_oc);
984		return;
985	}
986
987	KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
988	    ("pmap_cache_remove: uncacheable, no mappings of other color"));
989
990	/*
991	 * If the page is not cacheable (color is -1), and the number of
992	 * mappings for this color is not zero, just return.  There are
993	 * mappings of the other color still, so remain non-cacheable.
994	 */
995	if (m->md.colors[color] != 0) {
996		PMAP_STATS_INC(pmap_ncache_remove_nc);
997		return;
998	}
999
1000	/*
1001	 * The number of mappings for this color is now zero.  Recache the
1002	 * other colored mappings, and change the page color to the other
1003	 * color.  There should be no lines in the data cache for this page,
1004	 * so flushing should not be needed.
1005	 */
1006	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1007		atomic_set_long(&tp->tte_data, TD_CV);
1008		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1009	}
1010	m->md.color = DCACHE_OTHER_COLOR(color);
1011
1012	if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1013		PMAP_STATS_INC(pmap_ncache_remove_cc);
1014	else
1015		PMAP_STATS_INC(pmap_ncache_remove_coc);
1016}
1017
1018/*
1019 * Map a wired page into kernel virtual address space.
1020 */
1021void
1022pmap_kenter(vm_offset_t va, vm_page_t m)
1023{
1024	vm_offset_t ova;
1025	struct tte *tp;
1026	vm_page_t om;
1027	u_long data;
1028
1029	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1030	PMAP_STATS_INC(pmap_nkenter);
1031	tp = tsb_kvtotte(va);
1032	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
1033	    va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
1034	if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
1035		CTR5(KTR_SPARE2,
1036	"pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
1037		    va, VM_PAGE_TO_PHYS(m), m->object,
1038		    m->object ? m->object->type : -1,
1039		    m->pindex);
1040		PMAP_STATS_INC(pmap_nkenter_oc);
1041	}
1042	if ((tp->tte_data & TD_V) != 0) {
1043		om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1044		ova = TTE_GET_VA(tp);
1045		if (m == om && va == ova) {
1046			PMAP_STATS_INC(pmap_nkenter_stupid);
1047			return;
1048		}
1049		TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
1050		pmap_cache_remove(om, ova);
1051		if (va != ova)
1052			tlb_page_demap(kernel_pmap, ova);
1053	}
1054	data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
1055	    TD_P | TD_W;
1056	if (pmap_cache_enter(m, va) != 0)
1057		data |= TD_CV;
1058	tp->tte_vpn = TV_VPN(va, TS_8K);
1059	tp->tte_data = data;
1060	TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1061}
1062
1063/*
1064 * Map a wired page into kernel virtual address space.  This additionally
1065 * takes a flag argument which is or'ed to the TTE data.  This is used by
1066 * sparc64_bus_mem_map().
1067 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1068 * to flush entries that might still be in the cache, if applicable.
1069 */
1070void
1071pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1072{
1073	struct tte *tp;
1074
1075	tp = tsb_kvtotte(va);
1076	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1077	    va, pa, tp, tp->tte_data);
1078	tp->tte_vpn = TV_VPN(va, TS_8K);
1079	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1080}
1081
1082/*
1083 * Remove a wired page from kernel virtual address space.
1084 */
1085void
1086pmap_kremove(vm_offset_t va)
1087{
1088	struct tte *tp;
1089	vm_page_t m;
1090
1091	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1092	PMAP_STATS_INC(pmap_nkremove);
1093	tp = tsb_kvtotte(va);
1094	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1095	    tp->tte_data);
1096	if ((tp->tte_data & TD_V) == 0)
1097		return;
1098	m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1099	TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1100	pmap_cache_remove(m, va);
1101	TTE_ZERO(tp);
1102}
1103
1104/*
1105 * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1106 */
1107void
1108pmap_kremove_flags(vm_offset_t va)
1109{
1110	struct tte *tp;
1111
1112	tp = tsb_kvtotte(va);
1113	CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1114	    tp->tte_data);
1115	TTE_ZERO(tp);
1116}
1117
1118/*
1119 * Map a range of physical addresses into kernel virtual address space.
1120 *
1121 * The value passed in *virt is a suggested virtual address for the mapping.
1122 * Architectures which can support a direct-mapped physical to virtual region
1123 * can return the appropriate address within that region, leaving '*virt'
1124 * unchanged.
1125 */
1126vm_offset_t
1127pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1128{
1129
1130	return (TLB_PHYS_TO_DIRECT(start));
1131}
1132
1133/*
1134 * Map a list of wired pages into kernel virtual address space.  This is
1135 * intended for temporary mappings which do not need page modification or
1136 * references recorded.  Existing mappings in the region are overwritten.
1137 */
1138void
1139pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1140{
1141	vm_offset_t va;
1142	int locked;
1143
1144	PMAP_STATS_INC(pmap_nqenter);
1145	va = sva;
1146	if (!(locked = mtx_owned(&vm_page_queue_mtx)))
1147		vm_page_lock_queues();
1148	while (count-- > 0) {
1149		pmap_kenter(va, *m);
1150		va += PAGE_SIZE;
1151		m++;
1152	}
1153	if (!locked)
1154		vm_page_unlock_queues();
1155	tlb_range_demap(kernel_pmap, sva, va);
1156}
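/*
 * Usage sketch (hypothetical, not part of this file): pmap_qenter() and
 * pmap_qremove() below are typically paired to map an array of pages into
 * a preallocated KVA range for the duration of an operation, as pmap_pinit()
 * does for the TSB pages:
 *
 *	pmap_qenter(va, ma, npages);
 *	...access the pages through va...
 *	pmap_qremove(va, npages);
 */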
1157
1158/*
1159 * Remove page mappings from kernel virtual address space.  Intended for
1160 * temporary mappings entered by pmap_qenter.
1161 */
1162void
1163pmap_qremove(vm_offset_t sva, int count)
1164{
1165	vm_offset_t va;
1166	int locked;
1167
1168	PMAP_STATS_INC(pmap_nqremove);
1169	va = sva;
1170	if (!(locked = mtx_owned(&vm_page_queue_mtx)))
1171		vm_page_lock_queues();
1172	while (count-- > 0) {
1173		pmap_kremove(va);
1174		va += PAGE_SIZE;
1175	}
1176	if (!locked)
1177		vm_page_unlock_queues();
1178	tlb_range_demap(kernel_pmap, sva, va);
1179}
1180
1181/*
1182 * Initialize the pmap associated with process 0.
1183 */
1184void
1185pmap_pinit0(pmap_t pm)
1186{
1187	int i;
1188
1189	PMAP_LOCK_INIT(pm);
1190	for (i = 0; i < MAXCPU; i++)
1191		pm->pm_context[i] = TLB_CTX_KERNEL;
1192	pm->pm_active = 0;
1193	pm->pm_tsb = NULL;
1194	pm->pm_tsb_obj = NULL;
1195	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1196}
1197
1198/*
1199 * Initialize a preallocated and zeroed pmap structure, such as one in a
1200 * vmspace structure.
1201 */
1202int
1203pmap_pinit(pmap_t pm)
1204{
1205	vm_page_t ma[TSB_PAGES];
1206	vm_page_t m;
1207	int i;
1208
1209	PMAP_LOCK_INIT(pm);
1210
1211	/*
1212	 * Allocate KVA space for the TSB.
1213	 */
1214	if (pm->pm_tsb == NULL) {
1215		pm->pm_tsb = (struct tte *)kmem_alloc_nofault(kernel_map,
1216		    TSB_BSIZE);
1217		if (pm->pm_tsb == NULL) {
1218			PMAP_LOCK_DESTROY(pm);
1219			return (0);
1220		}
1221	}
1222
1223	/*
1224	 * Allocate an object for it.
1225	 */
1226	if (pm->pm_tsb_obj == NULL)
1227		pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
1228
1229	mtx_lock_spin(&sched_lock);
1230	for (i = 0; i < MAXCPU; i++)
1231		pm->pm_context[i] = -1;
1232	pm->pm_active = 0;
1233	mtx_unlock_spin(&sched_lock);
1234
1235	VM_OBJECT_LOCK(pm->pm_tsb_obj);
1236	for (i = 0; i < TSB_PAGES; i++) {
1237		m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY |
1238		    VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1239		m->valid = VM_PAGE_BITS_ALL;
1240		m->md.pmap = pm;
1241		ma[i] = m;
1242	}
1243	VM_OBJECT_UNLOCK(pm->pm_tsb_obj);
1244	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1245
1246	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1247	return (1);
1248}
1249
1250/*
1251 * Release any resources held by the given physical map.
1252 * Called when a pmap initialized by pmap_pinit is being released.
1253 * Should only be called if the map contains no valid mappings.
1254 */
1255void
1256pmap_release(pmap_t pm)
1257{
1258	vm_object_t obj;
1259	vm_page_t m;
1260	struct pcpu *pc;
1261
1262	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1263	    pm->pm_context[curcpu], pm->pm_tsb);
1264	KASSERT(pmap_resident_count(pm) == 0,
1265	    ("pmap_release: resident pages %ld != 0",
1266	    pmap_resident_count(pm)));
1267
1268	/*
1269	 * After the pmap was freed, it might be reallocated to a new process.
1270	 * When switching, this might lead us to wrongly assume that we need
1271 * not switch contexts because the old and new pmap pointers are equal.
1272	 * Therefore, make sure that this pmap is not referenced by any PCPU
1273	 * pointer any more.  This could happen in two cases:
1274	 * - A process that referenced the pmap is currently exiting on a CPU.
1275	 *   However, it is guaranteed to not switch in any more after setting
1276	 *   its state to PRS_ZOMBIE.
1277	 * - A process that referenced this pmap ran on a CPU, but we switched
1278	 *   to a kernel thread, leaving the pmap pointer unchanged.
1279	 */
1280	mtx_lock_spin(&sched_lock);
1281	SLIST_FOREACH(pc, &cpuhead, pc_allcpu)
1282		if (pc->pc_pmap == pm)
1283			pc->pc_pmap = NULL;
1284	mtx_unlock_spin(&sched_lock);
1285
1286	obj = pm->pm_tsb_obj;
1287	VM_OBJECT_LOCK(obj);
1288	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1289	while (!TAILQ_EMPTY(&obj->memq)) {
1290		m = TAILQ_FIRST(&obj->memq);
1291		m->md.pmap = NULL;
1292		m->wire_count--;
1293		atomic_subtract_int(&cnt.v_wire_count, 1);
1294		vm_page_free_zero(m);
1295	}
1296	VM_OBJECT_UNLOCK(obj);
1297	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1298	PMAP_LOCK_DESTROY(pm);
1299}
1300
1301/*
1302 * Grow the number of kernel page table entries.  Unneeded.
1303 */
1304void
1305pmap_growkernel(vm_offset_t addr)
1306{
1307
1308	panic("pmap_growkernel: can't grow kernel");
1309}
1310
1311int
1312pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1313    vm_offset_t va)
1314{
1315	vm_page_t m;
1316	u_long data;
1317
1318	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1319	data = atomic_readandclear_long(&tp->tte_data);
1320	if ((data & TD_FAKE) == 0) {
1321		m = PHYS_TO_VM_PAGE(TD_PA(data));
1322		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1323		if ((data & TD_WIRED) != 0)
1324			pm->pm_stats.wired_count--;
1325		if ((data & TD_PV) != 0) {
1326			if ((data & TD_W) != 0)
1327				vm_page_dirty(m);
1328			if ((data & TD_REF) != 0)
1329				vm_page_flag_set(m, PG_REFERENCED);
1330			if (TAILQ_EMPTY(&m->md.tte_list))
1331				vm_page_flag_clear(m, PG_WRITEABLE);
1332			pm->pm_stats.resident_count--;
1333		}
1334		pmap_cache_remove(m, va);
1335	}
1336	TTE_ZERO(tp);
1337	if (PMAP_REMOVE_DONE(pm))
1338		return (0);
1339	return (1);
1340}
1341
1342/*
1343 * Remove the given range of addresses from the specified map.
1344 */
1345void
1346pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1347{
1348	struct tte *tp;
1349	vm_offset_t va;
1350
1351	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1352	    pm->pm_context[curcpu], start, end);
1353	if (PMAP_REMOVE_DONE(pm))
1354		return;
1355	vm_page_lock_queues();
1356	PMAP_LOCK(pm);
1357	if (end - start > PMAP_TSB_THRESH) {
1358		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1359		tlb_context_demap(pm);
1360	} else {
1361		for (va = start; va < end; va += PAGE_SIZE)
1362			if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1363			    !pmap_remove_tte(pm, NULL, tp, va))
1364				break;
1365		tlb_range_demap(pm, start, end - 1);
1366	}
1367	PMAP_UNLOCK(pm);
1368	vm_page_unlock_queues();
1369}
1370
1371void
1372pmap_remove_all(vm_page_t m)
1373{
1374	struct pmap *pm;
1375	struct tte *tpn;
1376	struct tte *tp;
1377	vm_offset_t va;
1378
1379	vm_page_lock_queues();
1380	for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1381		tpn = TAILQ_NEXT(tp, tte_link);
1382		if ((tp->tte_data & TD_PV) == 0)
1383			continue;
1384		pm = TTE_GET_PMAP(tp);
1385		va = TTE_GET_VA(tp);
1386		PMAP_LOCK(pm);
1387		if ((tp->tte_data & TD_WIRED) != 0)
1388			pm->pm_stats.wired_count--;
1389		if ((tp->tte_data & TD_REF) != 0)
1390			vm_page_flag_set(m, PG_REFERENCED);
1391		if ((tp->tte_data & TD_W) != 0)
1392			vm_page_dirty(m);
1393		tp->tte_data &= ~TD_V;
1394		tlb_page_demap(pm, va);
1395		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1396		pm->pm_stats.resident_count--;
1397		pmap_cache_remove(m, va);
1398		TTE_ZERO(tp);
1399		PMAP_UNLOCK(pm);
1400	}
1401	vm_page_flag_clear(m, PG_WRITEABLE);
1402	vm_page_unlock_queues();
1403}
1404
1405int
1406pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1407    vm_offset_t va)
1408{
1409	u_long data;
1410	vm_page_t m;
1411
1412	data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
1413	if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) {
1414		m = PHYS_TO_VM_PAGE(TD_PA(data));
1415		vm_page_dirty(m);
1416	}
1417	return (1);
1418}
1419
1420/*
1421 * Set the physical protection on the specified range of this map as requested.
1422 */
1423void
1424pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1425{
1426	vm_offset_t va;
1427	struct tte *tp;
1428
1429	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1430	    pm->pm_context[curcpu], sva, eva, prot);
1431
1432	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1433		pmap_remove(pm, sva, eva);
1434		return;
1435	}
1436
1437	if (prot & VM_PROT_WRITE)
1438		return;
1439
1440	vm_page_lock_queues();
1441	PMAP_LOCK(pm);
1442	if (eva - sva > PMAP_TSB_THRESH) {
1443		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1444		tlb_context_demap(pm);
1445	} else {
1446		for (va = sva; va < eva; va += PAGE_SIZE)
1447			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1448				pmap_protect_tte(pm, NULL, tp, va);
1449		tlb_range_demap(pm, sva, eva - 1);
1450	}
1451	PMAP_UNLOCK(pm);
1452	vm_page_unlock_queues();
1453}
1454
1455/*
1456 * Map the given physical page at the specified virtual address in the
1457 * target pmap with the protection requested.  If specified the page
1458 * will be wired down.
1459 */
1460void
1461pmap_enter(pmap_t pm, vm_offset_t va, vm_prot_t access, vm_page_t m,
1462    vm_prot_t prot, boolean_t wired)
1463{
1464
1465	vm_page_lock_queues();
1466	PMAP_LOCK(pm);
1467	pmap_enter_locked(pm, va, m, prot, wired);
1468	vm_page_unlock_queues();
1469	PMAP_UNLOCK(pm);
1470}
1471
1472/*
1473 * Map the given physical page at the specified virtual address in the
1474 * target pmap with the protection requested.  If specified the page
1475 * will be wired down.
1476 *
1477 * The page queues and pmap must be locked.
1478 */
1479static void
1480pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1481    boolean_t wired)
1482{
1483	struct tte *tp;
1484	vm_paddr_t pa;
1485	u_long data;
1486	int i;
1487
1488	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1489	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1490	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
1491	    (m->oflags & VPO_BUSY) != 0 || VM_OBJECT_LOCKED(m->object),
1492	    ("pmap_enter_locked: page %p is not busy", m));
1493	PMAP_STATS_INC(pmap_nenter);
1494	pa = VM_PAGE_TO_PHYS(m);
1495
1496	/*
1497	 * If this is a fake page from the device_pager, but it covers actual
1498	 * physical memory, convert to the real backing page.
1499	 */
1500	if ((m->flags & PG_FICTITIOUS) != 0) {
1501		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1502			if (pa >= phys_avail[i] && pa <= phys_avail[i + 1]) {
1503				m = PHYS_TO_VM_PAGE(pa);
1504				break;
1505			}
1506		}
1507	}
1508
1509	CTR6(KTR_PMAP,
1510	    "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1511	    pm->pm_context[curcpu], m, va, pa, prot, wired);
1512
1513	/*
1514	 * If there is an existing mapping, and the physical address has not
1515	 * changed, it must be a protection or wiring change.
1516	 */
1517	if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1518		CTR0(KTR_PMAP, "pmap_enter_locked: update");
1519		PMAP_STATS_INC(pmap_nenter_update);
1520
1521		/*
1522		 * Wiring change, just update stats.
1523		 */
1524		if (wired) {
1525			if ((tp->tte_data & TD_WIRED) == 0) {
1526				tp->tte_data |= TD_WIRED;
1527				pm->pm_stats.wired_count++;
1528			}
1529		} else {
1530			if ((tp->tte_data & TD_WIRED) != 0) {
1531				tp->tte_data &= ~TD_WIRED;
1532				pm->pm_stats.wired_count--;
1533			}
1534		}
1535
1536		/*
1537		 * Save the old bits and clear the ones we're interested in.
1538		 */
1539		data = tp->tte_data;
1540		tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1541
1542		/*
1543		 * If we're turning off write permissions, sense modify status.
1544		 */
1545		if ((prot & VM_PROT_WRITE) != 0) {
1546			tp->tte_data |= TD_SW;
1547			if (wired)
1548				tp->tte_data |= TD_W;
1549			if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
1550				vm_page_flag_set(m, PG_WRITEABLE);
1551		} else if ((data & TD_W) != 0)
1552			vm_page_dirty(m);
1553
1554		/*
1555		 * If we're turning on execute permissions, flush the icache.
1556		 */
1557		if ((prot & VM_PROT_EXECUTE) != 0) {
1558			if ((data & TD_EXEC) == 0)
1559				icache_page_inval(pa);
1560			tp->tte_data |= TD_EXEC;
1561		}
1562
1563		/*
1564		 * Delete the old mapping.
1565		 */
1566		tlb_page_demap(pm, TTE_GET_VA(tp));
1567	} else {
1568		/*
1569		 * If there is an existing mapping, but its for a different
1570		 * If there is an existing mapping, but it's for a different
1571		 */
1572		if (tp != NULL) {
1573			CTR0(KTR_PMAP, "pmap_enter_locked: replace");
1574			PMAP_STATS_INC(pmap_nenter_replace);
1575			pmap_remove_tte(pm, NULL, tp, va);
1576			tlb_page_demap(pm, va);
1577		} else {
1578			CTR0(KTR_PMAP, "pmap_enter_locked: new");
1579			PMAP_STATS_INC(pmap_nenter_new);
1580		}
1581
1582		/*
1583		 * Now set up the data and install the new mapping.
1584		 */
1585		data = TD_V | TD_8K | TD_PA(pa);
1586		if (pm == kernel_pmap)
1587			data |= TD_P;
1588		if ((prot & VM_PROT_WRITE) != 0) {
1589			data |= TD_SW;
1590			if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
1591				vm_page_flag_set(m, PG_WRITEABLE);
1592		}
1593		if (prot & VM_PROT_EXECUTE) {
1594			data |= TD_EXEC;
1595			icache_page_inval(pa);
1596		}
1597
1598		/*
1599		 * If it's wired, update stats.  We also don't need reference or
1600		 * modify tracking for wired mappings, so set the bits now.
1601		 */
1602		if (wired) {
1603			pm->pm_stats.wired_count++;
1604			data |= TD_REF | TD_WIRED;
1605			if ((prot & VM_PROT_WRITE) != 0)
1606				data |= TD_W;
1607		}
1608
1609		tsb_tte_enter(pm, m, va, TS_8K, data);
1610	}
1611}
1612
1613/*
1614 * Maps a sequence of resident pages belonging to the same object.
1615 * The sequence begins with the given page m_start.  This page is
1616 * mapped at the given virtual address start.  Each subsequent page is
1617 * mapped at a virtual address that is offset from start by the same
1618 * amount as the page is offset from m_start within the object.  The
1619 * last page in the sequence is the page with the largest offset from
1620 * m_start that can be mapped at a virtual address less than the given
1621 * virtual address end.  Not every virtual page between start and end
1622 * is mapped; only those for which a resident page exists with the
1623 * corresponding offset from m_start are mapped.
1624 */
1625void
1626pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1627    vm_page_t m_start, vm_prot_t prot)
1628{
1629	vm_page_t m;
1630	vm_pindex_t diff, psize;
1631
1632	psize = atop(end - start);
1633	m = m_start;
1634	vm_page_lock_queues();
1635	PMAP_LOCK(pm);
1636	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1637		pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1638		    (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1639		m = TAILQ_NEXT(m, listq);
1640	}
1641	vm_page_unlock_queues();
1642	PMAP_UNLOCK(pm);
1643}
1644
1645void
1646pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1647{
1648
1649	vm_page_lock_queues();
1650	PMAP_LOCK(pm);
1651	pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1652	    FALSE);
1653	vm_page_unlock_queues();
1654	PMAP_UNLOCK(pm);
1655}
1656
1657void
1658pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1659    vm_pindex_t pindex, vm_size_t size)
1660{
1661
1662	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1663	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1664	    ("pmap_object_init_pt: non-device object"));
1665}
1666
1667/*
1668 * Change the wiring attribute for a map/virtual-address pair.
1669 * The mapping must already exist in the pmap.
1670 */
1671void
1672pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1673{
1674	struct tte *tp;
1675	u_long data;
1676
1677	PMAP_LOCK(pm);
1678	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1679		if (wired) {
1680			data = atomic_set_long(&tp->tte_data, TD_WIRED);
1681			if ((data & TD_WIRED) == 0)
1682				pm->pm_stats.wired_count++;
1683		} else {
1684			data = atomic_clear_long(&tp->tte_data, TD_WIRED);
1685			if ((data & TD_WIRED) != 0)
1686				pm->pm_stats.wired_count--;
1687		}
1688	}
1689	PMAP_UNLOCK(pm);
1690}
1691
1692static int
1693pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1694    vm_offset_t va)
1695{
1696	vm_page_t m;
1697	u_long data;
1698
1699	if ((tp->tte_data & TD_FAKE) != 0)
1700		return (1);
1701	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1702		data = tp->tte_data &
1703		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1704		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1705		tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1706	}
1707	return (1);
1708}
1709
1710void
1711pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1712    vm_size_t len, vm_offset_t src_addr)
1713{
1714	struct tte *tp;
1715	vm_offset_t va;
1716
1717	if (dst_addr != src_addr)
1718		return;
1719	vm_page_lock_queues();
1720	if (dst_pmap < src_pmap) {
1721		PMAP_LOCK(dst_pmap);
1722		PMAP_LOCK(src_pmap);
1723	} else {
1724		PMAP_LOCK(src_pmap);
1725		PMAP_LOCK(dst_pmap);
1726	}
1727	if (len > PMAP_TSB_THRESH) {
1728		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1729		    pmap_copy_tte);
1730		tlb_context_demap(dst_pmap);
1731	} else {
1732		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1733			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1734				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1735		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1736	}
1737	vm_page_unlock_queues();
1738	PMAP_UNLOCK(src_pmap);
1739	PMAP_UNLOCK(dst_pmap);
1740}
1741
1742void
1743pmap_zero_page(vm_page_t m)
1744{
1745	struct tte *tp;
1746	vm_offset_t va;
1747	vm_paddr_t pa;
1748
1749	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1750	    ("pmap_zero_page: fake page"));
1751	PMAP_STATS_INC(pmap_nzero_page);
1752	pa = VM_PAGE_TO_PHYS(m);
1753	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1754		PMAP_STATS_INC(pmap_nzero_page_c);
1755		va = TLB_PHYS_TO_DIRECT(pa);
1756		cpu_block_zero((void *)va, PAGE_SIZE);
1757	} else if (m->md.color == -1) {
1758		PMAP_STATS_INC(pmap_nzero_page_nc);
1759		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1760	} else {
1761		PMAP_STATS_INC(pmap_nzero_page_oc);
1762		PMAP_LOCK(kernel_pmap);
1763		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1764		tp = tsb_kvtotte(va);
1765		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1766		tp->tte_vpn = TV_VPN(va, TS_8K);
1767		cpu_block_zero((void *)va, PAGE_SIZE);
1768		tlb_page_demap(kernel_pmap, va);
1769		PMAP_UNLOCK(kernel_pmap);
1770	}
1771}
1772
1773void
1774pmap_zero_page_area(vm_page_t m, int off, int size)
1775{
1776	struct tte *tp;
1777	vm_offset_t va;
1778	vm_paddr_t pa;
1779
1780	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1781	    ("pmap_zero_page_area: fake page"));
1782	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1783	PMAP_STATS_INC(pmap_nzero_page_area);
1784	pa = VM_PAGE_TO_PHYS(m);
1785	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1786		PMAP_STATS_INC(pmap_nzero_page_area_c);
1787		va = TLB_PHYS_TO_DIRECT(pa);
1788		bzero((void *)(va + off), size);
1789	} else if (m->md.color == -1) {
1790		PMAP_STATS_INC(pmap_nzero_page_area_nc);
1791		aszero(ASI_PHYS_USE_EC, pa + off, size);
1792	} else {
1793		PMAP_STATS_INC(pmap_nzero_page_area_oc);
1794		PMAP_LOCK(kernel_pmap);
1795		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1796		tp = tsb_kvtotte(va);
1797		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1798		tp->tte_vpn = TV_VPN(va, TS_8K);
1799		bzero((void *)(va + off), size);
1800		tlb_page_demap(kernel_pmap, va);
1801		PMAP_UNLOCK(kernel_pmap);
1802	}
1803}
1804
1805void
1806pmap_zero_page_idle(vm_page_t m)
1807{
1808	struct tte *tp;
1809	vm_offset_t va;
1810	vm_paddr_t pa;
1811
1812	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1813	    ("pmap_zero_page_idle: fake page"));
1814	PMAP_STATS_INC(pmap_nzero_page_idle);
1815	pa = VM_PAGE_TO_PHYS(m);
1816	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1817		PMAP_STATS_INC(pmap_nzero_page_idle_c);
1818		va = TLB_PHYS_TO_DIRECT(pa);
1819		cpu_block_zero((void *)va, PAGE_SIZE);
1820	} else if (m->md.color == -1) {
1821		PMAP_STATS_INC(pmap_nzero_page_idle_nc);
1822		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1823	} else {
1824		PMAP_STATS_INC(pmap_nzero_page_idle_oc);
1825		va = pmap_idle_map + (m->md.color * PAGE_SIZE);
1826		tp = tsb_kvtotte(va);
1827		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1828		tp->tte_vpn = TV_VPN(va, TS_8K);
1829		cpu_block_zero((void *)va, PAGE_SIZE);
1830		tlb_page_demap(kernel_pmap, va);
1831	}
1832}
1833
1834void
1835pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1836{
1837	vm_offset_t vdst;
1838	vm_offset_t vsrc;
1839	vm_paddr_t pdst;
1840	vm_paddr_t psrc;
1841	struct tte *tp;
1842
1843	KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
1844	    ("pmap_copy_page: fake dst page"));
1845	KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
1846	    ("pmap_copy_page: fake src page"));
1847	PMAP_STATS_INC(pmap_ncopy_page);
1848	pdst = VM_PAGE_TO_PHYS(mdst);
1849	psrc = VM_PAGE_TO_PHYS(msrc);
1850	if (dcache_color_ignore != 0 ||
1851	    (msrc->md.color == DCACHE_COLOR(psrc) &&
1852	    mdst->md.color == DCACHE_COLOR(pdst))) {
1853		PMAP_STATS_INC(pmap_ncopy_page_c);
1854		vdst = TLB_PHYS_TO_DIRECT(pdst);
1855		vsrc = TLB_PHYS_TO_DIRECT(psrc);
1856		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1857	} else if (msrc->md.color == -1 && mdst->md.color == -1) {
1858		PMAP_STATS_INC(pmap_ncopy_page_nc);
1859		ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
1860	} else if (msrc->md.color == -1) {
1861		if (mdst->md.color == DCACHE_COLOR(pdst)) {
1862			PMAP_STATS_INC(pmap_ncopy_page_dc);
1863			vdst = TLB_PHYS_TO_DIRECT(pdst);
1864			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1865			    PAGE_SIZE);
1866		} else {
1867			PMAP_STATS_INC(pmap_ncopy_page_doc);
1868			PMAP_LOCK(kernel_pmap);
1869			vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1870			tp = tsb_kvtotte(vdst);
1871			tp->tte_data =
1872			    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1873			tp->tte_vpn = TV_VPN(vdst, TS_8K);
1874			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1875			    PAGE_SIZE);
1876			tlb_page_demap(kernel_pmap, vdst);
1877			PMAP_UNLOCK(kernel_pmap);
1878		}
1879	} else if (mdst->md.color == -1) {
1880		if (msrc->md.color == DCACHE_COLOR(psrc)) {
1881			PMAP_STATS_INC(pmap_ncopy_page_sc);
1882			vsrc = TLB_PHYS_TO_DIRECT(psrc);
1883			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1884			    PAGE_SIZE);
1885		} else {
1886			PMAP_STATS_INC(pmap_ncopy_page_soc);
1887			PMAP_LOCK(kernel_pmap);
1888			vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
1889			tp = tsb_kvtotte(vsrc);
1890			tp->tte_data =
1891			    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1892			tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1893			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1894			    PAGE_SIZE);
1895			tlb_page_demap(kernel_pmap, vsrc);
1896			PMAP_UNLOCK(kernel_pmap);
1897		}
1898	} else {
1899		PMAP_STATS_INC(pmap_ncopy_page_oc);
1900		PMAP_LOCK(kernel_pmap);
1901		vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1902		tp = tsb_kvtotte(vdst);
1903		tp->tte_data =
1904		    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1905		tp->tte_vpn = TV_VPN(vdst, TS_8K);
1906		vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
1907		tp = tsb_kvtotte(vsrc);
1908		tp->tte_data =
1909		    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1910		tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1911		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1912		tlb_page_demap(kernel_pmap, vdst);
1913		tlb_page_demap(kernel_pmap, vsrc);
1914		PMAP_UNLOCK(kernel_pmap);
1915	}
1916}
1917
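/*
 * The branches in pmap_copy_page() above, keyed on the D-cache color state
 * of the source and destination pages (a paraphrase of the code, kept here
 * for reference only):
 *
 *	source \ destination	matches PA	color == -1	mismatches PA
 *	matches PA		direct copy	ascopyto()	temp map both
 *	color == -1		ascopyfrom()	ascopy()	temp map dst
 *	mismatches PA		temp map both	temp map src	temp map both
 *
 * "temp map" means an 8K TTE is built by hand at pmap_temp_map_1 (or
 * pmap_temp_map_2) with the page's color and demapped again afterwards.
 * Setting dcache_color_ignore short-circuits all of this to the direct
 * map regardless of colors.
 */
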
1918/*
1919 * Return TRUE if the pmap's pv entry (on sparc64, its TTE) is one
1920 * of the first 16 entries linked to this page.  This count may be
1921 * changed upwards or downwards in the future; it is only necessary
1922 * that TRUE be returned for a small subset of pmaps for proper
1923 * page aging.
1924 */
1925boolean_t
1926pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1927{
1928	struct tte *tp;
1929	int loops;
1930	boolean_t rv;
1931
1932	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1933	    ("pmap_page_exists_quick: page %p is not managed", m));
1934	loops = 0;
1935	rv = FALSE;
1936	vm_page_lock_queues();
1937	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1938		if ((tp->tte_data & TD_PV) == 0)
1939			continue;
1940		if (TTE_GET_PMAP(tp) == pm) {
1941			rv = TRUE;
1942			break;
1943		}
1944		if (++loops >= 16)
1945			break;
1946	}
1947	vm_page_unlock_queues();
1948	return (rv);
1949}
1950
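/*
 * A sketch of a pmap_page_exists_quick() caller; p and m are hypothetical
 * locals, everything else is the real interface:
 *
 *	if (pmap_page_exists_quick(vmspace_pmap(p->p_vmspace), m)) {
 *		... m is mapped by process p; treat it as active ...
 *	}
 *
 * Only the first 16 mappings with TD_PV set are examined, so a FALSE
 * return does not prove that the pmap has no mapping of the page.
 */
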
1951/*
1952 * Return the number of managed mappings to the given physical page
1953 * that are wired.
1954 */
1955int
1956pmap_page_wired_mappings(vm_page_t m)
1957{
1958	struct tte *tp;
1959	int count;
1960
1961	count = 0;
1962	if ((m->flags & PG_FICTITIOUS) != 0)
1963		return (count);
1964	vm_page_lock_queues();
1965	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1966		if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
1967			count++;
1968	vm_page_unlock_queues();
1969	return (count);
1970}
1971
1972/*
1973 * Remove all pages from the specified address space; this aids process
1974 * exit speed.  This is much faster than pmap_remove in the case of running
1975 * down an entire address space.  Only works for the current pmap.
1976 */
1977void
1978pmap_remove_pages(pmap_t pm)
1979{
1980
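	/*
	 * Not implemented on sparc64; address space teardown falls back to
	 * the regular pmap_remove() path instead.
	 */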
1981}
1982
1983/*
1984 * Returns TRUE if the given page has a managed mapping.
1985 */
1986boolean_t
1987pmap_page_is_mapped(vm_page_t m)
1988{
1989	struct tte *tp;
1990	boolean_t rv;
1991
1992	rv = FALSE;
1993	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1994		return (rv);
1995	vm_page_lock_queues();
1996	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1997		if ((tp->tte_data & TD_PV) != 0) {
1998			rv = TRUE;
1999			break;
2000		}
2001	vm_page_unlock_queues();
2002	return (rv);
2003}
2004
2005/*
2006 * Return a count of reference bits for a page, clearing those bits.
2007 * It is not necessary for every reference bit to be cleared, but it
2008 * is necessary that 0 only be returned when there are truly no
2009 * reference bits set.
2010 *
2011 * XXX: The exact number of bits to check and clear is a matter that
2012 * should be tested and standardized at some point in the future for
2013 * optimal aging of shared pages.
2014 */
2015int
2016pmap_ts_referenced(vm_page_t m)
2017{
2018	struct tte *tpf;
2019	struct tte *tpn;
2020	struct tte *tp;
2021	u_long data;
2022	int count;
2023
2024	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2025	    ("pmap_ts_referenced: page %p is not managed", m));
2026	count = 0;
2027	vm_page_lock_queues();
2028	if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
2029		tpf = tp;
2030		do {
2031			tpn = TAILQ_NEXT(tp, tte_link);
2032			TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
2033			TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
2034			if ((tp->tte_data & TD_PV) == 0)
2035				continue;
2036			data = atomic_clear_long(&tp->tte_data, TD_REF);
2037			if ((data & TD_REF) != 0 && ++count > 4)
2038				break;
2039		} while ((tp = tpn) != NULL && tp != tpf);
2040	}
2041	vm_page_unlock_queues();
2042	return (count);
2043}
2044
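/*
 * Worked example for pmap_ts_referenced(): each examined TTE is moved to
 * the tail of the page's tte_list before being tested, and the loop stops
 * once more than four referenced mappings have been found.  With, say,
 * eight managed mappings that all have TD_REF set, the call clears TD_REF
 * in the first five, returns 5, and leaves the other three at the head of
 * the list, so a later call starts with mappings not examined this time.
 */
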
2045boolean_t
2046pmap_is_modified(vm_page_t m)
2047{
2048	struct tte *tp;
2049	boolean_t rv;
2050
2051	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2052	    ("pmap_is_modified: page %p is not managed", m));
2053	rv = FALSE;
2054
2055	/*
2056	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
2057	 * concurrently set while the object is locked.  Thus, if PG_WRITEABLE
2058	 * is clear, no TTEs can have TD_W set.
2059	 */
2060	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2061	if ((m->oflags & VPO_BUSY) == 0 &&
2062	    (m->flags & PG_WRITEABLE) == 0)
2063		return (rv);
2064	vm_page_lock_queues();
2065	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2066		if ((tp->tte_data & TD_PV) == 0)
2067			continue;
2068		if ((tp->tte_data & TD_W) != 0) {
2069			rv = TRUE;
2070			break;
2071		}
2072	}
2073	vm_page_unlock_queues();
2074	return (rv);
2075}
2076
2077/*
2078 *	pmap_is_prefaultable:
2079 *
2080 *	Return whether or not the specified virtual address is eligible
2081 *	for prefault.
2082 */
2083boolean_t
2084pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2085{
2086	boolean_t rv;
2087
2088	PMAP_LOCK(pmap);
2089	rv = tsb_tte_lookup(pmap, addr) == NULL;
2090	PMAP_UNLOCK(pmap);
2091	return (rv);
2092}
2093
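/*
 * An address is prefaultable exactly when no TTE currently exists for it
 * in the given pmap.  The machine-independent fault code typically uses
 * this along the following lines (illustrative sketch, not taken from
 * this file):
 *
 *	if (pmap_is_prefaultable(pmap, addr))
 *		pmap_enter_quick(pmap, addr, m, prot);
 */
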
2094/*
2095 * Return whether or not the specified physical page was referenced
2096 * in any physical maps.
2097 */
2098boolean_t
2099pmap_is_referenced(vm_page_t m)
2100{
2101	struct tte *tp;
2102	boolean_t rv;
2103
2104	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2105	    ("pmap_is_referenced: page %p is not managed", m));
2106	rv = FALSE;
2107	vm_page_lock_queues();
2108	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2109		if ((tp->tte_data & TD_PV) == 0)
2110			continue;
2111		if ((tp->tte_data & TD_REF) != 0) {
2112			rv = TRUE;
2113			break;
2114		}
2115	}
2116	vm_page_unlock_queues();
2117	return (rv);
2118}
2119
2120void
2121pmap_clear_modify(vm_page_t m)
2122{
2123	struct tte *tp;
2124	u_long data;
2125
2126	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2127	    ("pmap_clear_modify: page %p is not managed", m));
2128	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2129	KASSERT((m->oflags & VPO_BUSY) == 0,
2130	    ("pmap_clear_modify: page %p is busy", m));
2131
2132	/*
2133	 * If the page is not PG_WRITEABLE, then no TTEs can have TD_W set.
2134	 * If the object containing the page is locked and the page is not
2135	 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
2136	 */
2137	if ((m->flags & PG_WRITEABLE) == 0)
2138		return;
2139	vm_page_lock_queues();
2140	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2141		if ((tp->tte_data & TD_PV) == 0)
2142			continue;
2143		data = atomic_clear_long(&tp->tte_data, TD_W);
2144		if ((data & TD_W) != 0)
2145			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2146	}
2147	vm_page_unlock_queues();
2148}
2149
2150void
2151pmap_clear_reference(vm_page_t m)
2152{
2153	struct tte *tp;
2154	u_long data;
2155
2156	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2157	    ("pmap_clear_reference: page %p is not managed", m));
2158	vm_page_lock_queues();
2159	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2160		if ((tp->tte_data & TD_PV) == 0)
2161			continue;
2162		data = atomic_clear_long(&tp->tte_data, TD_REF);
2163		if ((data & TD_REF) != 0)
2164			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2165	}
2166	vm_page_unlock_queues();
2167}
2168
2169void
2170pmap_remove_write(vm_page_t m)
2171{
2172	struct tte *tp;
2173	u_long data;
2174
2175	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2176	    ("pmap_remove_write: page %p is not managed", m));
2177
2178	/*
2179	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
2180	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
2181	 * is clear, no page table entries need updating.
2182	 */
2183	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2184	if ((m->oflags & VPO_BUSY) == 0 &&
2185	    (m->flags & PG_WRITEABLE) == 0)
2186		return;
2187	vm_page_lock_queues();
2188	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2189		if ((tp->tte_data & TD_PV) == 0)
2190			continue;
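		/*
		 * Clear both the software write-enable bit (TD_SW) and the
		 * hardware write bit (TD_W); if TD_W was set, the page has
		 * been dirtied through this mapping, so it is marked dirty
		 * before the stale TLB entry is demapped.
		 */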
2191		data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
2192		if ((data & TD_W) != 0) {
2193			vm_page_dirty(m);
2194			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2195		}
2196	}
2197	vm_page_flag_clear(m, PG_WRITEABLE);
2198	vm_page_unlock_queues();
2199}
2200
2201int
2202pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa)
2203{
2204
2205	/* TODO */
2206	return (0);
2207}
2208
2209/*
2210 * Activate a user pmap.  The pmap must be activated before its address space
2211 * can be accessed in any way.
2212 */
2213void
2214pmap_activate(struct thread *td)
2215{
2216	struct vmspace *vm;
2217	struct pmap *pm;
2218	int context;
2219
2220	vm = td->td_proc->p_vmspace;
2221	pm = vmspace_pmap(vm);
2222
2223	mtx_lock_spin(&sched_lock);
2224
2225	context = PCPU_GET(tlb_ctx);
2226	if (context == PCPU_GET(tlb_ctx_max)) {
2227		tlb_flush_user();
2228		context = PCPU_GET(tlb_ctx_min);
2229	}
2230	PCPU_SET(tlb_ctx, context + 1);
2231
2232	pm->pm_context[curcpu] = context;
2233	pm->pm_active |= PCPU_GET(cpumask);
2234	PCPU_SET(pmap, pm);
2235
2236	stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
2237	stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
2238	stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
2239	    TLB_CXR_PGSZ_MASK) | context);
2240	flush(KERNBASE);
2241
2242	mtx_unlock_spin(&sched_lock);
2243}
2244
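/*
 * The context numbers handed out by pmap_activate() are per-CPU and are
 * recycled round-robin: tlb_ctx advances by one on every activation, and
 * once it reaches tlb_ctx_max the user TLB is flushed wholesale and the
 * numbering restarts at tlb_ctx_min.  For example, if a CPU's usable
 * range were 1..8191 (hypothetical values), the 8192nd activation since
 * the last flush would find the counter at its maximum, call
 * tlb_flush_user() and hand out context 1 again.
 */
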
2245void
2246pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2247{
2248
2249}
2250
2251/*
2252 * Increase the starting virtual address of the given mapping if a
2253 * different alignment might result in more superpage mappings.
2254 */
2255void
2256pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2257    vm_offset_t *addr, vm_size_t size)
2258{
2259
2260}
2261