1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1991 Regents of the University of California.
5 * All rights reserved.
6 * Copyright (c) 1994 John S. Dyson
7 * All rights reserved.
8 * Copyright (c) 1994 David Greenman
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
40 */
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD$");
44
45/*
46 * Manages physical address maps.
47 *
48 * Since the information managed by this module is also stored by the
49 * logical address mapping module, this module may throw away valid virtual
50 * to physical mappings at almost any time.  However, invalidations of
51 * mappings must be done as requested.
52 *
53 * In order to cope with hardware architectures which make virtual to
 * physical map invalidates expensive, this module may delay invalidate or
 * reduced protection operations until such time as they are actually
56 * necessary.  This module is given full information as to which processors
57 * are currently using which maps, and to when physical maps must be made
58 * correct.
59 */
60
61#include "opt_kstack_pages.h"
62#include "opt_pmap.h"
63
64#include <sys/param.h>
65#include <sys/kernel.h>
66#include <sys/ktr.h>
67#include <sys/lock.h>
68#include <sys/msgbuf.h>
69#include <sys/mutex.h>
70#include <sys/proc.h>
71#include <sys/rwlock.h>
72#include <sys/smp.h>
73#include <sys/sysctl.h>
74#include <sys/systm.h>
75#include <sys/vmmeter.h>
76
77#include <dev/ofw/openfirm.h>
78
79#include <vm/vm.h>
80#include <vm/vm_param.h>
81#include <vm/vm_kern.h>
82#include <vm/vm_page.h>
83#include <vm/vm_map.h>
84#include <vm/vm_object.h>
85#include <vm/vm_extern.h>
86#include <vm/vm_pageout.h>
87#include <vm/vm_pager.h>
88#include <vm/vm_phys.h>
89
90#include <machine/cache.h>
91#include <machine/frame.h>
92#include <machine/instr.h>
93#include <machine/md_var.h>
94#include <machine/metadata.h>
95#include <machine/ofw_mem.h>
96#include <machine/smp.h>
97#include <machine/tlb.h>
98#include <machine/tte.h>
99#include <machine/tsb.h>
100#include <machine/ver.h>
101
102/*
 * Map of physical memory regions
104 */
105vm_paddr_t phys_avail[128];
106static struct ofw_mem_region mra[128];
107struct ofw_mem_region sparc64_memreg[128];
108int sparc64_nmemreg;
109static struct ofw_map translations[128];
110static int translations_size;
111
112static vm_offset_t pmap_idle_map;
113static vm_offset_t pmap_temp_map_1;
114static vm_offset_t pmap_temp_map_2;
115
116/*
117 * First and last available kernel virtual addresses
118 */
119vm_offset_t virtual_avail;
120vm_offset_t virtual_end;
121vm_offset_t kernel_vm_end;
122
123vm_offset_t vm_max_kernel_address;
124
125/*
126 * Kernel pmap
127 */
128struct pmap kernel_pmap_store;
129
130struct rwlock_padalign tte_list_global_lock;
131
132/*
133 * Allocate physical memory for use in pmap_bootstrap.
134 */
135static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
136
137static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
138static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
139static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
140    struct tte *tp, vm_offset_t va);
141static int pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp,
142    vm_offset_t va);
143static void pmap_init_qpages(void);
144
145/*
146 * Map the given physical page at the specified virtual address in the
147 * target pmap with the protection requested.  If specified the page
148 * will be wired down.
149 *
150 * The page queues and pmap must be locked.
151 */
152static int pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
153    vm_prot_t prot, u_int flags, int8_t psind);
154
155extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
156extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
157extern int tl1_dmmu_miss_patch_asi_1[];
158extern int tl1_dmmu_miss_patch_quad_ldd_1[];
159extern int tl1_dmmu_miss_patch_tsb_1[];
160extern int tl1_dmmu_miss_patch_tsb_2[];
161extern int tl1_dmmu_miss_patch_tsb_mask_1[];
162extern int tl1_dmmu_miss_patch_tsb_mask_2[];
163extern int tl1_dmmu_prot_patch_asi_1[];
164extern int tl1_dmmu_prot_patch_quad_ldd_1[];
165extern int tl1_dmmu_prot_patch_tsb_1[];
166extern int tl1_dmmu_prot_patch_tsb_2[];
167extern int tl1_dmmu_prot_patch_tsb_mask_1[];
168extern int tl1_dmmu_prot_patch_tsb_mask_2[];
169extern int tl1_immu_miss_patch_asi_1[];
170extern int tl1_immu_miss_patch_quad_ldd_1[];
171extern int tl1_immu_miss_patch_tsb_1[];
172extern int tl1_immu_miss_patch_tsb_2[];
173extern int tl1_immu_miss_patch_tsb_mask_1[];
174extern int tl1_immu_miss_patch_tsb_mask_2[];
175
176/*
 * If a user pmap is processed with pmap_remove and the resident count
 * drops to 0, there are no more pages to remove, so we need not
 * continue.
180 */
181#define	PMAP_REMOVE_DONE(pm) \
182	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
183
184/*
185 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
186 * and pmap_protect() instead of trying each virtual address.
187 */
188#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
189
190SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
191
192PMAP_STATS_VAR(pmap_nenter);
193PMAP_STATS_VAR(pmap_nenter_update);
194PMAP_STATS_VAR(pmap_nenter_replace);
195PMAP_STATS_VAR(pmap_nenter_new);
196PMAP_STATS_VAR(pmap_nkenter);
197PMAP_STATS_VAR(pmap_nkenter_oc);
198PMAP_STATS_VAR(pmap_nkenter_stupid);
199PMAP_STATS_VAR(pmap_nkremove);
200PMAP_STATS_VAR(pmap_nqenter);
201PMAP_STATS_VAR(pmap_nqremove);
202PMAP_STATS_VAR(pmap_ncache_enter);
203PMAP_STATS_VAR(pmap_ncache_enter_c);
204PMAP_STATS_VAR(pmap_ncache_enter_oc);
205PMAP_STATS_VAR(pmap_ncache_enter_cc);
206PMAP_STATS_VAR(pmap_ncache_enter_coc);
207PMAP_STATS_VAR(pmap_ncache_enter_nc);
208PMAP_STATS_VAR(pmap_ncache_enter_cnc);
209PMAP_STATS_VAR(pmap_ncache_remove);
210PMAP_STATS_VAR(pmap_ncache_remove_c);
211PMAP_STATS_VAR(pmap_ncache_remove_oc);
212PMAP_STATS_VAR(pmap_ncache_remove_cc);
213PMAP_STATS_VAR(pmap_ncache_remove_coc);
214PMAP_STATS_VAR(pmap_ncache_remove_nc);
215PMAP_STATS_VAR(pmap_nzero_page);
216PMAP_STATS_VAR(pmap_nzero_page_c);
217PMAP_STATS_VAR(pmap_nzero_page_oc);
218PMAP_STATS_VAR(pmap_nzero_page_nc);
219PMAP_STATS_VAR(pmap_nzero_page_area);
220PMAP_STATS_VAR(pmap_nzero_page_area_c);
221PMAP_STATS_VAR(pmap_nzero_page_area_oc);
222PMAP_STATS_VAR(pmap_nzero_page_area_nc);
223PMAP_STATS_VAR(pmap_ncopy_page);
224PMAP_STATS_VAR(pmap_ncopy_page_c);
225PMAP_STATS_VAR(pmap_ncopy_page_oc);
226PMAP_STATS_VAR(pmap_ncopy_page_nc);
227PMAP_STATS_VAR(pmap_ncopy_page_dc);
228PMAP_STATS_VAR(pmap_ncopy_page_doc);
229PMAP_STATS_VAR(pmap_ncopy_page_sc);
230PMAP_STATS_VAR(pmap_ncopy_page_soc);
231
232PMAP_STATS_VAR(pmap_nnew_thread);
233PMAP_STATS_VAR(pmap_nnew_thread_oc);
234
235static inline u_long dtlb_get_data(u_int tlb, u_int slot);
236
237/*
238 * Quick sort callout for comparing memory regions
239 */
240static int mr_cmp(const void *a, const void *b);
241static int om_cmp(const void *a, const void *b);
242
243static int
244mr_cmp(const void *a, const void *b)
245{
246	const struct ofw_mem_region *mra;
247	const struct ofw_mem_region *mrb;
248
249	mra = a;
250	mrb = b;
251	if (mra->mr_start < mrb->mr_start)
252		return (-1);
253	else if (mra->mr_start > mrb->mr_start)
254		return (1);
255	else
256		return (0);
257}
258
259static int
260om_cmp(const void *a, const void *b)
261{
262	const struct ofw_map *oma;
263	const struct ofw_map *omb;
264
265	oma = a;
266	omb = b;
267	if (oma->om_start < omb->om_start)
268		return (-1);
269	else if (oma->om_start > omb->om_start)
270		return (1);
271	else
272		return (0);
273}
274
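/*
 * Read the data word of the given dTLB entry.
 */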
275static inline u_long
276dtlb_get_data(u_int tlb, u_int slot)
277{
278	u_long data;
279	register_t s;
280
281	slot = TLB_DAR_SLOT(tlb, slot);
282	/*
283	 * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
284	 * work around errata of USIII and beyond.
285	 */
286	s = intr_disable();
287	(void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
288	data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
289	intr_restore(s);
290	return (data);
291}
292
293/*
294 * Bootstrap the system enough to run with virtual memory.
295 */
296void
297pmap_bootstrap(u_int cpu_impl)
298{
299	struct pmap *pm;
300	struct tte *tp;
301	vm_offset_t off;
302	vm_offset_t va;
303	vm_paddr_t pa;
304	vm_size_t physsz;
305	vm_size_t virtsz;
306	u_long data;
307	u_long vpn;
308	phandle_t pmem;
309	phandle_t vmem;
310	u_int dtlb_slots_avail;
311	int i;
312	int j;
313	int sz;
314	uint32_t asi;
315	uint32_t colors;
316	uint32_t ldd;
317
318	/*
319	 * Set the kernel context.
320	 */
321	pmap_set_kctx();
322
323	colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
324
325	/*
326	 * Find out what physical memory is available from the PROM and
327	 * initialize the phys_avail array.  This must be done before
328	 * pmap_bootstrap_alloc is called.
329	 */
330	if ((pmem = OF_finddevice("/memory")) == -1)
331		OF_panic("%s: finddevice /memory", __func__);
332	if ((sz = OF_getproplen(pmem, "available")) == -1)
333		OF_panic("%s: getproplen /memory/available", __func__);
334	if (sizeof(phys_avail) < sz)
335		OF_panic("%s: phys_avail too small", __func__);
336	if (sizeof(mra) < sz)
337		OF_panic("%s: mra too small", __func__);
338	bzero(mra, sz);
339	if (OF_getprop(pmem, "available", mra, sz) == -1)
340		OF_panic("%s: getprop /memory/available", __func__);
341	sz /= sizeof(*mra);
342#ifdef DIAGNOSTIC
343	OF_printf("pmap_bootstrap: physical memory\n");
344#endif
345	qsort(mra, sz, sizeof (*mra), mr_cmp);
346	physsz = 0;
347	getenv_quad("hw.physmem", &physmem);
348	physmem = btoc(physmem);
349	for (i = 0, j = 0; i < sz; i++, j += 2) {
350#ifdef DIAGNOSTIC
351		OF_printf("start=%#lx size=%#lx\n", mra[i].mr_start,
352		    mra[i].mr_size);
353#endif
354		if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
355			if (btoc(physsz) < physmem) {
356				phys_avail[j] = mra[i].mr_start;
357				phys_avail[j + 1] = mra[i].mr_start +
358				    (ctob(physmem) - physsz);
359				physsz = ctob(physmem);
360			}
361			break;
362		}
363		phys_avail[j] = mra[i].mr_start;
364		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
365		physsz += mra[i].mr_size;
366	}
367	physmem = btoc(physsz);
368
369	/*
370	 * Calculate the size of kernel virtual memory, and the size and mask
 * for the kernel TSB based on the physical memory size but limited
372	 * by the amount of dTLB slots available for locked entries if we have
373	 * to lock the TSB in the TLB (given that for spitfire-class CPUs all
374	 * of the dt64 slots can hold locked entries but there is no large
375	 * dTLB for unlocked ones, we don't use more than half of it for the
376	 * TSB).
377	 * Note that for reasons unknown OpenSolaris doesn't take advantage of
378	 * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
379	 * public documentation is available for these, the latter just might
380	 * not support it, yet.
381	 */
382	if (cpu_impl == CPU_IMPL_SPARC64V ||
383	    cpu_impl >= CPU_IMPL_ULTRASPARCIIIp) {
384		tsb_kernel_ldd_phys = 1;
385		virtsz = roundup(5 / 3 * physsz, PAGE_SIZE_4M <<
386		    (PAGE_SHIFT - TTE_SHIFT));
387	} else {
388		dtlb_slots_avail = 0;
389		for (i = 0; i < dtlb_slots; i++) {
390			data = dtlb_get_data(cpu_impl ==
391			    CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
392			    TLB_DAR_T32, i);
393			if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
394				dtlb_slots_avail++;
395		}
396#ifdef SMP
397		dtlb_slots_avail -= PCPU_PAGES;
398#endif
399		if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
400		    cpu_impl < CPU_IMPL_ULTRASPARCIII)
401			dtlb_slots_avail /= 2;
402		virtsz = roundup(physsz, PAGE_SIZE_4M <<
403		    (PAGE_SHIFT - TTE_SHIFT));
404		virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
405		    (PAGE_SHIFT - TTE_SHIFT));
406	}
407	vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
408	tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
409	tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
410
411	/*
412	 * Allocate the kernel TSB and lock it in the TLB if necessary.
413	 */
414	pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
415	if (pa & PAGE_MASK_4M)
416		OF_panic("%s: TSB unaligned", __func__);
417	tsb_kernel_phys = pa;
418	if (tsb_kernel_ldd_phys == 0) {
419		tsb_kernel =
420		    (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
421		pmap_map_tsb();
422		bzero(tsb_kernel, tsb_kernel_size);
423	} else {
424		tsb_kernel =
425		    (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
426		aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
427	}
428
429	/*
430	 * Allocate and map the dynamic per-CPU area for the BSP.
431	 */
432	pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
433	dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
434
435	/*
436	 * Allocate and map the message buffer.
437	 */
438	pa = pmap_bootstrap_alloc(msgbufsize, colors);
439	msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
440
441	/*
442	 * Patch the TSB addresses and mask as well as the ASIs used to load
443	 * it into the trap table.
444	 */
445
446#define	LDDA_R_I_R(rd, imm_asi, rs1, rs2)				\
447	(EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |	\
448	    EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |	\
449	    EIF_F3_RS2(rs2))
450#define	OR_R_I_R(rd, imm13, rs1)					\
451	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |	\
452	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
453#define	SETHI(rd, imm22)						\
454	(EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |	\
455	    EIF_IMM((imm22) >> 10, 22))
456#define	WR_R_I(rd, imm13, rs1)						\
457	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |	\
458	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
459
460#define	PATCH_ASI(addr, asi) do {					\
461	if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,			\
462	    IF_F3_RS1(addr[0])))					\
463		OF_panic("%s: patched instructions have changed",	\
464		    __func__);						\
465	addr[0] |= EIF_IMM((asi), 13);					\
466	flush(addr);							\
467} while (0)
468
469#define	PATCH_LDD(addr, asi) do {					\
470	if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,		\
471	    IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))			\
472		OF_panic("%s: patched instructions have changed",	\
473		    __func__);						\
474	addr[0] |= EIF_F3_IMM_ASI(asi);					\
475	flush(addr);							\
476} while (0)
477
478#define	PATCH_TSB(addr, val) do {					\
479	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
480	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
481	    IF_F3_RS1(addr[1]))	||					\
482	    addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))			\
483		OF_panic("%s: patched instructions have changed",	\
484		    __func__);						\
485	addr[0] |= EIF_IMM((val) >> 42, 22);				\
486	addr[1] |= EIF_IMM((val) >> 32, 10);				\
487	addr[3] |= EIF_IMM((val) >> 10, 22);				\
488	flush(addr);							\
489	flush(addr + 1);						\
490	flush(addr + 3);						\
491} while (0)
492
493#define	PATCH_TSB_MASK(addr, val) do {					\
494	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
495	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
496	    IF_F3_RS1(addr[1])))					\
497		OF_panic("%s: patched instructions have changed",	\
498		    __func__);						\
499	addr[0] |= EIF_IMM((val) >> 10, 22);				\
500	addr[1] |= EIF_IMM((val), 10);					\
501	flush(addr);							\
502	flush(addr + 1);						\
503} while (0)
504
505	if (tsb_kernel_ldd_phys == 0) {
506		asi = ASI_N;
507		ldd = ASI_NUCLEUS_QUAD_LDD;
508		off = (vm_offset_t)tsb_kernel;
509	} else {
510		asi = ASI_PHYS_USE_EC;
511		ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
512		off = (vm_offset_t)tsb_kernel_phys;
513	}
514	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
515	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
516	    tsb_kernel_phys + tsb_kernel_size - 1);
517	PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
518	PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
519	PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
520	PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
521	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
522	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
523	PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
524	PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
525	PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
526	PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
527	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
528	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
529	PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
530	PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
531	PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
532	PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
533	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
534	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
535
536	/*
537	 * Enter fake 8k pages for the 4MB kernel pages, so that
538	 * pmap_kextract() will work for them.
539	 */
540	for (i = 0; i < kernel_tlb_slots; i++) {
541		pa = kernel_tlbs[i].te_pa;
542		va = kernel_tlbs[i].te_va;
543		for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
544			tp = tsb_kvtotte(va + off);
545			vpn = TV_VPN(va + off, TS_8K);
546			data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
547			    TD_SW | TD_CP | TD_CV | TD_P | TD_W;
548			pmap_bootstrap_set_tte(tp, vpn, data);
549		}
550	}
551
552	/*
553	 * Set the start and end of KVA.  The kernel is loaded starting
554	 * at the first available 4MB super page, so we advance to the
555	 * end of the last one used for it.
556	 */
557	virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
558	virtual_end = vm_max_kernel_address;
559	kernel_vm_end = vm_max_kernel_address;
560
561	/*
562	 * Allocate kva space for temporary mappings.
563	 */
564	pmap_idle_map = virtual_avail;
565	virtual_avail += PAGE_SIZE * colors;
566	pmap_temp_map_1 = virtual_avail;
567	virtual_avail += PAGE_SIZE * colors;
568	pmap_temp_map_2 = virtual_avail;
569	virtual_avail += PAGE_SIZE * colors;
570
571	/*
572	 * Allocate a kernel stack with guard page for thread0 and map it
573	 * into the kernel TSB.  We must ensure that the virtual address is
574	 * colored properly for corresponding CPUs, since we're allocating
575	 * from phys_avail so the memory won't have an associated vm_page_t.
576	 */
577	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
578	kstack0_phys = pa;
579	virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
580	kstack0 = virtual_avail;
581	virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
582	if (dcache_color_ignore == 0)
583		KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
584		    ("pmap_bootstrap: kstack0 miscolored"));
585	for (i = 0; i < KSTACK_PAGES; i++) {
586		pa = kstack0_phys + i * PAGE_SIZE;
587		va = kstack0 + i * PAGE_SIZE;
588		tp = tsb_kvtotte(va);
589		vpn = TV_VPN(va, TS_8K);
590		data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
591		    TD_CV | TD_P | TD_W;
592		pmap_bootstrap_set_tte(tp, vpn, data);
593	}
594
595	/*
596	 * Calculate the last available physical address.
597	 */
598	for (i = 0; phys_avail[i + 2] != 0; i += 2)
599		;
600	Maxmem = sparc64_btop(phys_avail[i + 1]);
601
602	/*
603	 * Add the PROM mappings to the kernel TSB.
604	 */
605	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
606		OF_panic("%s: finddevice /virtual-memory", __func__);
607	if ((sz = OF_getproplen(vmem, "translations")) == -1)
608		OF_panic("%s: getproplen translations", __func__);
609	if (sizeof(translations) < sz)
610		OF_panic("%s: translations too small", __func__);
611	bzero(translations, sz);
612	if (OF_getprop(vmem, "translations", translations, sz) == -1)
613		OF_panic("%s: getprop /virtual-memory/translations",
614		    __func__);
615	sz /= sizeof(*translations);
616	translations_size = sz;
617#ifdef DIAGNOSTIC
618	OF_printf("pmap_bootstrap: translations\n");
619#endif
620	qsort(translations, sz, sizeof (*translations), om_cmp);
621	for (i = 0; i < sz; i++) {
622#ifdef DIAGNOSTIC
623		OF_printf("translation: start=%#lx size=%#lx tte=%#lx\n",
624		    translations[i].om_start, translations[i].om_size,
625		    translations[i].om_tte);
626#endif
627		if ((translations[i].om_tte & TD_V) == 0)
628			continue;
629		if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
630		    translations[i].om_start > VM_MAX_PROM_ADDRESS)
631			continue;
632		for (off = 0; off < translations[i].om_size;
633		    off += PAGE_SIZE) {
634			va = translations[i].om_start + off;
635			tp = tsb_kvtotte(va);
636			vpn = TV_VPN(va, TS_8K);
637			data = ((translations[i].om_tte &
638			    ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
639			    (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
640			    cpu_impl < CPU_IMPL_ULTRASPARCIII ?
641			    (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
642			    (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
643			    (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
644			    off;
645			pmap_bootstrap_set_tte(tp, vpn, data);
646		}
647	}
648
649	/*
650	 * Get the available physical memory ranges from /memory/reg.  These
651	 * are only used for kernel dumps, but it may not be wise to do PROM
652	 * calls in that situation.
653	 */
654	if ((sz = OF_getproplen(pmem, "reg")) == -1)
655		OF_panic("%s: getproplen /memory/reg", __func__);
656	if (sizeof(sparc64_memreg) < sz)
657		OF_panic("%s: sparc64_memreg too small", __func__);
658	if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
659		OF_panic("%s: getprop /memory/reg", __func__);
660	sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
661
662	/*
663	 * Initialize the kernel pmap (which is statically allocated).
664	 */
665	pm = kernel_pmap;
666	PMAP_LOCK_INIT(pm);
667	for (i = 0; i < MAXCPU; i++)
668		pm->pm_context[i] = TLB_CTX_KERNEL;
669	CPU_FILL(&pm->pm_active);
670
671	/*
672	 * Initialize the global tte list lock, which is more commonly
673	 * known as the pmap pv global lock.
674	 */
675	rw_init(&tte_list_global_lock, "pmap pv global");
676
677	/*
678	 * Flush all non-locked TLB entries possibly left over by the
679	 * firmware.
680	 */
681	tlb_flush_nonlocked();
682}
683
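/*
 * Allocate per-CPU KVA for temporary page mappings, one page per D-cache
 * color, unless page coloring can be ignored on this CPU.
 */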
684static void
685pmap_init_qpages(void)
686{
687	struct pcpu *pc;
688	int i;
689
690	if (dcache_color_ignore != 0)
691		return;
692
693	CPU_FOREACH(i) {
694		pc = pcpu_find(i);
695		pc->pc_qmap_addr = kva_alloc(PAGE_SIZE * DCACHE_COLORS);
696		if (pc->pc_qmap_addr == 0)
697			panic("pmap_init_qpages: unable to allocate KVA");
698	}
699}
700
701SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_qpages, NULL);
702
703/*
704 * Map the 4MB kernel TSB pages.
705 */
706void
707pmap_map_tsb(void)
708{
709	vm_offset_t va;
710	vm_paddr_t pa;
711	u_long data;
712	int i;
713
714	for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
715		va = (vm_offset_t)tsb_kernel + i;
716		pa = tsb_kernel_phys + i;
717		data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
718		    TD_P | TD_W;
719		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
720		    TLB_TAR_CTX(TLB_CTX_KERNEL));
721		stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
722	}
723}
724
725/*
726 * Set the secondary context to be the kernel context (needed for FP block
727 * operations in the kernel).
728 */
729void
730pmap_set_kctx(void)
731{
732
733	stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
734	    TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
735	flush(KERNBASE);
736}
737
738/*
739 * Allocate a physical page of memory directly from the phys_avail map.
740 * Can only be called from pmap_bootstrap before avail start and end are
741 * calculated.
742 */
743static vm_paddr_t
744pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
745{
746	vm_paddr_t pa;
747	int i;
748
749	size = roundup(size, PAGE_SIZE * colors);
750	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
751		if (phys_avail[i + 1] - phys_avail[i] < size)
752			continue;
753		pa = phys_avail[i];
754		phys_avail[i] += size;
755		return (pa);
756	}
757	OF_panic("%s: no suitable region found", __func__);
758}
759
760/*
761 * Set a TTE.  This function is intended as a helper when tsb_kernel is
 * direct-mapped but we haven't taken over the trap table yet, as is the
763 * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
764 * the kernel TSB.
765 */
766void
767pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
768{
769
770	if (tsb_kernel_ldd_phys == 0) {
771		tp->tte_vpn = vpn;
772		tp->tte_data = data;
773	} else {
774		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
775		    ASI_PHYS_USE_EC, vpn);
776		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
777		    ASI_PHYS_USE_EC, data);
778	}
779}
780
781/*
782 * Initialize a vm_page's machine-dependent fields.
783 */
784void
785pmap_page_init(vm_page_t m)
786{
787
788	TAILQ_INIT(&m->md.tte_list);
789	m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
790	m->md.pmap = NULL;
791}
792
793/*
794 * Initialize the pmap module.
795 */
796void
797pmap_init(void)
798{
799	vm_offset_t addr;
800	vm_size_t size;
801	int result;
802	int i;
803
804	for (i = 0; i < translations_size; i++) {
805		addr = translations[i].om_start;
806		size = translations[i].om_size;
807		if ((translations[i].om_tte & TD_V) == 0)
808			continue;
809		if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
810			continue;
811		result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
812		    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
813		if (result != KERN_SUCCESS || addr != translations[i].om_start)
814			panic("pmap_init: vm_map_find");
815	}
816}
817
818/*
819 * Extract the physical page address associated with the given
820 * map/virtual_address pair.
821 */
822vm_paddr_t
823pmap_extract(pmap_t pm, vm_offset_t va)
824{
825	struct tte *tp;
826	vm_paddr_t pa;
827
828	if (pm == kernel_pmap)
829		return (pmap_kextract(va));
830	PMAP_LOCK(pm);
831	tp = tsb_tte_lookup(pm, va);
832	if (tp == NULL)
833		pa = 0;
834	else
835		pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
836	PMAP_UNLOCK(pm);
837	return (pa);
838}
839
840/*
841 * Atomically extract and hold the physical page with the given
842 * pmap and virtual address pair if that mapping permits the given
843 * protection.
844 */
845vm_page_t
846pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
847{
848	struct tte *tp;
849	vm_page_t m;
850	vm_paddr_t pa;
851
852	m = NULL;
853	pa = 0;
854	PMAP_LOCK(pm);
855retry:
856	if (pm == kernel_pmap) {
857		if (va >= VM_MIN_DIRECT_ADDRESS) {
858			tp = NULL;
859			m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
860			(void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
861			    &pa);
862			vm_page_hold(m);
863		} else {
864			tp = tsb_kvtotte(va);
865			if ((tp->tte_data & TD_V) == 0)
866				tp = NULL;
867		}
868	} else
869		tp = tsb_tte_lookup(pm, va);
870	if (tp != NULL && ((tp->tte_data & TD_SW) ||
871	    (prot & VM_PROT_WRITE) == 0)) {
872		if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
873			goto retry;
874		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
875		vm_page_hold(m);
876	}
877	PA_UNLOCK_COND(pa);
878	PMAP_UNLOCK(pm);
879	return (m);
880}
881
882/*
883 * Extract the physical page address associated with the given kernel virtual
884 * address.
885 */
886vm_paddr_t
887pmap_kextract(vm_offset_t va)
888{
889	struct tte *tp;
890
891	if (va >= VM_MIN_DIRECT_ADDRESS)
892		return (TLB_DIRECT_TO_PHYS(va));
893	tp = tsb_kvtotte(va);
894	if ((tp->tte_data & TD_V) == 0)
895		return (0);
896	return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
897}
898
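/*
 * Record a new mapping of the given page at the given virtual address for
 * D-cache coloring purposes.  Returns 1 if the mapping may be entered
 * cacheable and 0 if it must be non-cacheable; in the latter case all
 * existing mappings of the page are demoted to non-cacheable as well.
 */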
899int
900pmap_cache_enter(vm_page_t m, vm_offset_t va)
901{
902	struct tte *tp;
903	int color;
904
905	rw_assert(&tte_list_global_lock, RA_WLOCKED);
906	KASSERT((m->flags & PG_FICTITIOUS) == 0,
907	    ("pmap_cache_enter: fake page"));
908	PMAP_STATS_INC(pmap_ncache_enter);
909
910	if (dcache_color_ignore != 0)
911		return (1);
912
913	/*
914	 * Find the color for this virtual address and note the added mapping.
915	 */
916	color = DCACHE_COLOR(va);
917	m->md.colors[color]++;
918
919	/*
920	 * If all existing mappings have the same color, the mapping is
921	 * cacheable.
922	 */
923	if (m->md.color == color) {
924		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
925		    ("pmap_cache_enter: cacheable, mappings of other color"));
926		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
927			PMAP_STATS_INC(pmap_ncache_enter_c);
928		else
929			PMAP_STATS_INC(pmap_ncache_enter_oc);
930		return (1);
931	}
932
933	/*
934	 * If there are no mappings of the other color, and the page still has
935	 * the wrong color, this must be a new mapping.  Change the color to
936	 * match the new mapping, which is cacheable.  We must flush the page
937	 * from the cache now.
938	 */
939	if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
940		KASSERT(m->md.colors[color] == 1,
941		    ("pmap_cache_enter: changing color, not new mapping"));
942		dcache_page_inval(VM_PAGE_TO_PHYS(m));
943		m->md.color = color;
944		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
945			PMAP_STATS_INC(pmap_ncache_enter_cc);
946		else
947			PMAP_STATS_INC(pmap_ncache_enter_coc);
948		return (1);
949	}
950
951	/*
952	 * If the mapping is already non-cacheable, just return.
953	 */
954	if (m->md.color == -1) {
955		PMAP_STATS_INC(pmap_ncache_enter_nc);
956		return (0);
957	}
958
959	PMAP_STATS_INC(pmap_ncache_enter_cnc);
960
961	/*
962	 * Mark all mappings as uncacheable, flush any lines with the other
963	 * color out of the dcache, and set the color to none (-1).
964	 */
965	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
966		atomic_clear_long(&tp->tte_data, TD_CV);
967		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
968	}
969	dcache_page_inval(VM_PAGE_TO_PHYS(m));
970	m->md.color = -1;
971	return (0);
972}
973
974static void
975pmap_cache_remove(vm_page_t m, vm_offset_t va)
976{
977	struct tte *tp;
978	int color;
979
980	rw_assert(&tte_list_global_lock, RA_WLOCKED);
981	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
982	    m->md.colors[DCACHE_COLOR(va)]);
983	KASSERT((m->flags & PG_FICTITIOUS) == 0,
984	    ("pmap_cache_remove: fake page"));
985	PMAP_STATS_INC(pmap_ncache_remove);
986
987	if (dcache_color_ignore != 0)
988		return;
989
990	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
991	    ("pmap_cache_remove: no mappings %d <= 0",
992	    m->md.colors[DCACHE_COLOR(va)]));
993
994	/*
995	 * Find the color for this virtual address and note the removal of
996	 * the mapping.
997	 */
998	color = DCACHE_COLOR(va);
999	m->md.colors[color]--;
1000
1001	/*
1002	 * If the page is cacheable, just return and keep the same color, even
1003	 * if there are no longer any mappings.
1004	 */
1005	if (m->md.color != -1) {
1006		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1007			PMAP_STATS_INC(pmap_ncache_remove_c);
1008		else
1009			PMAP_STATS_INC(pmap_ncache_remove_oc);
1010		return;
1011	}
1012
1013	KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
1014	    ("pmap_cache_remove: uncacheable, no mappings of other color"));
1015
1016	/*
1017	 * If the page is not cacheable (color is -1), and the number of
1018	 * mappings for this color is not zero, just return.  There are
1019	 * mappings of the other color still, so remain non-cacheable.
1020	 */
1021	if (m->md.colors[color] != 0) {
1022		PMAP_STATS_INC(pmap_ncache_remove_nc);
1023		return;
1024	}
1025
1026	/*
1027	 * The number of mappings for this color is now zero.  Recache the
1028	 * other colored mappings, and change the page color to the other
1029	 * color.  There should be no lines in the data cache for this page,
1030	 * so flushing should not be needed.
1031	 */
1032	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1033		atomic_set_long(&tp->tte_data, TD_CV);
1034		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1035	}
1036	m->md.color = DCACHE_OTHER_COLOR(color);
1037
1038	if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1039		PMAP_STATS_INC(pmap_ncache_remove_cc);
1040	else
1041		PMAP_STATS_INC(pmap_ncache_remove_coc);
1042}
1043
1044/*
1045 * Map a wired page into kernel virtual address space.
1046 */
1047void
1048pmap_kenter(vm_offset_t va, vm_page_t m)
1049{
1050	vm_offset_t ova;
1051	struct tte *tp;
1052	vm_page_t om;
1053	u_long data;
1054
1055	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1056	PMAP_STATS_INC(pmap_nkenter);
1057	tp = tsb_kvtotte(va);
1058	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
1059	    va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
1060	if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
1061		CTR5(KTR_SPARE2,
1062	"pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
1063		    va, VM_PAGE_TO_PHYS(m), m->object,
1064		    m->object ? m->object->type : -1,
1065		    m->pindex);
1066		PMAP_STATS_INC(pmap_nkenter_oc);
1067	}
1068	if ((tp->tte_data & TD_V) != 0) {
1069		om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1070		ova = TTE_GET_VA(tp);
1071		if (m == om && va == ova) {
1072			PMAP_STATS_INC(pmap_nkenter_stupid);
1073			return;
1074		}
1075		TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
1076		pmap_cache_remove(om, ova);
1077		if (va != ova)
1078			tlb_page_demap(kernel_pmap, ova);
1079	}
1080	data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
1081	    TD_P | TD_W;
1082	if (pmap_cache_enter(m, va) != 0)
1083		data |= TD_CV;
1084	tp->tte_vpn = TV_VPN(va, TS_8K);
1085	tp->tte_data = data;
1086	TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1087}
1088
1089/*
1090 * Map a wired page into kernel virtual address space.  This additionally
1091 * takes a flag argument which is or'ed to the TTE data.  This is used by
1092 * sparc64_bus_mem_map().
1093 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1094 * to flush entries that might still be in the cache, if applicable.
1095 */
1096void
1097pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1098{
1099	struct tte *tp;
1100
1101	tp = tsb_kvtotte(va);
1102	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1103	    va, pa, tp, tp->tte_data);
1104	tp->tte_vpn = TV_VPN(va, TS_8K);
1105	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1106}
1107
1108/*
1109 * Remove a wired page from kernel virtual address space.
1110 */
1111void
1112pmap_kremove(vm_offset_t va)
1113{
1114	struct tte *tp;
1115	vm_page_t m;
1116
1117	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1118	PMAP_STATS_INC(pmap_nkremove);
1119	tp = tsb_kvtotte(va);
1120	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1121	    tp->tte_data);
1122	if ((tp->tte_data & TD_V) == 0)
1123		return;
1124	m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1125	TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1126	pmap_cache_remove(m, va);
1127	TTE_ZERO(tp);
1128}
1129
1130/*
1131 * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1132 */
1133void
1134pmap_kremove_flags(vm_offset_t va)
1135{
1136	struct tte *tp;
1137
1138	tp = tsb_kvtotte(va);
1139	CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1140	    tp->tte_data);
1141	TTE_ZERO(tp);
1142}
1143
1144/*
1145 * Map a range of physical addresses into kernel virtual address space.
1146 *
1147 * The value passed in *virt is a suggested virtual address for the mapping.
1148 * Architectures which can support a direct-mapped physical to virtual region
1149 * can return the appropriate address within that region, leaving '*virt'
1150 * unchanged.
1151 */
1152vm_offset_t
1153pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1154{
1155
1156	return (TLB_PHYS_TO_DIRECT(start));
1157}
1158
1159/*
1160 * Map a list of wired pages into kernel virtual address space.  This is
1161 * intended for temporary mappings which do not need page modification or
1162 * references recorded.  Existing mappings in the region are overwritten.
1163 */
1164void
1165pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1166{
1167	vm_offset_t va;
1168
1169	PMAP_STATS_INC(pmap_nqenter);
1170	va = sva;
1171	rw_wlock(&tte_list_global_lock);
1172	while (count-- > 0) {
1173		pmap_kenter(va, *m);
1174		va += PAGE_SIZE;
1175		m++;
1176	}
1177	rw_wunlock(&tte_list_global_lock);
1178	tlb_range_demap(kernel_pmap, sva, va);
1179}
1180
1181/*
1182 * Remove page mappings from kernel virtual address space.  Intended for
1183 * temporary mappings entered by pmap_qenter.
1184 */
1185void
1186pmap_qremove(vm_offset_t sva, int count)
1187{
1188	vm_offset_t va;
1189
1190	PMAP_STATS_INC(pmap_nqremove);
1191	va = sva;
1192	rw_wlock(&tte_list_global_lock);
1193	while (count-- > 0) {
1194		pmap_kremove(va);
1195		va += PAGE_SIZE;
1196	}
1197	rw_wunlock(&tte_list_global_lock);
1198	tlb_range_demap(kernel_pmap, sva, va);
1199}
1200
1201/*
1202 * Initialize the pmap associated with process 0.
1203 */
1204void
1205pmap_pinit0(pmap_t pm)
1206{
1207	int i;
1208
1209	PMAP_LOCK_INIT(pm);
1210	for (i = 0; i < MAXCPU; i++)
1211		pm->pm_context[i] = TLB_CTX_KERNEL;
1212	CPU_ZERO(&pm->pm_active);
1213	pm->pm_tsb = NULL;
1214	pm->pm_tsb_obj = NULL;
1215	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1216}
1217
1218/*
1219 * Initialize a preallocated and zeroed pmap structure, such as one in a
1220 * vmspace structure.
1221 */
1222int
1223pmap_pinit(pmap_t pm)
1224{
1225	vm_page_t ma[TSB_PAGES];
1226	int i;
1227
1228	/*
1229	 * Allocate KVA space for the TSB.
1230	 */
1231	if (pm->pm_tsb == NULL) {
1232		pm->pm_tsb = (struct tte *)kva_alloc(TSB_BSIZE);
1233		if (pm->pm_tsb == NULL)
1234			return (0);
	}
1236
1237	/*
1238	 * Allocate an object for it.
1239	 */
1240	if (pm->pm_tsb_obj == NULL)
1241		pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
1242
1243	for (i = 0; i < MAXCPU; i++)
1244		pm->pm_context[i] = -1;
1245	CPU_ZERO(&pm->pm_active);
1246
1247	VM_OBJECT_WLOCK(pm->pm_tsb_obj);
1248	(void)vm_page_grab_pages(pm->pm_tsb_obj, 0, VM_ALLOC_NORMAL |
1249	    VM_ALLOC_NOBUSY | VM_ALLOC_WIRED | VM_ALLOC_ZERO, ma, TSB_PAGES);
1250	VM_OBJECT_WUNLOCK(pm->pm_tsb_obj);
1251	for (i = 0; i < TSB_PAGES; i++)
1252		ma[i]->md.pmap = pm;
1253	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1254
1255	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1256	return (1);
1257}
1258
1259/*
1260 * Release any resources held by the given physical map.
1261 * Called when a pmap initialized by pmap_pinit is being released.
1262 * Should only be called if the map contains no valid mappings.
1263 */
1264void
1265pmap_release(pmap_t pm)
1266{
1267	vm_object_t obj;
1268	vm_page_t m;
1269#ifdef SMP
1270	struct pcpu *pc;
1271#endif
1272
1273	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1274	    pm->pm_context[curcpu], pm->pm_tsb);
1275	KASSERT(pmap_resident_count(pm) == 0,
1276	    ("pmap_release: resident pages %ld != 0",
1277	    pmap_resident_count(pm)));
1278
1279	/*
1280	 * After the pmap was freed, it might be reallocated to a new process.
1281	 * When switching, this might lead us to wrongly assume that we need
	 * not switch contexts because old and new pmap pointers are equal.
1283	 * Therefore, make sure that this pmap is not referenced by any PCPU
1284	 * pointer any more.  This could happen in two cases:
1285	 * - A process that referenced the pmap is currently exiting on a CPU.
1286	 *   However, it is guaranteed to not switch in any more after setting
1287	 *   its state to PRS_ZOMBIE.
1288	 * - A process that referenced this pmap ran on a CPU, but we switched
1289	 *   to a kernel thread, leaving the pmap pointer unchanged.
1290	 */
1291#ifdef SMP
1292	sched_pin();
1293	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
1294		atomic_cmpset_rel_ptr((uintptr_t *)&pc->pc_pmap,
1295		    (uintptr_t)pm, (uintptr_t)NULL);
1296	sched_unpin();
1297#else
1298	critical_enter();
1299	if (PCPU_GET(pmap) == pm)
1300		PCPU_SET(pmap, NULL);
1301	critical_exit();
1302#endif
1303
1304	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1305	obj = pm->pm_tsb_obj;
1306	VM_OBJECT_WLOCK(obj);
1307	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1308	while (!TAILQ_EMPTY(&obj->memq)) {
1309		m = TAILQ_FIRST(&obj->memq);
1310		m->md.pmap = NULL;
1311		vm_page_unwire_noq(m);
1312		vm_page_free_zero(m);
1313	}
1314	VM_OBJECT_WUNLOCK(obj);
1315}
1316
1317/*
1318 * Grow the number of kernel page table entries.  Unneeded.
1319 */
1320void
1321pmap_growkernel(vm_offset_t addr)
1322{
1323
1324	panic("pmap_growkernel: can't grow kernel");
1325}
1326
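/*
 * Remove a single TTE, updating the page and pmap accounting; used by
 * pmap_remove() both directly and as the tsb_foreach() callback.  Returns
 * 0 once a user pmap has no resident pages left so that the traversal can
 * be cut short.
 */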
1327int
1328pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1329    vm_offset_t va)
1330{
1331	vm_page_t m;
1332	u_long data;
1333
1334	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1335	data = atomic_readandclear_long(&tp->tte_data);
1336	if ((data & TD_FAKE) == 0) {
1337		m = PHYS_TO_VM_PAGE(TD_PA(data));
1338		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1339		if ((data & TD_WIRED) != 0)
1340			pm->pm_stats.wired_count--;
1341		if ((data & TD_PV) != 0) {
1342			if ((data & TD_W) != 0)
1343				vm_page_dirty(m);
1344			if ((data & TD_REF) != 0)
1345				vm_page_aflag_set(m, PGA_REFERENCED);
1346			if (TAILQ_EMPTY(&m->md.tte_list))
1347				vm_page_aflag_clear(m, PGA_WRITEABLE);
1348			pm->pm_stats.resident_count--;
1349		}
1350		pmap_cache_remove(m, va);
1351	}
1352	TTE_ZERO(tp);
1353	if (PMAP_REMOVE_DONE(pm))
1354		return (0);
1355	return (1);
1356}
1357
1358/*
1359 * Remove the given range of addresses from the specified map.
1360 */
1361void
1362pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1363{
1364	struct tte *tp;
1365	vm_offset_t va;
1366
1367	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1368	    pm->pm_context[curcpu], start, end);
1369	if (PMAP_REMOVE_DONE(pm))
1370		return;
1371	rw_wlock(&tte_list_global_lock);
1372	PMAP_LOCK(pm);
1373	if (end - start > PMAP_TSB_THRESH) {
1374		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1375		tlb_context_demap(pm);
1376	} else {
1377		for (va = start; va < end; va += PAGE_SIZE)
1378			if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1379			    !pmap_remove_tte(pm, NULL, tp, va))
1380				break;
1381		tlb_range_demap(pm, start, end - 1);
1382	}
1383	PMAP_UNLOCK(pm);
1384	rw_wunlock(&tte_list_global_lock);
1385}
1386
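/*
 * Remove all managed mappings of the given page, transferring the
 * reference and modify bits to the page and clearing PGA_WRITEABLE.
 */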
1387void
1388pmap_remove_all(vm_page_t m)
1389{
1390	struct pmap *pm;
1391	struct tte *tpn;
1392	struct tte *tp;
1393	vm_offset_t va;
1394
1395	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1396	    ("pmap_remove_all: page %p is not managed", m));
1397	rw_wlock(&tte_list_global_lock);
1398	for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1399		tpn = TAILQ_NEXT(tp, tte_link);
1400		if ((tp->tte_data & TD_PV) == 0)
1401			continue;
1402		pm = TTE_GET_PMAP(tp);
1403		va = TTE_GET_VA(tp);
1404		PMAP_LOCK(pm);
1405		if ((tp->tte_data & TD_WIRED) != 0)
1406			pm->pm_stats.wired_count--;
1407		if ((tp->tte_data & TD_REF) != 0)
1408			vm_page_aflag_set(m, PGA_REFERENCED);
1409		if ((tp->tte_data & TD_W) != 0)
1410			vm_page_dirty(m);
1411		tp->tte_data &= ~TD_V;
1412		tlb_page_demap(pm, va);
1413		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1414		pm->pm_stats.resident_count--;
1415		pmap_cache_remove(m, va);
1416		TTE_ZERO(tp);
1417		PMAP_UNLOCK(pm);
1418	}
1419	vm_page_aflag_clear(m, PGA_WRITEABLE);
1420	rw_wunlock(&tte_list_global_lock);
1421}
1422
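/*
 * Revoke write access from a single TTE; used by pmap_protect() both
 * directly and as the tsb_foreach() callback.  Dirties the page if the
 * managed mapping had been modified.
 */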
1423static int
1424pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1425    vm_offset_t va)
1426{
1427	u_long data;
1428	vm_page_t m;
1429
1430	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1431	data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
1432	if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) {
1433		m = PHYS_TO_VM_PAGE(TD_PA(data));
1434		vm_page_dirty(m);
1435	}
1436	return (1);
1437}
1438
1439/*
1440 * Set the physical protection on the specified range of this map as requested.
1441 */
1442void
1443pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1444{
1445	vm_offset_t va;
1446	struct tte *tp;
1447
1448	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1449	    pm->pm_context[curcpu], sva, eva, prot);
1450
1451	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1452		pmap_remove(pm, sva, eva);
1453		return;
1454	}
1455
1456	if (prot & VM_PROT_WRITE)
1457		return;
1458
1459	PMAP_LOCK(pm);
1460	if (eva - sva > PMAP_TSB_THRESH) {
1461		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1462		tlb_context_demap(pm);
1463	} else {
1464		for (va = sva; va < eva; va += PAGE_SIZE)
1465			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1466				pmap_protect_tte(pm, NULL, tp, va);
1467		tlb_range_demap(pm, sva, eva - 1);
1468	}
1469	PMAP_UNLOCK(pm);
1470}
1471
1472/*
1473 * Map the given physical page at the specified virtual address in the
1474 * target pmap with the protection requested.  If specified the page
1475 * will be wired down.
1476 */
1477int
1478pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1479    u_int flags, int8_t psind)
1480{
1481	int rv;
1482
1483	rw_wlock(&tte_list_global_lock);
1484	PMAP_LOCK(pm);
1485	rv = pmap_enter_locked(pm, va, m, prot, flags, psind);
1486	rw_wunlock(&tte_list_global_lock);
1487	PMAP_UNLOCK(pm);
1488	return (rv);
1489}
1490
1491/*
1492 * Map the given physical page at the specified virtual address in the
1493 * target pmap with the protection requested.  If specified the page
1494 * will be wired down.
1495 *
1496 * The page queues and pmap must be locked.
1497 */
1498static int
1499pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1500    u_int flags, int8_t psind __unused)
1501{
1502	struct tte *tp;
1503	vm_paddr_t pa;
1504	vm_page_t real;
1505	u_long data;
1506	boolean_t wired;
1507
1508	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1509	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1510	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1511		VM_OBJECT_ASSERT_LOCKED(m->object);
1512	PMAP_STATS_INC(pmap_nenter);
1513	pa = VM_PAGE_TO_PHYS(m);
1514	wired = (flags & PMAP_ENTER_WIRED) != 0;
1515
1516	/*
1517	 * If this is a fake page from the device_pager, but it covers actual
1518	 * physical memory, convert to the real backing page.
1519	 */
1520	if ((m->flags & PG_FICTITIOUS) != 0) {
1521		real = vm_phys_paddr_to_vm_page(pa);
1522		if (real != NULL)
1523			m = real;
1524	}
1525
1526	CTR6(KTR_PMAP,
1527	    "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1528	    pm->pm_context[curcpu], m, va, pa, prot, wired);
1529
1530	/*
1531	 * If there is an existing mapping, and the physical address has not
1532	 * changed, must be protection or wiring change.
1533	 */
1534	if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1535		CTR0(KTR_PMAP, "pmap_enter_locked: update");
1536		PMAP_STATS_INC(pmap_nenter_update);
1537
1538		/*
1539		 * Wiring change, just update stats.
1540		 */
1541		if (wired) {
1542			if ((tp->tte_data & TD_WIRED) == 0) {
1543				tp->tte_data |= TD_WIRED;
1544				pm->pm_stats.wired_count++;
1545			}
1546		} else {
1547			if ((tp->tte_data & TD_WIRED) != 0) {
1548				tp->tte_data &= ~TD_WIRED;
1549				pm->pm_stats.wired_count--;
1550			}
1551		}
1552
1553		/*
1554		 * Save the old bits and clear the ones we're interested in.
1555		 */
1556		data = tp->tte_data;
1557		tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1558
1559		/*
		 * If we're granting write permission, set the software write
		 * bit; otherwise sense the modify status of the old mapping.
1561		 */
1562		if ((prot & VM_PROT_WRITE) != 0) {
1563			tp->tte_data |= TD_SW;
1564			if (wired)
1565				tp->tte_data |= TD_W;
1566			if ((m->oflags & VPO_UNMANAGED) == 0)
1567				vm_page_aflag_set(m, PGA_WRITEABLE);
1568		} else if ((data & TD_W) != 0)
1569			vm_page_dirty(m);
1570
1571		/*
1572		 * If we're turning on execute permissions, flush the icache.
1573		 */
1574		if ((prot & VM_PROT_EXECUTE) != 0) {
1575			if ((data & TD_EXEC) == 0)
1576				icache_page_inval(pa);
1577			tp->tte_data |= TD_EXEC;
1578		}
1579
1580		/*
1581		 * Delete the old mapping.
1582		 */
1583		tlb_page_demap(pm, TTE_GET_VA(tp));
1584	} else {
1585		/*
		 * If there is an existing mapping, but it's for a different
1587		 * physical address, delete the old mapping.
1588		 */
1589		if (tp != NULL) {
1590			CTR0(KTR_PMAP, "pmap_enter_locked: replace");
1591			PMAP_STATS_INC(pmap_nenter_replace);
1592			pmap_remove_tte(pm, NULL, tp, va);
1593			tlb_page_demap(pm, va);
1594		} else {
1595			CTR0(KTR_PMAP, "pmap_enter_locked: new");
1596			PMAP_STATS_INC(pmap_nenter_new);
1597		}
1598
1599		/*
1600		 * Now set up the data and install the new mapping.
1601		 */
1602		data = TD_V | TD_8K | TD_PA(pa);
1603		if (pm == kernel_pmap)
1604			data |= TD_P;
1605		if ((prot & VM_PROT_WRITE) != 0) {
1606			data |= TD_SW;
1607			if ((m->oflags & VPO_UNMANAGED) == 0)
1608				vm_page_aflag_set(m, PGA_WRITEABLE);
1609		}
1610		if (prot & VM_PROT_EXECUTE) {
1611			data |= TD_EXEC;
1612			icache_page_inval(pa);
1613		}
1614
1615		/*
		 * If it's wired, update stats.  We also don't need reference or
1617		 * modify tracking for wired mappings, so set the bits now.
1618		 */
1619		if (wired) {
1620			pm->pm_stats.wired_count++;
1621			data |= TD_REF | TD_WIRED;
1622			if ((prot & VM_PROT_WRITE) != 0)
1623				data |= TD_W;
1624		}
1625
1626		tsb_tte_enter(pm, m, va, TS_8K, data);
1627	}
1628
1629	return (KERN_SUCCESS);
1630}
1631
1632/*
1633 * Maps a sequence of resident pages belonging to the same object.
1634 * The sequence begins with the given page m_start.  This page is
1635 * mapped at the given virtual address start.  Each subsequent page is
1636 * mapped at a virtual address that is offset from start by the same
1637 * amount as the page is offset from m_start within the object.  The
1638 * last page in the sequence is the page with the largest offset from
1639 * m_start that can be mapped at a virtual address less than the given
1640 * virtual address end.  Not every virtual page between start and end
1641 * is mapped; only those for which a resident page exists with the
1642 * corresponding offset from m_start are mapped.
1643 */
1644void
1645pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1646    vm_page_t m_start, vm_prot_t prot)
1647{
1648	vm_page_t m;
1649	vm_pindex_t diff, psize;
1650
1651	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1652
1653	psize = atop(end - start);
1654	m = m_start;
1655	rw_wlock(&tte_list_global_lock);
1656	PMAP_LOCK(pm);
1657	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1658		pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1659		    (VM_PROT_READ | VM_PROT_EXECUTE), 0, 0);
1660		m = TAILQ_NEXT(m, listq);
1661	}
1662	rw_wunlock(&tte_list_global_lock);
1663	PMAP_UNLOCK(pm);
1664}
1665
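/*
 * Map a single page with at most read and execute permission and without
 * wiring it; typically used to speculatively enter mappings, e.g. when
 * prefaulting.
 */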
1666void
1667pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1668{
1669
1670	rw_wlock(&tte_list_global_lock);
1671	PMAP_LOCK(pm);
1672	pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1673	    0, 0);
1674	rw_wunlock(&tte_list_global_lock);
1675	PMAP_UNLOCK(pm);
1676}
1677
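/*
 * Preload the pmap with mappings for a device object.  This is a no-op on
 * sparc64; we merely assert that the object really is a device object.
 */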
1678void
1679pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1680    vm_pindex_t pindex, vm_size_t size)
1681{
1682
1683	VM_OBJECT_ASSERT_WLOCKED(object);
1684	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1685	    ("pmap_object_init_pt: non-device object"));
1686}
1687
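/*
 * Clear the wired bit of a single TTE and update the wired page count;
 * used by pmap_unwire() both directly and as the tsb_foreach() callback.
 */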
1688static int
1689pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp, vm_offset_t va)
1690{
1691
1692	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1693	if ((tp->tte_data & TD_WIRED) == 0)
1694		panic("pmap_unwire_tte: tp %p is missing TD_WIRED", tp);
1695	atomic_clear_long(&tp->tte_data, TD_WIRED);
1696	pm->pm_stats.wired_count--;
1697	return (1);
1698}
1699
1700/*
1701 * Clear the wired attribute from the mappings for the specified range of
1702 * addresses in the given pmap.  Every valid mapping within that range must
1703 * have the wired attribute set.  In contrast, invalid mappings cannot have
1704 * the wired attribute set, so they are ignored.
1705 *
1706 * The wired attribute of the translation table entry is not a hardware
1707 * feature, so there is no need to invalidate any TLB entries.
1708 */
1709void
1710pmap_unwire(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1711{
1712	vm_offset_t va;
1713	struct tte *tp;
1714
1715	PMAP_LOCK(pm);
1716	if (eva - sva > PMAP_TSB_THRESH)
1717		tsb_foreach(pm, NULL, sva, eva, pmap_unwire_tte);
1718	else {
1719		for (va = sva; va < eva; va += PAGE_SIZE)
1720			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1721				pmap_unwire_tte(pm, NULL, tp, va);
1722	}
1723	PMAP_UNLOCK(pm);
1724}
1725
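/*
 * Copy a single TTE into the destination pmap if no mapping exists at the
 * given virtual address yet; used by pmap_copy() both directly and as the
 * tsb_foreach() callback.  Fake (device) mappings are not copied.
 */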
1726static int
1727pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1728    vm_offset_t va)
1729{
1730	vm_page_t m;
1731	u_long data;
1732
1733	if ((tp->tte_data & TD_FAKE) != 0)
1734		return (1);
1735	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1736		data = tp->tte_data &
1737		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1738		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1739		tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1740	}
1741	return (1);
1742}
1743
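/*
 * Copy the mappings of the given range from the source pmap to the
 * destination pmap.  Only copies between identical virtual address ranges
 * are supported.
 */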
1744void
1745pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1746    vm_size_t len, vm_offset_t src_addr)
1747{
1748	struct tte *tp;
1749	vm_offset_t va;
1750
1751	if (dst_addr != src_addr)
1752		return;
1753	rw_wlock(&tte_list_global_lock);
1754	if (dst_pmap < src_pmap) {
1755		PMAP_LOCK(dst_pmap);
1756		PMAP_LOCK(src_pmap);
1757	} else {
1758		PMAP_LOCK(src_pmap);
1759		PMAP_LOCK(dst_pmap);
1760	}
1761	if (len > PMAP_TSB_THRESH) {
1762		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1763		    pmap_copy_tte);
1764		tlb_context_demap(dst_pmap);
1765	} else {
1766		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1767			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1768				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1769		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1770	}
1771	rw_wunlock(&tte_list_global_lock);
1772	PMAP_UNLOCK(src_pmap);
1773	PMAP_UNLOCK(dst_pmap);
1774}
1775
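/*
 * Zero a page of physical memory, using the direct map if the page has
 * the matching D-cache color (or coloring is ignored), a physical ASI
 * access if the page is non-cacheable, and a correctly colored temporary
 * mapping otherwise.
 */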
1776void
1777pmap_zero_page(vm_page_t m)
1778{
1779	struct tte *tp;
1780	vm_offset_t va;
1781	vm_paddr_t pa;
1782
1783	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1784	    ("pmap_zero_page: fake page"));
1785	PMAP_STATS_INC(pmap_nzero_page);
1786	pa = VM_PAGE_TO_PHYS(m);
1787	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1788		PMAP_STATS_INC(pmap_nzero_page_c);
1789		va = TLB_PHYS_TO_DIRECT(pa);
1790		cpu_block_zero((void *)va, PAGE_SIZE);
1791	} else if (m->md.color == -1) {
1792		PMAP_STATS_INC(pmap_nzero_page_nc);
1793		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1794	} else {
1795		PMAP_STATS_INC(pmap_nzero_page_oc);
1796		PMAP_LOCK(kernel_pmap);
1797		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1798		tp = tsb_kvtotte(va);
1799		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1800		tp->tte_vpn = TV_VPN(va, TS_8K);
1801		cpu_block_zero((void *)va, PAGE_SIZE);
1802		tlb_page_demap(kernel_pmap, va);
1803		PMAP_UNLOCK(kernel_pmap);
1804	}
1805}
1806
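/*
 * Zero a sub-range of a page, choosing the access method in the same way
 * as pmap_zero_page().
 */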
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	struct tte *tp;
	vm_offset_t va;
	vm_paddr_t pa;

	KASSERT((m->flags & PG_FICTITIOUS) == 0,
	    ("pmap_zero_page_area: fake page"));
	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
	PMAP_STATS_INC(pmap_nzero_page_area);
	pa = VM_PAGE_TO_PHYS(m);
	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
		PMAP_STATS_INC(pmap_nzero_page_area_c);
		va = TLB_PHYS_TO_DIRECT(pa);
		bzero((void *)(va + off), size);
	} else if (m->md.color == -1) {
		PMAP_STATS_INC(pmap_nzero_page_area_nc);
		aszero(ASI_PHYS_USE_EC, pa + off, size);
	} else {
		PMAP_STATS_INC(pmap_nzero_page_area_oc);
		PMAP_LOCK(kernel_pmap);
		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
		tp = tsb_kvtotte(va);
		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
		tp->tte_vpn = TV_VPN(va, TS_8K);
		bzero((void *)(va + off), size);
		tlb_page_demap(kernel_pmap, va);
		PMAP_UNLOCK(kernel_pmap);
	}
}

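/*
 * Copy the contents of one physical page to another.  The direct map is
 * used when the cache colors of both pages permit; otherwise the copy is
 * performed through physical address ASIs and/or correctly colored
 * temporary kernel mappings.
 */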
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	vm_offset_t vdst;
	vm_offset_t vsrc;
	vm_paddr_t pdst;
	vm_paddr_t psrc;
	struct tte *tp;

	KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
	    ("pmap_copy_page: fake dst page"));
	KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
	    ("pmap_copy_page: fake src page"));
	PMAP_STATS_INC(pmap_ncopy_page);
	pdst = VM_PAGE_TO_PHYS(mdst);
	psrc = VM_PAGE_TO_PHYS(msrc);
	if (dcache_color_ignore != 0 ||
	    (msrc->md.color == DCACHE_COLOR(psrc) &&
	    mdst->md.color == DCACHE_COLOR(pdst))) {
		PMAP_STATS_INC(pmap_ncopy_page_c);
		vdst = TLB_PHYS_TO_DIRECT(pdst);
		vsrc = TLB_PHYS_TO_DIRECT(psrc);
		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
	} else if (msrc->md.color == -1 && mdst->md.color == -1) {
		PMAP_STATS_INC(pmap_ncopy_page_nc);
		ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
	} else if (msrc->md.color == -1) {
		if (mdst->md.color == DCACHE_COLOR(pdst)) {
			PMAP_STATS_INC(pmap_ncopy_page_dc);
			vdst = TLB_PHYS_TO_DIRECT(pdst);
			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
			    PAGE_SIZE);
		} else {
			PMAP_STATS_INC(pmap_ncopy_page_doc);
			PMAP_LOCK(kernel_pmap);
			vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
			tp = tsb_kvtotte(vdst);
			tp->tte_data =
			    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
			tp->tte_vpn = TV_VPN(vdst, TS_8K);
			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
			    PAGE_SIZE);
			tlb_page_demap(kernel_pmap, vdst);
			PMAP_UNLOCK(kernel_pmap);
		}
	} else if (mdst->md.color == -1) {
		if (msrc->md.color == DCACHE_COLOR(psrc)) {
			PMAP_STATS_INC(pmap_ncopy_page_sc);
			vsrc = TLB_PHYS_TO_DIRECT(psrc);
			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
			    PAGE_SIZE);
		} else {
			PMAP_STATS_INC(pmap_ncopy_page_soc);
			PMAP_LOCK(kernel_pmap);
			vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
			tp = tsb_kvtotte(vsrc);
			tp->tte_data =
			    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
			tp->tte_vpn = TV_VPN(vsrc, TS_8K);
			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
			    PAGE_SIZE);
			tlb_page_demap(kernel_pmap, vsrc);
			PMAP_UNLOCK(kernel_pmap);
		}
	} else {
		PMAP_STATS_INC(pmap_ncopy_page_oc);
		PMAP_LOCK(kernel_pmap);
		vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
		tp = tsb_kvtotte(vdst);
		tp->tte_data =
		    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
		tp->tte_vpn = TV_VPN(vdst, TS_8K);
		vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
		tp = tsb_kvtotte(vsrc);
		tp->tte_data =
		    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
		tp->tte_vpn = TV_VPN(vsrc, TS_8K);
		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
		tlb_page_demap(kernel_pmap, vdst);
		tlb_page_demap(kernel_pmap, vsrc);
		PMAP_UNLOCK(kernel_pmap);
	}
}

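/*
 * Create a temporary CPU-private mapping for a single page.  The direct
 * map address is returned when the page's cache color permits; otherwise
 * a correctly colored mapping is set up in the per-CPU quick-map window
 * inside a critical section that pmap_quick_remove_page() later exits.
 */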
vm_offset_t
pmap_quick_enter_page(vm_page_t m)
{
	vm_paddr_t pa;
	vm_offset_t qaddr;
	struct tte *tp;

	pa = VM_PAGE_TO_PHYS(m);
	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa))
		return (TLB_PHYS_TO_DIRECT(pa));

	critical_enter();
	qaddr = PCPU_GET(qmap_addr);
	qaddr += (PAGE_SIZE * ((DCACHE_COLORS + DCACHE_COLOR(pa) -
	    DCACHE_COLOR(qaddr)) % DCACHE_COLORS));
	tp = tsb_kvtotte(qaddr);

	KASSERT(tp->tte_data == 0, ("pmap_quick_enter_page: PTE busy"));

	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
	tp->tte_vpn = TV_VPN(qaddr, TS_8K);

	return (qaddr);
}

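/*
 * Tear down a mapping created by pmap_quick_enter_page().  Direct map
 * addresses need no cleanup; quick-map addresses are demapped from the
 * local TLBs before the critical section is exited.
 */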
void
pmap_quick_remove_page(vm_offset_t addr)
{
	vm_offset_t qaddr;
	struct tte *tp;

	if (addr >= VM_MIN_DIRECT_ADDRESS)
		return;

	tp = tsb_kvtotte(addr);
	qaddr = PCPU_GET(qmap_addr);

	KASSERT((addr >= qaddr) &&
	    (addr < (qaddr + (PAGE_SIZE * DCACHE_COLORS))),
	    ("pmap_quick_remove_page: invalid address"));
	KASSERT(tp->tte_data != 0, ("pmap_quick_remove_page: PTE not in use"));

	stxa(TLB_DEMAP_VA(addr) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE,
	    ASI_DMMU_DEMAP, 0);
	stxa(TLB_DEMAP_VA(addr) | TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE,
	    ASI_IMMU_DEMAP, 0);
	flush(KERNBASE);
	TTE_ZERO(tp);
	critical_exit();
}

int unmapped_buf_allowed;

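/*
 * Unmapped buffer I/O is not supported (unmapped_buf_allowed is left at
 * zero), so pmap_copy_pages() is not expected to be called.
 */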
void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
    vm_offset_t b_offset, int xfersize)
{

	panic("pmap_copy_pages: not implemented");
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pm, vm_page_t m)
{
	struct tte *tp;
	int loops;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_page_exists_quick: page %p is not managed", m));
	loops = 0;
	rv = FALSE;
	rw_wlock(&tte_list_global_lock);
	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
		if ((tp->tte_data & TD_PV) == 0)
			continue;
		if (TTE_GET_PMAP(tp) == pm) {
			rv = TRUE;
			break;
		}
		if (++loops >= 16)
			break;
	}
	rw_wunlock(&tte_list_global_lock);
	return (rv);
}

/*
 * Return the number of managed mappings to the given physical page
 * that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	struct tte *tp;
	int count;

	count = 0;
	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (count);
	rw_wlock(&tte_list_global_lock);
	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
		if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
			count++;
	rw_wunlock(&tte_list_global_lock);
	return (count);
}

/*
 * Remove all pages from the specified address space; this aids process
 * exit speed.  This is much faster than pmap_remove() when running down
 * an entire address space.  Only works for the current pmap.
 */
void
pmap_remove_pages(pmap_t pm)
{

}

/*
 * Returns TRUE if the given page has a managed mapping.
 */
boolean_t
pmap_page_is_mapped(vm_page_t m)
{
	struct tte *tp;
	boolean_t rv;

	rv = FALSE;
	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (rv);
	rw_wlock(&tte_list_global_lock);
	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
		if ((tp->tte_data & TD_PV) != 0) {
			rv = TRUE;
			break;
		}
	rw_wunlock(&tte_list_global_lock);
	return (rv);
}

/*
 * Return a count of reference bits for a page, clearing those bits.
 * It is not necessary for every reference bit to be cleared, but it
 * is necessary that 0 only be returned when there are truly no
 * reference bits set.
 *
 * As an optimization, update the page's dirty field if a modified bit is
 * found while counting reference bits.  This opportunistic update can be
 * performed at low cost and can eliminate the need for some future calls
 * to pmap_is_modified().  However, since this function stops after
 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
 * dirty pages.  Those dirty pages will only be detected by a future call
 * to pmap_is_modified().
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct tte *tpf;
	struct tte *tpn;
	struct tte *tp;
	u_long data;
	int count;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_ts_referenced: page %p is not managed", m));
	count = 0;
	rw_wlock(&tte_list_global_lock);
	if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
		tpf = tp;
		do {
			tpn = TAILQ_NEXT(tp, tte_link);
			TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
			TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
			if ((tp->tte_data & TD_PV) == 0)
				continue;
			data = atomic_clear_long(&tp->tte_data, TD_REF);
			if ((data & TD_W) != 0)
				vm_page_dirty(m);
			if ((data & TD_REF) != 0 && ++count >=
			    PMAP_TS_REFERENCED_MAX)
				break;
		} while ((tp = tpn) != NULL && tp != tpf);
	}
	rw_wunlock(&tte_list_global_lock);
	return (count);
}

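/*
 * Return whether or not the specified physical page was modified in any
 * physical maps.
 */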
boolean_t
pmap_is_modified(vm_page_t m)
{
	struct tte *tp;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_modified: page %p is not managed", m));
	rv = FALSE;

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no TTEs can have TD_W set.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return (rv);
	rw_wlock(&tte_list_global_lock);
	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
		if ((tp->tte_data & TD_PV) == 0)
			continue;
		if ((tp->tte_data & TD_W) != 0) {
			rv = TRUE;
			break;
		}
	}
	rw_wunlock(&tte_list_global_lock);
	return (rv);
}

/*
 *	pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	boolean_t rv;

	PMAP_LOCK(pmap);
	rv = tsb_tte_lookup(pmap, addr) == NULL;
	PMAP_UNLOCK(pmap);
	return (rv);
}

/*
 * Return whether or not the specified physical page was referenced
 * in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{
	struct tte *tp;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	rv = FALSE;
	rw_wlock(&tte_list_global_lock);
	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
		if ((tp->tte_data & TD_PV) == 0)
			continue;
		if ((tp->tte_data & TD_REF) != 0) {
			rv = TRUE;
			break;
		}
	}
	rw_wunlock(&tte_list_global_lock);
	return (rv);
}

/*
 * This function is advisory.
 */
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
}

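/*
 * Clear the modify bits on the specified physical page.
 */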
void
pmap_clear_modify(vm_page_t m)
{
	struct tte *tp;
	u_long data;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_modify: page %p is not managed", m));
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	KASSERT(!vm_page_xbusied(m),
	    ("pmap_clear_modify: page %p is exclusive busied", m));

	/*
	 * If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set.
	 * If the object containing the page is locked and the page is not
	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
	 */
	if ((m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_wlock(&tte_list_global_lock);
	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
		if ((tp->tte_data & TD_PV) == 0)
			continue;
		data = atomic_clear_long(&tp->tte_data, TD_W);
		if ((data & TD_W) != 0)
			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
	}
	rw_wunlock(&tte_list_global_lock);
}

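/*
 * Clear the write and modified bits in each of the given page's managed
 * mappings, dirtying the page for any mapping that was actually written
 * to, and clear the page's PGA_WRITEABLE flag.
 */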
void
pmap_remove_write(vm_page_t m)
{
	struct tte *tp;
	u_long data;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * set by another thread while the object is locked.  Thus,
	 * if PGA_WRITEABLE is clear, no page table entries need updating.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_wlock(&tte_list_global_lock);
	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
		if ((tp->tte_data & TD_PV) == 0)
			continue;
		data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
		if ((data & TD_W) != 0) {
			vm_page_dirty(m);
			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
		}
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(&tte_list_global_lock);
}

int
pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa)
{

	/* TODO; */
	return (0);
}

/*
 * Activate a user pmap.  The pmap must be activated before its address space
 * can be accessed in any way.
 */
void
pmap_activate(struct thread *td)
{
	struct vmspace *vm;
	struct pmap *pm;
	int context;

	critical_enter();
	vm = td->td_proc->p_vmspace;
	pm = vmspace_pmap(vm);

	context = PCPU_GET(tlb_ctx);
	if (context == PCPU_GET(tlb_ctx_max)) {
		tlb_flush_user();
		context = PCPU_GET(tlb_ctx_min);
	}
	PCPU_SET(tlb_ctx, context + 1);

	pm->pm_context[curcpu] = context;
#ifdef SMP
	CPU_SET_ATOMIC(PCPU_GET(cpuid), &pm->pm_active);
	atomic_store_acq_ptr((uintptr_t *)PCPU_PTR(pmap), (uintptr_t)pm);
#else
	CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
	PCPU_SET(pmap, pm);
#endif

	stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
	stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
	stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
	    TLB_CXR_PGSZ_MASK) | context);
	flush(KERNBASE);
	critical_exit();
}

void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{

}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{

}

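/*
 * Only the default memory attribute is supported here.
 */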
boolean_t
pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
{

	return (mode == VM_MEMATTR_DEFAULT);
}
