1/*	$NetBSD: pmap.c,v 1.275 2011/07/12 07:51:34 mrg Exp $	*/
2/*
3 *
4 * Copyright (C) 1996-1999 Eduardo Horvath.
5 * All rights reserved.
6 *
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.275 2011/07/12 07:51:34 mrg Exp $");
30
31#undef	NO_VCACHE /* Don't forget the locked TLB in dostart */
32#define	HWREF
33
34#include "opt_ddb.h"
35#include "opt_multiprocessor.h"
36#include "opt_modular.h"
37
38#include <sys/param.h>
39#include <sys/malloc.h>
40#include <sys/queue.h>
41#include <sys/systm.h>
42#include <sys/msgbuf.h>
43#include <sys/pool.h>
44#include <sys/exec.h>
45#include <sys/core.h>
46#include <sys/kcore.h>
47#include <sys/proc.h>
48#include <sys/atomic.h>
49#include <sys/cpu.h>
50
51#include <sys/exec_aout.h>	/* for MID_* */
52
53#include <uvm/uvm.h>
54
55#include <machine/pcb.h>
56#include <machine/sparc64.h>
57#include <machine/ctlreg.h>
58#include <machine/promlib.h>
59#include <machine/kcore.h>
60#include <machine/bootinfo.h>
61
62#include <sparc64/sparc64/cache.h>
63
64#ifdef DDB
65#include <machine/db_machdep.h>
66#include <ddb/db_command.h>
67#include <ddb/db_sym.h>
68#include <ddb/db_variables.h>
69#include <ddb/db_extern.h>
70#include <ddb/db_access.h>
71#include <ddb/db_output.h>
72#else
73#define Debugger()
74#define db_printf	printf
75#endif
76
77#define	MEG		(1<<20) /* 1MB */
78#define	KB		(1<<10)	/* 1KB */
79
paddr_t cpu0paddr;		/* contiguous phys memory preallocated for cpus */
81
82/* These routines are in assembly to allow access thru physical mappings */
83extern int64_t pseg_get_real(struct pmap *, vaddr_t);
84extern int pseg_set_real(struct pmap *, vaddr_t, int64_t, paddr_t);
85
86/*
87 * Diatribe on ref/mod counting:
88 *
89 * First of all, ref/mod info must be non-volatile.  Hence we need to keep it
90 * in the pv_entry structure for each page.  (We could bypass this for the
91 * vm_page, but that's a long story....)
92 *
93 * This architecture has nice, fast traps with lots of space for software bits
94 * in the TTE.  To accelerate ref/mod counts we make use of these features.
95 *
96 * When we map a page initially, we place a TTE in the page table.  It's
97 * inserted with the TLB_W and TLB_ACCESS bits cleared.  If a page is really
98 * writable we set the TLB_REAL_W bit for the trap handler.
99 *
100 * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS
 * bit in the appropriate TTE in the page table.  Whenever we take a protection
102 * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD
103 * bits to enable writing and mark the page as modified.
104 *
105 * This means that we may have ref/mod information all over the place.  The
106 * pmap routines must traverse the page tables of all pmaps with a given page
107 * and collect/clear all the ref/mod information and copy it into the pv_entry.
108 */
109
110#ifdef	NO_VCACHE
111#define	FORCE_ALIAS	1
112#else
113#define FORCE_ALIAS	0
114#endif
115
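/*
 * The flags below live in the low bits of pv_va; PV_VAMASK strips them off
 * to recover the page-aligned VA (see PV_MATCH and PV_SETVA).
 */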
116#define	PV_ALIAS	0x1LL
117#define PV_REF		0x2LL
118#define PV_MOD		0x4LL
119#define PV_NVC		0x8LL
120#define PV_NC		0x10LL
#define PV_WE		0x20LL	/* Debug -- this page was writable sometime */
122#define PV_MASK		(0x03fLL)
123#define PV_VAMASK	(~(PAGE_SIZE - 1))
124#define PV_MATCH(pv,va)	(!(((pv)->pv_va ^ (va)) & PV_VAMASK))
125#define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | \
126					(((pv)->pv_va) & PV_MASK)))
127
128struct pool_cache pmap_cache;
129struct pool_cache pmap_pv_cache;
130
131pv_entry_t	pmap_remove_pv(struct pmap *, vaddr_t, struct vm_page *);
132void	pmap_enter_pv(struct pmap *, vaddr_t, paddr_t, struct vm_page *,
133			   pv_entry_t);
134void	pmap_page_cache(struct pmap *, paddr_t, int);
135
136/*
137 * First and last managed physical addresses.
138 * XXX only used for dumping the system.
139 */
140paddr_t	vm_first_phys, vm_num_phys;
141
142/*
143 * Here's the CPU TSB stuff.  It's allocated in pmap_bootstrap.
144 */
int tsbsize;		/* tsbents = 512 * 2^tsbsize */
146#define TSBENTS (512<<tsbsize)
147#define	TSBSIZE	(TSBENTS * 16)
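/* Each TSB entry is a 16-byte tag/data pair, so tsbsize 0 yields an 8KB TSB. */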
148
149static struct pmap kernel_pmap_;
150struct pmap *const kernel_pmap_ptr = &kernel_pmap_;
151
152static int ctx_alloc(struct pmap *);
153static bool pmap_is_referenced_locked(struct vm_page *);
154
155static void ctx_free(struct pmap *, struct cpu_info *);
156
157/*
158 * Check if any MMU has a non-zero context
159 */
160static inline bool
161pmap_has_ctx(struct pmap *p)
162{
163	int i;
164
165	/* any context on any cpu? */
166	for (i = 0; i < sparc_ncpus; i++)
167		if (p->pm_ctx[i] > 0)
168			return true;
169
170	return false;
171}
172
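/*
 * pmap_ctx(PM) is the MMU context number PM holds on the current CPU;
 * uniprocessor kernels only ever use slot 0 of pm_ctx[].
 */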
173#ifdef MULTIPROCESSOR
174#define pmap_ctx(PM)	((PM)->pm_ctx[cpu_number()])
175#else
176#define pmap_ctx(PM)	((PM)->pm_ctx[0])
177#endif
178
179/*
180 * Check if this pmap has a live mapping on some MMU.
181 */
182static inline bool
183pmap_is_on_mmu(struct pmap *p)
184{
185	/* The kernel pmap is always on all MMUs */
186	if (p == pmap_kernel())
187		return true;
188
189	return pmap_has_ctx(p);
190}
191
192/*
193 * Virtual and physical addresses of the start and end of kernel text
194 * and data segments.
195 */
196vaddr_t ktext;
197paddr_t ktextp;
198vaddr_t ektext;
199paddr_t ektextp;
200vaddr_t kdata;
201paddr_t kdatap;
202vaddr_t ekdata;
203paddr_t ekdatap;
204
205/*
206 * Kernel 4MB pages.
207 */
208extern struct tlb_entry *kernel_tlbs;
209extern int kernel_tlb_slots;
210
211static int npgs;
212
213vaddr_t	vmmap;			/* one reserved MI vpage for /dev/mem */
214
215int phys_installed_size;		/* Installed physical memory */
216struct mem_region *phys_installed;
217
218paddr_t avail_start, avail_end;	/* These are used by ps & family */
219
220static int ptelookup_va(vaddr_t va);
221
222static inline void
223clrx(void *addr)
224{
225	__asm volatile("clrx [%0]" : : "r" (addr) : "memory");
226}
227
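/*
 * Drop any cached TSB entries (data and instruction) for va in pm on each
 * active CPU where the pmap currently has a context (the kernel pmap
 * always qualifies).
 */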
228static void
229tsb_invalidate(vaddr_t va, pmap_t pm)
230{
231	struct cpu_info *ci;
232	int ctx;
233	bool kpm = (pm == pmap_kernel());
234	int i;
235	int64_t tag;
236
237	i = ptelookup_va(va);
238#ifdef MULTIPROCESSOR
239	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
240		if (!CPUSET_HAS(cpus_active, ci->ci_index))
241			continue;
242#else
243		ci = curcpu();
244#endif
245		ctx = pm->pm_ctx[ci->ci_index];
246		if (kpm || ctx > 0) {
247			tag = TSB_TAG(0, ctx, va);
248			if (ci->ci_tsb_dmmu[i].tag == tag) {
249				clrx(&ci->ci_tsb_dmmu[i].data);
250			}
251			if (ci->ci_tsb_immu[i].tag == tag) {
252				clrx(&ci->ci_tsb_immu[i].data);
253			}
254		}
255#ifdef MULTIPROCESSOR
256	}
257#endif
258}
259
260struct prom_map *prom_map;
261int prom_map_size;
262
263#ifdef DEBUG
264struct {
265	int kernel;	/* entering kernel mapping */
266	int user;	/* entering user mapping */
267	int ptpneeded;	/* needed to allocate a PT page */
268	int pwchange;	/* no mapping change, just wiring or protection */
269	int wchange;	/* no mapping change, just wiring */
270	int mchange;	/* was mapped but mapping to different page */
271	int managed;	/* a managed page */
272	int firstpv;	/* first mapping for this PA */
273	int secondpv;	/* second mapping for this PA */
274	int ci;		/* cache inhibited */
275	int unmanaged;	/* not a managed page */
276	int flushes;	/* cache flushes */
277	int cachehit;	/* new entry forced valid entry out */
278} enter_stats;
279struct {
280	int calls;
281	int removes;
282	int flushes;
283	int tflushes;	/* TLB flushes */
284	int pidflushes;	/* HW pid stolen */
285	int pvfirst;
286	int pvsearch;
287} remove_stats;
288#define	ENTER_STAT(x)	do { enter_stats.x ++; } while (0)
289#define	REMOVE_STAT(x)	do { remove_stats.x ++; } while (0)
290
291#define	PDB_CREATE		0x000001
292#define	PDB_DESTROY		0x000002
293#define	PDB_REMOVE		0x000004
294#define	PDB_CHANGEPROT		0x000008
295#define	PDB_ENTER		0x000010
296#define	PDB_DEMAP		0x000020	/* used in locore */
297#define	PDB_REF			0x000040
298#define	PDB_COPY		0x000080
299#define	PDB_MMU_ALLOC		0x000100
300#define	PDB_MMU_STEAL		0x000200
301#define	PDB_CTX_ALLOC		0x000400
302#define	PDB_CTX_STEAL		0x000800
303#define	PDB_MMUREG_ALLOC	0x001000
304#define	PDB_MMUREG_STEAL	0x002000
305#define	PDB_CACHESTUFF		0x004000
306#define	PDB_ALIAS		0x008000
307#define PDB_EXTRACT		0x010000
308#define	PDB_BOOT		0x020000
309#define	PDB_BOOT1		0x040000
310#define	PDB_GROW		0x080000
311#define	PDB_CTX_FLUSHALL	0x100000
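/* pmapdebug is a bitmask of the PDB_* flags above; it gates the DPRINTF()s. */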
312int	pmapdebug = 0;
313/* Number of H/W pages stolen for page tables */
314int	pmap_pages_stolen = 0;
315
316#define	BDPRINTF(n, f)	if (pmapdebug & (n)) prom_printf f
317#define	DPRINTF(n, f)	if (pmapdebug & (n)) printf f
318#else
319#define	ENTER_STAT(x)	do { /* nothing */ } while (0)
320#define	REMOVE_STAT(x)	do { /* nothing */ } while (0)
321#define	BDPRINTF(n, f)
322#define	DPRINTF(n, f)
323#endif
324
325#define pv_check()
326
327static int pmap_get_page(paddr_t *);
328static void pmap_free_page(paddr_t, sparc64_cpuset_t);
329static void pmap_free_page_noflush(paddr_t);
330
331/*
332 * Global pmap locks.
333 */
334static kmutex_t pmap_lock;
335static bool lock_available = false;
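/* Set by pmap_init() once the mutexes are initialized and safe to take. */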
336
337/*
338 * Support for big page sizes.  This maps the page size to the
339 * page bits.  That is: these are the bits between 8K pages and
340 * larger page sizes that cause aliasing.
341 */
342#define PSMAP_ENTRY(MASK, CODE)	{ .mask = MASK, .code = CODE }
343struct page_size_map page_size_map[] = {
344#ifdef DEBUG
345	PSMAP_ENTRY(0, PGSZ_8K & 0),	/* Disable large pages */
346#endif
347	PSMAP_ENTRY((4 * 1024 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_4M),
348	PSMAP_ENTRY((512 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_512K),
349	PSMAP_ENTRY((64 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_64K),
350	PSMAP_ENTRY((8 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_8K),
351	PSMAP_ENTRY(0, 0),
352};
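/*
 * Example: the 4MB entry's mask is 0x3fe000, so the 4MB page size is only
 * chosen when neither the VA nor the TTE has any of those bits set, i.e.
 * both are 4MB aligned (see the PROM mapping loop in pmap_bootstrap()).
 */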
353
354/*
355 * This probably shouldn't be necessary, but it stops USIII machines from
356 * breaking in general, and not just for MULTIPROCESSOR.
357 */
358#define USE_LOCKSAFE_PSEG_GETSET
359#if defined(USE_LOCKSAFE_PSEG_GETSET)
360
361static kmutex_t pseg_lock;
362
363static __inline__ int64_t
364pseg_get_locksafe(struct pmap *pm, vaddr_t va)
365{
366	int64_t rv;
367	bool took_lock = lock_available /*&& pm == pmap_kernel()*/;
368
369	if (__predict_true(took_lock))
370		mutex_enter(&pseg_lock);
371	rv = pseg_get_real(pm, va);
372	if (__predict_true(took_lock))
373		mutex_exit(&pseg_lock);
374	return rv;
375}
376
377static __inline__ int
378pseg_set_locksafe(struct pmap *pm, vaddr_t va, int64_t data, paddr_t ptp)
379{
380	int rv;
381	bool took_lock = lock_available /*&& pm == pmap_kernel()*/;
382
383	if (__predict_true(took_lock))
384		mutex_enter(&pseg_lock);
385	rv = pseg_set_real(pm, va, data, ptp);
386	if (__predict_true(took_lock))
387		mutex_exit(&pseg_lock);
388	return rv;
389}
390
391#define pseg_get(pm, va)		pseg_get_locksafe(pm, va)
392#define pseg_set(pm, va, data, ptp)	pseg_set_locksafe(pm, va, data, ptp)
393
394#else /* USE_LOCKSAFE_PSEG_GETSET */
395
396#define pseg_get(pm, va)		pseg_get_real(pm, va)
397#define pseg_set(pm, va, data, ptp)	pseg_set_real(pm, va, data, ptp)
398
399#endif /* USE_LOCKSAFE_PSEG_GETSET */
400
401/*
402 * Enter a TTE into the kernel pmap only.  Don't do anything else.
403 *
404 * Use only during bootstrapping since it does no locking and
405 * can lose ref/mod info!!!!
406 *
407 */
408static void pmap_enter_kpage(vaddr_t va, int64_t data)
409{
410	paddr_t newp;
411
412	newp = 0UL;
413	while (pseg_set(pmap_kernel(), va, data, newp) & 1) {
414		if (!pmap_get_page(&newp)) {
415			prom_printf("pmap_enter_kpage: out of pages\n");
416			panic("pmap_enter_kpage");
417		}
418
419		ENTER_STAT(ptpneeded);
420		BDPRINTF(PDB_BOOT1,
421			 ("pseg_set: pm=%p va=%p data=%lx newp %lx\n",
422			  pmap_kernel(), va, (long)data, (long)newp));
423#ifdef DEBUG
424		if (pmapdebug & PDB_BOOT1)
425		{int i; for (i=0; i<140000000; i++) ;}
426#endif
427	}
428}
429
430/*
431 * Check the bootargs to see if we need to enable bootdebug.
432 */
433#ifdef DEBUG
434static void pmap_bootdebug(void)
435{
436	const char *cp = prom_getbootargs();
437
438	for (;;)
439		switch (*++cp) {
440		case '\0':
441			return;
442		case 'V':
443			pmapdebug |= PDB_BOOT|PDB_BOOT1;
444			break;
445		case 'D':
446			pmapdebug |= PDB_BOOT1;
447			break;
448		}
449}
450#endif
451
452
/*
 * Calculate the correct number of page colors to use.  This should be the
 * E$ size divided by PAGE_SIZE.  However, different CPUs can have different
 * sized E$s, so we use the largest number of colors required by any CPU.
 * For example, a 1MB direct-mapped E$ with 8KB pages needs 128 colors.
 */
458static int pmap_calculate_colors(void)
459{
460	int node;
461	int size, assoc, color, maxcolor = 1;
462
463	for (node = prom_firstchild(prom_findroot()); node != 0;
464	     node = prom_nextsibling(node)) {
465		char *name = prom_getpropstring(node, "device_type");
466		if (strcmp("cpu", name) != 0)
467			continue;
468
469		/* Found a CPU, get the E$ info. */
470		size = prom_getpropint(node, "ecache-size", -1);
471		if (size == -1) {
472			prom_printf("pmap_calculate_colors: node %x has "
473				"no ecache-size\n", node);
474			/* If we can't get the E$ size, skip the node */
475			continue;
476		}
477
478		assoc = prom_getpropint(node, "ecache-associativity", 1);
479		color = size/assoc/PAGE_SIZE;
480		if (color > maxcolor)
481			maxcolor = color;
482	}
483	return (maxcolor);
484}
485
486static void pmap_alloc_bootargs(void)
487{
488	char *v;
489
490	v = OF_claim(NULL, 2*PAGE_SIZE, PAGE_SIZE);
491	if ((v == NULL) || (v == (void*)-1))
492		panic("Can't claim two pages of memory.");
493
494	memset(v, 0, 2*PAGE_SIZE);
495
496	cpu_args = (struct cpu_bootargs*)v;
497}
498
499#if defined(MULTIPROCESSOR)
500static void pmap_mp_init(void);
501
502static void
503pmap_mp_init(void)
504{
505	pte_t *tp;
506	char *v;
507	int i;
508
509	extern void cpu_mp_startup(void);
510
511	if ((v = OF_claim(NULL, PAGE_SIZE, PAGE_SIZE)) == NULL) {
512		panic("pmap_mp_init: Cannot claim a page.");
513	}
514
515	memcpy(v, mp_tramp_code, mp_tramp_code_len);
516	*(u_long *)(v + mp_tramp_tlb_slots) = kernel_tlb_slots;
517	*(u_long *)(v + mp_tramp_func) = (u_long)cpu_mp_startup;
518	*(u_long *)(v + mp_tramp_ci) = (u_long)cpu_args;
519	tp = (pte_t *)(v + mp_tramp_code_len);
520	for (i = 0; i < kernel_tlb_slots; i++) {
521		tp[i].tag  = kernel_tlbs[i].te_va;
522		tp[i].data = TSB_DATA(0,		/* g */
523				PGSZ_4M,		/* sz */
524				kernel_tlbs[i].te_pa,	/* pa */
525				1, /* priv */
526				1, /* write */
527				1, /* cache */
528				1, /* aliased */
529				1, /* valid */
530				0 /* ie */);
531		tp[i].data |= TLB_L | TLB_CV;
532		DPRINTF(PDB_BOOT1, ("xtlb[%d]: Tag: %" PRIx64 " Data: %"
533				PRIx64 "\n", i, tp[i].tag, tp[i].data));
534	}
535
536	for (i = 0; i < PAGE_SIZE; i += sizeof(long))
537		flush(v + i);
538
539	cpu_spinup_trampoline = (vaddr_t)v;
540}
541#else
542#define pmap_mp_init()	((void)0)
543#endif
544
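/*
 * Translate a kernel text/data VA to its physical address by searching the
 * locked 4MB TLB entries set up at boot; panics for any other address.
 */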
545paddr_t pmap_kextract(vaddr_t va);
546
547paddr_t
548pmap_kextract(vaddr_t va)
549{
550	int i;
551	paddr_t paddr = (paddr_t)-1;
552
553	for (i = 0; i < kernel_tlb_slots; i++) {
554		if ((va & ~PAGE_MASK_4M) == kernel_tlbs[i].te_va) {
555			paddr = kernel_tlbs[i].te_pa +
556				(paddr_t)(va & PAGE_MASK_4M);
557			break;
558		}
559	}
560
561	if (i == kernel_tlb_slots) {
562		panic("pmap_kextract: Address %p is not from kernel space.\n"
563				"Data segment is too small?\n", (void*)va);
564	}
565
566	return (paddr);
567}
568
569/*
 * Bootstrap kernel allocator.  It allocates from unused space in the 4MB
 * kernel data segment, which means that:
572 *
573 * - Access to allocated memory will never generate a trap
574 * - Allocated chunks are never reclaimed or freed
575 * - Allocation calls do not change PROM memlists
576 */
577static struct mem_region kdata_mem_pool;
578
579static void
580kdata_alloc_init(vaddr_t va_start, vaddr_t va_end)
581{
582	vsize_t va_size = va_end - va_start;
583
584	kdata_mem_pool.start = va_start;
585	kdata_mem_pool.size  = va_size;
586
587	BDPRINTF(PDB_BOOT, ("kdata_alloc_init(): %d bytes @%p.\n", va_size,
588				va_start));
589}
590
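/*
 * Carve "size" bytes, aligned to "align", out of the remaining kernel data
 * segment pool; allocations are never returned.
 */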
591static vaddr_t
592kdata_alloc(vsize_t size, vsize_t align)
593{
594	vaddr_t va;
595	vsize_t asize;
596
597	asize = roundup(kdata_mem_pool.start, align) - kdata_mem_pool.start;
598
599	kdata_mem_pool.start += asize;
600	kdata_mem_pool.size  -= asize;
601
602	if (kdata_mem_pool.size < size) {
603		panic("kdata_alloc(): Data segment is too small.\n");
604	}
605
606	va = kdata_mem_pool.start;
607	kdata_mem_pool.start += size;
608	kdata_mem_pool.size  -= size;
609
610	BDPRINTF(PDB_BOOT, ("kdata_alloc(): Allocated %d@%p, %d free.\n",
611				size, (void*)va, kdata_mem_pool.size));
612
613	return (va);
614}
615
616/*
617 * Unified routine for reading PROM properties.
618 */
619static void
620pmap_read_memlist(const char *device, const char *property, void **ml,
621		  int *ml_size, vaddr_t (* ml_alloc)(vsize_t, vsize_t))
622{
623	void *va;
624	int size, handle;
625
626	if ( (handle = prom_finddevice(device)) == 0) {
627		prom_printf("pmap_read_memlist(): No %s device found.\n",
628				device);
629		prom_halt();
630	}
631	if ( (size = OF_getproplen(handle, property)) < 0) {
632		prom_printf("pmap_read_memlist(): %s/%s has no length.\n",
633				device, property);
634		prom_halt();
635	}
636	if ( (va = (void*)(* ml_alloc)(size, sizeof(uint64_t))) == NULL) {
637		prom_printf("pmap_read_memlist(): Cannot allocate memlist.\n");
638		prom_halt();
639	}
640	if (OF_getprop(handle, property, va, size) <= 0) {
641		prom_printf("pmap_read_memlist(): Cannot read %s/%s.\n",
642				device, property);
643		prom_halt();
644	}
645
646	*ml = va;
647	*ml_size = size;
648}
649
650/*
651 * This is called during bootstrap, before the system is really initialized.
652 *
653 * It's called with the start and end virtual addresses of the kernel.  We
654 * bootstrap the pmap allocator now.  We will allocate the basic structures we
655 * need to bootstrap the VM system here: the page frame tables, the TSB, and
656 * the free memory lists.
657 *
658 * Now all this is becoming a bit obsolete.  maxctx is still important, but by
659 * separating the kernel text and data segments we really would need to
660 * provide the start and end of each segment.  But we can't.  The rodata
661 * segment is attached to the end of the kernel segment and has nothing to
662 * delimit its end.  We could still pass in the beginning of the kernel and
663 * the beginning and end of the data segment but we could also just as easily
664 * calculate that all in here.
665 *
666 * To handle the kernel text, we need to do a reverse mapping of the start of
667 * the kernel, then traverse the free memory lists to find out how big it is.
668 */
669
670void
671pmap_bootstrap(u_long kernelstart, u_long kernelend)
672{
673#ifdef MODULAR
674	extern vaddr_t module_start, module_end;
675#endif
676	extern char etext[], data_start[];	/* start of data segment */
677	extern int msgbufmapped;
678	struct mem_region *mp, *mp1, *avail, *orig;
679	int i, j, pcnt, msgbufsiz;
680	size_t s, sz;
681	int64_t data;
682	vaddr_t va, intstk;
683	uint64_t phys_msgbuf;
684	paddr_t newp = 0;
685
686	void *prom_memlist;
687	int prom_memlist_size;
688
689	BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\n"));
690
691	cache_setup_funcs();
692
693	/*
694	 * Calculate kernel size.
695	 */
696	ktext   = kernelstart;
697	ktextp  = pmap_kextract(ktext);
698	ektext  = roundup((vaddr_t)etext, PAGE_SIZE_4M);
699	ektextp = roundup(pmap_kextract((vaddr_t)etext), PAGE_SIZE_4M);
700
701	kdata   = (vaddr_t)data_start;
702	kdatap  = pmap_kextract(kdata);
703	ekdata  = roundup(kernelend, PAGE_SIZE_4M);
704	ekdatap = roundup(pmap_kextract(kernelend), PAGE_SIZE_4M);
705
706	BDPRINTF(PDB_BOOT, ("Virtual layout: text %lx-%lx, data %lx-%lx.\n",
707				ktext, ektext, kdata, ekdata));
708	BDPRINTF(PDB_BOOT, ("Physical layout: text %lx-%lx, data %lx-%lx.\n",
709				ktextp, ektextp, kdatap, ekdatap));
710
711	/* Initialize bootstrap allocator. */
712	kdata_alloc_init(kernelend + 1 * 1024 * 1024, ekdata);
713
714#ifdef DEBUG
715	pmap_bootdebug();
716#endif
717
718	pmap_alloc_bootargs();
719	pmap_mp_init();
720
721	/*
722	 * set machine page size
723	 */
724	uvmexp.pagesize = NBPG;
725	uvmexp.ncolors = pmap_calculate_colors();
726	uvm_setpagesize();
727
728	/*
	 * Get hold of the message buffer.
730	 */
731	msgbufp = (struct kern_msgbuf *)(vaddr_t)MSGBUF_VA;
732/* XXXXX -- increase msgbufsiz for uvmhist printing */
733	msgbufsiz = 4*PAGE_SIZE /* round_page(sizeof(struct msgbuf)) */;
734	BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\n",
735			    (long)msgbufp, (long)msgbufsiz));
736	if ((long)msgbufp !=
737	    (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz)))
738		prom_printf(
739		    "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\n",
740		    (void *)msgbufp, (long)phys_msgbuf);
741	phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN);
742	BDPRINTF(PDB_BOOT,
743		("We should have the memory at %lx, let's map it in\n",
744			phys_msgbuf));
745	if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp,
746			  -1/* sunos does this */) == -1) {
747		prom_printf("Failed to map msgbuf\n");
748	} else {
749		BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\n",
750			(void *)msgbufp));
751	}
752	msgbufmapped = 1;	/* enable message buffer */
753	initmsgbuf((void *)msgbufp, msgbufsiz);
754
755	/*
756	 * Find out how much RAM we have installed.
757	 */
758	BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\n"));
759	pmap_read_memlist("/memory", "reg", &prom_memlist, &prom_memlist_size,
760			kdata_alloc);
761	phys_installed = prom_memlist;
762	phys_installed_size = prom_memlist_size / sizeof(*phys_installed);
763
764#ifdef DEBUG
765	if (pmapdebug & PDB_BOOT1) {
766		/* print out mem list */
767		prom_printf("Installed physical memory:\n");
768		for (i = 0; i < phys_installed_size; i++) {
769			prom_printf("memlist start %lx size %lx\n",
770					(u_long)phys_installed[i].start,
771					(u_long)phys_installed[i].size);
772		}
773	}
774#endif
775
776	BDPRINTF(PDB_BOOT1, ("Calculating physmem:"));
777	for (i = 0; i < phys_installed_size; i++)
778		physmem += btoc(phys_installed[i].size);
779	BDPRINTF(PDB_BOOT1, (" result %x or %d pages\n",
780			     (int)physmem, (int)physmem));
781
782	/*
783	 * Calculate approx TSB size.  This probably needs tweaking.
784	 */
785	if (physmem < btoc(64 * 1024 * 1024))
786		tsbsize = 0;
787	else if (physmem < btoc(512 * 1024 * 1024))
788		tsbsize = 1;
789	else
790		tsbsize = 2;
791
792	/*
793	 * Save the prom translations
794	 */
795	pmap_read_memlist("/virtual-memory", "translations", &prom_memlist,
796			&prom_memlist_size, kdata_alloc);
797	prom_map = prom_memlist;
798	prom_map_size = prom_memlist_size / sizeof(struct prom_map);
799
800#ifdef DEBUG
801	if (pmapdebug & PDB_BOOT) {
802		/* print out mem list */
803		prom_printf("Prom xlations:\n");
804		for (i = 0; i < prom_map_size; i++) {
805			prom_printf("start %016lx size %016lx tte %016lx\n",
806				    (u_long)prom_map[i].vstart,
807				    (u_long)prom_map[i].vsize,
808				    (u_long)prom_map[i].tte);
809		}
810		prom_printf("End of prom xlations\n");
811	}
812#endif
813
814	/*
	 * Here's a quick in-lined reverse bubble sort.  It orders the PROM
	 * translations by descending virtual address.
817	 */
818	for (i = 0; i < prom_map_size; i++) {
819		for (j = i; j < prom_map_size; j++) {
820			if (prom_map[j].vstart > prom_map[i].vstart) {
821				struct prom_map tmp;
822
823				tmp = prom_map[i];
824				prom_map[i] = prom_map[j];
825				prom_map[j] = tmp;
826			}
827		}
828	}
829#ifdef DEBUG
830	if (pmapdebug & PDB_BOOT) {
831		/* print out mem list */
832		prom_printf("Prom xlations:\n");
833		for (i = 0; i < prom_map_size; i++) {
834			prom_printf("start %016lx size %016lx tte %016lx\n",
835				    (u_long)prom_map[i].vstart,
836				    (u_long)prom_map[i].vsize,
837				    (u_long)prom_map[i].tte);
838		}
839		prom_printf("End of prom xlations\n");
840	}
841#endif
842
843	/*
	 * Allocate ncpu*64KB of memory for the cpu_info & stack structures now.
845	 */
846	cpu0paddr = prom_alloc_phys(8 * PAGE_SIZE * sparc_ncpus, 8 * PAGE_SIZE);
847	if (cpu0paddr == 0) {
848		prom_printf("Cannot allocate cpu_infos\n");
849		prom_halt();
850	}
851
852	/*
853	 * Now the kernel text segment is in its final location we can try to
854	 * find out how much memory really is free.
855	 */
856	pmap_read_memlist("/memory", "available", &prom_memlist,
857			&prom_memlist_size, kdata_alloc);
858	orig = prom_memlist;
859	sz  = prom_memlist_size;
860	pcnt = prom_memlist_size / sizeof(*orig);
861
862	BDPRINTF(PDB_BOOT1, ("Available physical memory:\n"));
863	avail = (struct mem_region*)kdata_alloc(sz, sizeof(uint64_t));
864	for (i = 0; i < pcnt; i++) {
865		avail[i] = orig[i];
866		BDPRINTF(PDB_BOOT1, ("memlist start %lx size %lx\n",
867					(u_long)orig[i].start,
868					(u_long)orig[i].size));
869	}
870	BDPRINTF(PDB_BOOT1, ("End of available physical memory\n"));
871
872	BDPRINTF(PDB_BOOT, ("ktext %08lx[%08lx] - %08lx[%08lx] : "
873				"kdata %08lx[%08lx] - %08lx[%08lx]\n",
874				(u_long)ktext, (u_long)ktextp,
875				(u_long)ektext, (u_long)ektextp,
876				(u_long)kdata, (u_long)kdatap,
877				(u_long)ekdata, (u_long)ekdatap));
878#ifdef DEBUG
879	if (pmapdebug & PDB_BOOT1) {
880		/* print out mem list */
881		prom_printf("Available %lx physical memory before cleanup:\n",
882			    (u_long)avail);
883		for (i = 0; i < pcnt; i++) {
884			prom_printf("memlist start %lx size %lx\n",
885				    (u_long)avail[i].start,
886				    (u_long)avail[i].size);
887		}
888		prom_printf("End of available physical memory before cleanup\n");
889		prom_printf("kernel physical text size %08lx - %08lx\n",
890			    (u_long)ktextp, (u_long)ektextp);
891		prom_printf("kernel physical data size %08lx - %08lx\n",
892			    (u_long)kdatap, (u_long)ekdatap);
893	}
894#endif
895	/*
	 * Here's another quick in-lined bubble sort.
897	 */
898	for (i = 0; i < pcnt; i++) {
899		for (j = i; j < pcnt; j++) {
900			if (avail[j].start < avail[i].start) {
901				struct mem_region tmp;
902				tmp = avail[i];
903				avail[i] = avail[j];
904				avail[j] = tmp;
905			}
906		}
907	}
908
909	/* Throw away page zero if we have it. */
910	if (avail->start == 0) {
911		avail->start += PAGE_SIZE;
912		avail->size -= PAGE_SIZE;
913	}
914
915	/*
916	 * Now we need to remove the area we valloc'ed from the available
917	 * memory lists.  (NB: we may have already alloc'ed the entire space).
918	 */
919	npgs = 0;
920	for (mp = avail, i = 0; i < pcnt; i++, mp = &avail[i]) {
921		/*
922		 * Now page align the start of the region.
923		 */
924		s = mp->start % PAGE_SIZE;
925		if (mp->size >= s) {
926			mp->size -= s;
927			mp->start += s;
928		}
929		/*
930		 * And now align the size of the region.
931		 */
932		mp->size -= mp->size % PAGE_SIZE;
933		/*
934		 * Check whether some memory is left here.
935		 */
936		if (mp->size == 0) {
937			memcpy(mp, mp + 1,
938			      (pcnt - (mp - avail)) * sizeof *mp);
939			pcnt--;
940			mp--;
941			continue;
942		}
943		s = mp->start;
944		sz = mp->size;
945		npgs += btoc(sz);
946		for (mp1 = avail; mp1 < mp; mp1++)
947			if (s < mp1->start)
948				break;
949		if (mp1 < mp) {
950			memcpy(mp1 + 1, mp1, (char *)mp - (char *)mp1);
951			mp1->start = s;
952			mp1->size = sz;
953		}
954#ifdef DEBUG
955/* Clear all memory we give to the VM system.  I want to make sure
956 * the PROM isn't using it for something, so this should break the PROM.
957 */
958
959/* Calling pmap_zero_page() at this point also hangs some machines
960 * so don't do it at all. -- pk 26/02/2002
961 */
962#if 0
963		{
964			paddr_t p;
965			for (p = mp->start; p < mp->start+mp->size;
966			     p += PAGE_SIZE)
967				pmap_zero_page(p);
968		}
969#endif
970#endif /* DEBUG */
971		/*
972		 * In future we should be able to specify both allocated
973		 * and free.
974		 */
975		BDPRINTF(PDB_BOOT1, ("uvm_page_physload(%lx, %lx)\n",
976					(long)mp->start,
977					(long)(mp->start + mp->size)));
978		uvm_page_physload(
979			atop(mp->start),
980			atop(mp->start+mp->size),
981			atop(mp->start),
982			atop(mp->start+mp->size),
983			VM_FREELIST_DEFAULT);
984	}
985
986#ifdef DEBUG
987	if (pmapdebug & PDB_BOOT) {
988		/* print out mem list */
989		prom_printf("Available physical memory after cleanup:\n");
990		for (i = 0; i < pcnt; i++) {
991			prom_printf("avail start %lx size %lx\n",
992				    (long)avail[i].start, (long)avail[i].size);
993		}
994		prom_printf("End of available physical memory after cleanup\n");
995	}
996#endif
997	/*
998	 * Allocate and clear out pmap_kernel()->pm_segs[]
999	 */
1000	pmap_kernel()->pm_refs = 1;
1001	memset(&pmap_kernel()->pm_ctx, 0, sizeof(pmap_kernel()->pm_ctx));
1002
	/* Allocate a page for the kernel segment table. */
1004	do {
1005		pmap_get_page(&newp);
1006	} while (!newp);
1007	pmap_kernel()->pm_segs=(paddr_t *)(u_long)newp;
1008	pmap_kernel()->pm_physaddr = newp;
1009
1010	/*
1011	 * finish filling out kernel pmap.
1012	 */
1013
1014	BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\n",
1015	    (long)pmap_kernel()->pm_physaddr));
1016	/*
	 * Tell pmap about our msgbuf -- Hope this works already
1018	 */
1019#ifdef DEBUG
1020	BDPRINTF(PDB_BOOT1, ("Calling consinit()\n"));
1021	if (pmapdebug & PDB_BOOT1)
1022		consinit();
	BDPRINTF(PDB_BOOT1, ("Inserting msgbuf into pmap_kernel()\n"));
1024#endif
1025	/* it's not safe to call pmap_enter so we need to do this ourselves */
1026	va = (vaddr_t)msgbufp;
1027	prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, -1);
1028	while (msgbufsiz) {
1029		data = TSB_DATA(0 /* global */,
1030			PGSZ_8K,
1031			phys_msgbuf,
1032			1 /* priv */,
1033			1 /* Write */,
1034			1 /* Cacheable */,
1035			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1036			1 /* valid */,
1037			0 /* IE */);
1038		pmap_enter_kpage(va, data);
1039		va += PAGE_SIZE;
1040		msgbufsiz -= PAGE_SIZE;
1041		phys_msgbuf += PAGE_SIZE;
1042	}
	BDPRINTF(PDB_BOOT1, ("Done inserting msgbuf into pmap_kernel()\n"));
1044
1045	BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\n"));
1046	for (i = 0; i < prom_map_size; i++)
1047		if (prom_map[i].vstart && ((prom_map[i].vstart >> 32) == 0))
1048			for (j = 0; j < prom_map[i].vsize; j += PAGE_SIZE) {
1049				int k;
1050
1051				for (k = 0; page_size_map[k].mask; k++) {
1052					if (((prom_map[i].vstart |
1053					      prom_map[i].tte) &
1054					      page_size_map[k].mask) == 0 &&
1055					      page_size_map[k].mask <
1056					      prom_map[i].vsize)
1057						break;
1058				}
1059#ifdef DEBUG
1060				page_size_map[k].use++;
1061#endif
1062				/* Enter PROM map into pmap_kernel() */
1063				pmap_enter_kpage(prom_map[i].vstart + j,
1064					(prom_map[i].tte + j) | TLB_EXEC |
1065					page_size_map[k].code);
1066			}
1067	BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\n"));
1068
1069	/*
1070	 * Fix up start of kernel heap.
1071	 */
1072	vmmap = (vaddr_t)roundup(ekdata, 4*MEG);
1073	/* Let's keep 1 page of redzone after the kernel */
1074	vmmap += PAGE_SIZE;
1075	{
1076		extern void main(void);
1077		vaddr_t u0va;
1078		paddr_t pa;
1079
1080		u0va = vmmap;
1081
1082		BDPRINTF(PDB_BOOT1,
1083			("Inserting lwp0 USPACE into pmap_kernel() at %p\n",
1084				vmmap));
1085
1086		while (vmmap < u0va + 2*USPACE) {
1087			int64_t data1;
1088
1089			if (!pmap_get_page(&pa))
1090				panic("pmap_bootstrap: no pages");
1091			prom_map_phys(pa, PAGE_SIZE, vmmap, -1);
1092			data1 = TSB_DATA(0 /* global */,
1093				PGSZ_8K,
1094				pa,
1095				1 /* priv */,
1096				1 /* Write */,
1097				1 /* Cacheable */,
1098				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1099				1 /* valid */,
1100				0 /* IE */);
1101			pmap_enter_kpage(vmmap, data1);
1102			vmmap += PAGE_SIZE;
1103		}
1104		BDPRINTF(PDB_BOOT1,
1105			 ("Done inserting stack 0 into pmap_kernel()\n"));
1106
1107		/* Now map in and initialize our cpu_info structure */
1108#ifdef DIAGNOSTIC
1109		vmmap += PAGE_SIZE; /* redzone -- XXXX do we need one? */
1110#endif
1111		if ((vmmap ^ INTSTACK) & VA_ALIAS_MASK)
			vmmap += PAGE_SIZE; /* Match up virtual color for D$ */
1113		intstk = vmmap;
1114		cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK);
1115
1116		BDPRINTF(PDB_BOOT1,
1117			("Inserting cpu_info into pmap_kernel() at %p\n",
1118				 cpus));
1119		/* Now map in all 8 pages of interrupt stack/cpu_info */
1120		pa = cpu0paddr;
1121		prom_map_phys(pa, 64*KB, vmmap, -1);
1122
1123		/*
1124		 * Also map it in as the interrupt stack.
1125		 * This lets the PROM see this if needed.
1126		 *
1127		 * XXXX locore.s does not flush these mappings
1128		 * before installing the locked TTE.
1129		 */
1130		prom_map_phys(pa, 64*KB, INTSTACK, -1);
1131		for (i = 0; i < 8; i++) {
1132			int64_t data1;
1133
1134			data1 = TSB_DATA(0 /* global */,
1135				PGSZ_8K,
1136				pa,
1137				1 /* priv */,
1138				1 /* Write */,
1139				1 /* Cacheable */,
1140				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1141				1 /* valid */,
1142				0 /* IE */);
1143			pmap_enter_kpage(vmmap, data1);
1144			vmmap += PAGE_SIZE;
1145			pa += PAGE_SIZE;
1146		}
1147		BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\n"));
1148
1149		/* Initialize our cpu_info structure */
1150		memset((void *)intstk, 0, 64 * KB);
1151		cpus->ci_self = cpus;
1152		cpus->ci_next = NULL;
1153		cpus->ci_curlwp = &lwp0;
1154		cpus->ci_flags = CPUF_PRIMARY;
1155		cpus->ci_cpuid = CPU_UPAID;
1156		cpus->ci_fplwp = NULL;
1157		cpus->ci_eintstack = NULL;
1158		cpus->ci_spinup = main; /* Call main when we're running. */
1159		cpus->ci_paddr = cpu0paddr;
1160		cpus->ci_cpcb = (struct pcb *)u0va;
1161		cpus->ci_idepth = -1;
1162		memset(cpus->ci_intrpending, -1, sizeof(cpus->ci_intrpending));
1163
1164		uvm_lwp_setuarea(&lwp0, u0va);
1165		lwp0.l_md.md_tf = (struct trapframe64*)(u0va + USPACE
1166		    - sizeof(struct trapframe64));
1167
1168		cpu0paddr += 64 * KB;
1169
1170		CPUSET_CLEAR(cpus_active);
1171		CPUSET_ADD(cpus_active, 0);
1172
1173		cpu_pmap_prepare(cpus, true);
1174		cpu_pmap_init(cpus);
1175
1176		/* The rest will be done at CPU attach time. */
1177		BDPRINTF(PDB_BOOT1,
1178			 ("Done inserting cpu_info into pmap_kernel()\n"));
1179	}
1180
1181	vmmap = (vaddr_t)reserve_dumppages((void *)(u_long)vmmap);
1182
1183#ifdef MODULAR
1184	/*
1185	 * Reserve 16 MB of VA for module loading. Right now our full
1186	 * GENERIC kernel is about 13 MB, so this looks good enough.
1187	 * If we make this bigger, we should adjust the KERNEND and
1188	 * associated defines in param.h.
1189	 */
1190	module_start = vmmap;
1191	vmmap += 16 * 1024*1024;
1192	module_end = vmmap;
1193#endif
1194
1195	/*
1196	 * Set up bounds of allocatable memory for vmstat et al.
1197	 */
1198	avail_start = avail->start;
1199	for (mp = avail; mp->size; mp++)
1200		avail_end = mp->start+mp->size;
1201
1202	BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\n"));
1203
1204	BDPRINTF(PDB_BOOT, ("left kdata: %" PRId64 " @%" PRIx64 ".\n",
1205				kdata_mem_pool.size, kdata_mem_pool.start));
1206}
1207
1208/*
1209 * Allocate TSBs for both mmus from the locked kernel data segment page.
1210 * This is run before the cpu itself is activated (or by the first cpu
1211 * itself)
1212 */
1213void
1214cpu_pmap_prepare(struct cpu_info *ci, bool initial)
1215{
1216	/* allocate our TSBs */
1217	ci->ci_tsb_dmmu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
1218	ci->ci_tsb_immu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
1219	memset(ci->ci_tsb_dmmu, 0, TSBSIZE);
1220	memset(ci->ci_tsb_immu, 0, TSBSIZE);
1221	if (!initial) {
1222		KASSERT(ci != curcpu());
1223		/*
		 * Initially share ctxbusy with the boot cpu; the
		 * cpu will replace it as soon as it runs (and can
		 * probe the number of available contexts itself).
		 * Until then only context 0 (aka kernel) will be
1228		 * referenced anyway.
1229		 */
1230		ci->ci_numctx = curcpu()->ci_numctx;
1231		ci->ci_ctxbusy = curcpu()->ci_ctxbusy;
1232	}
1233
1234	BDPRINTF(PDB_BOOT1, ("cpu %d: TSB allocated at %p/%p size %08x\n",
1235	    ci->ci_index, ci->ci_tsb_dmmu, ci->ci_tsb_immu, TSBSIZE));
1236}
1237
1238/*
1239 * Initialize the per CPU parts for the cpu running this code.
1240 */
1241void
1242cpu_pmap_init(struct cpu_info *ci)
1243{
1244	size_t ctxsize;
1245
1246	/*
1247	 * We delay initialising ci_ctx_lock here as LOCKDEBUG isn't
	 * running for cpu0 yet.
1249	 */
1250	ci->ci_pmap_next_ctx = 1;
1251#ifdef SUN4V
1252#error find out if we have 16 or 13 bit context ids
1253#else
1254	ci->ci_numctx = 0x2000; /* all SUN4U use 13 bit contexts */
1255#endif
1256	ctxsize = sizeof(paddr_t)*ci->ci_numctx;
1257	ci->ci_ctxbusy = (paddr_t *)kdata_alloc(ctxsize, sizeof(uint64_t));
1258	memset(ci->ci_ctxbusy, 0, ctxsize);
1259	LIST_INIT(&ci->ci_pmap_ctxlist);
1260
1261	/* mark kernel context as busy */
1262	ci->ci_ctxbusy[0] = pmap_kernel()->pm_physaddr;
1263}
1264
1265/*
1266 * Initialize anything else for pmap handling.
1267 * Called during vm_init().
1268 */
1269void
1270pmap_init(void)
1271{
1272	struct vm_page *pg;
1273	struct pglist pglist;
1274	uint64_t data;
1275	paddr_t pa;
1276	psize_t size;
1277	vaddr_t va;
1278
1279	BDPRINTF(PDB_BOOT1, ("pmap_init()\n"));
1280
1281	size = sizeof(struct pv_entry) * physmem;
1282	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
1283		(paddr_t)PAGE_SIZE, (paddr_t)0, &pglist, 1, 0) != 0)
1284		panic("pmap_init: no memory");
1285
1286	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY);
1287	if (va == 0)
1288		panic("pmap_init: no memory");
1289
1290	/* Map the pages */
1291	TAILQ_FOREACH(pg, &pglist, pageq.queue) {
1292		pa = VM_PAGE_TO_PHYS(pg);
1293		pmap_zero_page(pa);
1294		data = TSB_DATA(0 /* global */,
1295			PGSZ_8K,
1296			pa,
1297			1 /* priv */,
1298			1 /* Write */,
1299			1 /* Cacheable */,
1300			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1301			1 /* valid */,
1302			0 /* IE */);
1303		pmap_enter_kpage(va, data);
1304		va += PAGE_SIZE;
1305	}
1306
1307	/*
1308	 * initialize the pmap pools.
1309	 */
1310	pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap),
1311	    SPARC64_BLOCK_SIZE, 0, 0, "pmappl", NULL, IPL_NONE, NULL, NULL,
1312	    NULL);
1313	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
1314	    PR_LARGECACHE, "pv_entry", NULL, IPL_NONE, NULL, NULL, NULL);
1315
1316	vm_first_phys = avail_start;
1317	vm_num_phys = avail_end - avail_start;
1318
1319	mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE);
1320#if defined(USE_LOCKSAFE_PSEG_GETSET)
1321	mutex_init(&pseg_lock, MUTEX_SPIN, IPL_VM);
1322#endif
1323	lock_available = true;
1324}
1325
1326/*
1327 * How much virtual space is available to the kernel?
1328 */
1329static vaddr_t kbreak; /* End of kernel VA */
1330void
1331pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1332{
1333
1334	/*
1335	 * Reserve one segment for kernel virtual memory
1336	 */
1337	/* Reserve two pages for pmap_copy_page && /dev/mem */
1338	*start = kbreak = (vaddr_t)(vmmap + 2*PAGE_SIZE);
1339	*end = VM_MAX_KERNEL_ADDRESS;
1340	BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\n", *start, *end));
1341}
1342
1343/*
1344 * Preallocate kernel page tables to a specified VA.
1345 * This simply loops through the first TTE for each
1346 * page table from the beginning of the kernel pmap,
1347 * reads the entry, and if the result is
1348 * zero (either invalid entry or no page table) it stores
1349 * a zero there, populating page tables in the process.
 * This is not the most efficient technique but I don't
1351 * expect it to be called that often.
1352 */
1353vaddr_t
1354pmap_growkernel(vaddr_t maxkvaddr)
1355{
1356	struct pmap *pm = pmap_kernel();
1357	paddr_t pa;
1358
1359	if (maxkvaddr >= KERNEND) {
1360		printf("WARNING: cannot extend kernel pmap beyond %p to %p\n",
1361		       (void *)KERNEND, (void *)maxkvaddr);
1362		return (kbreak);
1363	}
1364	DPRINTF(PDB_GROW, ("pmap_growkernel(%lx...%lx)\n", kbreak, maxkvaddr));
1365	/* Align with the start of a page table */
1366	for (kbreak &= (-1 << PDSHIFT); kbreak < maxkvaddr;
1367	     kbreak += (1 << PDSHIFT)) {
1368		if (pseg_get(pm, kbreak) & TLB_V)
1369			continue;
1370
1371		pa = 0;
1372		while (pseg_set(pm, kbreak, 0, pa) & 1) {
1373			DPRINTF(PDB_GROW,
1374			    ("pmap_growkernel: extending %lx\n", kbreak));
1375			pa = 0;
1376			if (!pmap_get_page(&pa))
1377				panic("pmap_growkernel: no pages");
1378			ENTER_STAT(ptpneeded);
1379		}
1380	}
1381	return (kbreak);
1382}
1383
1384/*
1385 * Create and return a physical map.
1386 */
1387struct pmap *
1388pmap_create(void)
1389{
1390	struct pmap *pm;
1391
1392	DPRINTF(PDB_CREATE, ("pmap_create()\n"));
1393
1394	pm = pool_cache_get(&pmap_cache, PR_WAITOK);
1395	memset(pm, 0, sizeof *pm);
1396	DPRINTF(PDB_CREATE, ("pmap_create(): created %p\n", pm));
1397
1398	mutex_init(&pm->pm_obj_lock, MUTEX_DEFAULT, IPL_NONE);
1399	uvm_obj_init(&pm->pm_obj, NULL, false, 1);
1400	uvm_obj_setlock(&pm->pm_obj, &pm->pm_obj_lock);
1401
1402	if (pm != pmap_kernel()) {
1403		while (!pmap_get_page(&pm->pm_physaddr)) {
1404			uvm_wait("pmap_create");
1405		}
1406		pm->pm_segs = (paddr_t *)(u_long)pm->pm_physaddr;
1407	}
1408	DPRINTF(PDB_CREATE, ("pmap_create(%p): ctx %d\n", pm, pmap_ctx(pm)));
1409	return pm;
1410}
1411
1412/*
1413 * Add a reference to the given pmap.
1414 */
1415void
1416pmap_reference(struct pmap *pm)
1417{
1418
1419	atomic_inc_uint(&pm->pm_refs);
1420}
1421
1422/*
1423 * Retire the given pmap from service.
1424 * Should only be called if the map contains no valid mappings.
1425 */
1426void
1427pmap_destroy(struct pmap *pm)
1428{
1429#ifdef MULTIPROCESSOR
1430	struct cpu_info *ci;
1431	sparc64_cpuset_t pmap_cpus_active;
1432#else
1433#define pmap_cpus_active 0
1434#endif
1435	struct vm_page *pg, *nextpg;
1436
1437	if ((int)atomic_dec_uint_nv(&pm->pm_refs) > 0) {
1438		return;
1439	}
1440	DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm));
1441#ifdef MULTIPROCESSOR
1442	CPUSET_CLEAR(pmap_cpus_active);
1443	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
1444		/* XXXMRG: Move the lock inside one or both tests? */
1445		mutex_enter(&ci->ci_ctx_lock);
1446		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
1447			if (pm->pm_ctx[ci->ci_index] > 0) {
1448				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
1449				ctx_free(pm, ci);
1450			}
1451		}
1452		mutex_exit(&ci->ci_ctx_lock);
1453	}
1454#else
1455	if (pmap_ctx(pm)) {
1456		mutex_enter(&curcpu()->ci_ctx_lock);
1457		ctx_free(pm, curcpu());
1458		mutex_exit(&curcpu()->ci_ctx_lock);
1459	}
1460#endif
1461
1462	/* we could be a little smarter and leave pages zeroed */
1463	for (pg = TAILQ_FIRST(&pm->pm_obj.memq); pg != NULL; pg = nextpg) {
1464#ifdef DIAGNOSTIC
1465		struct vm_page_md *md = VM_PAGE_TO_MD(pg);
1466#endif
1467
1468		KASSERT((pg->flags & PG_MARKER) == 0);
1469		nextpg = TAILQ_NEXT(pg, listq.queue);
1470		TAILQ_REMOVE(&pm->pm_obj.memq, pg, listq.queue);
1471		KASSERT(md->mdpg_pvh.pv_pmap == NULL);
1472		dcache_flush_page_cpuset(VM_PAGE_TO_PHYS(pg), pmap_cpus_active);
1473		uvm_pagefree(pg);
1474	}
1475	pmap_free_page((paddr_t)(u_long)pm->pm_segs, pmap_cpus_active);
1476
1477	uvm_obj_destroy(&pm->pm_obj, false);
1478	mutex_destroy(&pm->pm_obj_lock);
1479	pool_cache_put(&pmap_cache, pm);
1480}
1481
1482/*
1483 * Copy the range specified by src_addr/len
1484 * from the source map to the range dst_addr/len
1485 * in the destination map.
1486 *
1487 * This routine is only advisory and need not do anything.
1488 */
1489void
1490pmap_copy(struct pmap *dst_pmap, struct pmap *src_pmap, vaddr_t dst_addr, vsize_t len, vaddr_t src_addr)
1491{
1492
1493	DPRINTF(PDB_CREATE, ("pmap_copy(%p, %p, %p, %lx, %p)\n",
1494			     dst_pmap, src_pmap, (void *)(u_long)dst_addr,
1495			     (u_long)len, (void *)(u_long)src_addr));
1496}
1497
1498/*
1499 * Activate the address space for the specified process.  If the
1500 * process is the current process, load the new MMU context.
1501 */
1502void
1503pmap_activate(struct lwp *l)
1504{
1505	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
1506
1507	if (pmap == pmap_kernel()) {
1508		return;
1509	}
1510
1511	/*
1512	 * This is essentially the same thing that happens in cpu_switchto()
1513	 * when the newly selected process is about to run, except that we
1514	 * have to make sure to clean the register windows before we set
1515	 * the new context.
1516	 */
1517
1518	if (l != curlwp) {
1519		return;
1520	}
1521	write_user_windows();
1522	pmap_activate_pmap(pmap);
1523}
1524
1525void
1526pmap_activate_pmap(struct pmap *pmap)
1527{
1528
1529	if (pmap_ctx(pmap) == 0) {
1530		(void) ctx_alloc(pmap);
1531	}
1532	dmmu_set_secondary_context(pmap_ctx(pmap));
1533}
1534
1535/*
1536 * Deactivate the address space of the specified process.
1537 */
1538void
1539pmap_deactivate(struct lwp *l)
1540{
1541}
1542
1543/*
1544 * pmap_kenter_pa:		[ INTERFACE ]
1545 *
1546 *	Enter a va -> pa mapping into the kernel pmap without any
1547 *	physical->virtual tracking.
1548 *
1549 *	Note: no locking is necessary in this function.
1550 */
1551void
1552pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1553{
1554	pte_t tte;
1555	paddr_t ptp;
1556	struct pmap *pm = pmap_kernel();
1557	int i;
1558
1559	KASSERT(va < INTSTACK || va > EINTSTACK);
1560	KASSERT(va < kdata || va > ekdata);
1561
1562	/*
1563	 * Construct the TTE.
1564	 */
1565
1566	ENTER_STAT(unmanaged);
1567	if (pa & (PMAP_NVC|PMAP_NC)) {
1568		ENTER_STAT(ci);
1569	}
1570
1571	tte.data = TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1572			    (VM_PROT_WRITE & prot),
1573			    !(pa & PMAP_NC), pa & (PMAP_NVC), 1, 0);
1574	/* We don't track mod/ref here. */
1575	if (prot & VM_PROT_WRITE)
1576		tte.data |= TLB_REAL_W|TLB_W;
1577	if (prot & VM_PROT_EXECUTE)
1578		tte.data |= TLB_EXEC;
1579	tte.data |= TLB_TSB_LOCK;	/* wired */
1580	ptp = 0;
1581
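	/*
	 * pseg_set() keeps returning 1 while it needs a spare page-table
	 * page, so allocate one and retry until the mapping sticks.
	 */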
1582 retry:
1583	i = pseg_set(pm, va, tte.data, ptp);
1584	if (i & 1) {
1585		KASSERT((i & 4) == 0);
1586		ptp = 0;
1587		if (!pmap_get_page(&ptp))
1588			panic("pmap_kenter_pa: no pages");
1589		ENTER_STAT(ptpneeded);
1590		goto retry;
1591	}
1592	if (ptp && i == 0) {
1593		/* We allocated a spare page but didn't use it.  Free it. */
1594		printf("pmap_kenter_pa: freeing unused page %llx\n",
1595		       (long long)ptp);
1596		pmap_free_page_noflush(ptp);
1597	}
1598#ifdef DEBUG
1599	i = ptelookup_va(va);
1600	if (pmapdebug & PDB_ENTER)
1601		prom_printf("pmap_kenter_pa: va=%08x data=%08x:%08x "
1602			"tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32),
1603			(int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]);
1604	if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) {
1605		prom_printf("pmap_kenter_pa: evicting entry tag=%x:%08x "
1606			"data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1607			(int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag,
1608			(int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data,
1609			i, &curcpu()->ci_tsb_dmmu[i]);
1610		prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1611			va, (int)(tte.data>>32), (int)tte.data,	i,
1612			&curcpu()->ci_tsb_dmmu[i]);
1613	}
1614#endif
1615}
1616
1617/*
1618 * pmap_kremove:		[ INTERFACE ]
1619 *
1620 *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1621 *	for size bytes (assumed to be page rounded).
1622 */
1623void
1624pmap_kremove(vaddr_t va, vsize_t size)
1625{
1626	struct pmap *pm = pmap_kernel();
1627	int64_t data;
1628	paddr_t pa;
1629	int rv;
1630	bool flush = FALSE;
1631
1632	KASSERT(va < INTSTACK || va > EINTSTACK);
1633	KASSERT(va < kdata || va > ekdata);
1634
1635	DPRINTF(PDB_DEMAP, ("pmap_kremove: start 0x%lx size %lx\n", va, size));
1636	for (; size >= PAGE_SIZE; va += PAGE_SIZE, size -= PAGE_SIZE) {
1637
1638#ifdef DIAGNOSTIC
1639		/*
1640		 * Is this part of the permanent 4MB mapping?
1641		 */
1642		if (va >= ktext && va < roundup(ekdata, 4*MEG))
1643			panic("pmap_kremove: va=%08x in locked TLB", (u_int)va);
1644#endif
1645
1646		data = pseg_get(pm, va);
1647		if ((data & TLB_V) == 0) {
1648			continue;
1649		}
1650
1651		flush = TRUE;
1652		pa = data & TLB_PA_MASK;
1653
1654		/*
1655		 * We need to flip the valid bit and
1656		 * clear the access statistics.
1657		 */
1658
1659		rv = pseg_set(pm, va, 0, 0);
1660		if (rv & 1)
1661			panic("pmap_kremove: pseg_set needs spare, rv=%d\n",
1662			    rv);
1663		DPRINTF(PDB_DEMAP, ("pmap_kremove: seg %x pdir %x pte %x\n",
1664		    (int)va_to_seg(va), (int)va_to_dir(va),
1665		    (int)va_to_pte(va)));
1666		REMOVE_STAT(removes);
1667
1668		tsb_invalidate(va, pm);
1669		REMOVE_STAT(tflushes);
1670
1671		/*
1672		 * Here we assume nothing can get into the TLB
1673		 * unless it has a PTE.
1674		 */
1675
1676		tlb_flush_pte(va, pm);
1677		dcache_flush_page_all(pa);
1678	}
1679	if (flush)
1680		REMOVE_STAT(flushes);
1681}
1682
1683/*
1684 * Insert physical page at pa into the given pmap at virtual address va.
1685 * Supports 64-bit pa so we can map I/O space.
1686 */
1687
1688int
1689pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1690{
1691	pte_t tte;
1692	int64_t data;
1693	paddr_t opa = 0, ptp; /* XXX: gcc */
1694	pv_entry_t pvh, npv = NULL, freepv;
1695	struct vm_page *pg, *opg, *ptpg;
1696	int s, i, uncached = 0, error = 0;
1697	int size = PGSZ_8K; /* PMAP_SZ_TO_TTE(pa); */
1698	bool wired = (flags & PMAP_WIRED) != 0;
1699	bool wasmapped = FALSE;
1700	bool dopv = TRUE;
1701
1702	/*
1703	 * Is this part of the permanent mappings?
1704	 */
1705	KASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK);
1706	KASSERT(pm != pmap_kernel() || va < kdata || va > ekdata);
1707
1708	/* Grab a spare PV. */
1709	freepv = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
1710	if (__predict_false(freepv == NULL)) {
1711		if (flags & PMAP_CANFAIL)
1712			return (ENOMEM);
1713		panic("pmap_enter: no pv entries available");
1714	}
1715	freepv->pv_next = NULL;
1716
1717	/*
1718	 * If a mapping at this address already exists, check if we're
	 * entering the same PA again.  If it's different, remove it.
1720	 */
1721
1722	mutex_enter(&pmap_lock);
1723	data = pseg_get(pm, va);
1724	if (data & TLB_V) {
1725		wasmapped = TRUE;
1726		opa = data & TLB_PA_MASK;
1727		if (opa != pa) {
1728			opg = PHYS_TO_VM_PAGE(opa);
1729			if (opg != NULL) {
1730				npv = pmap_remove_pv(pm, va, opg);
1731			}
1732		}
1733	}
1734
1735	/*
1736	 * Construct the TTE.
1737	 */
1738	pg = PHYS_TO_VM_PAGE(pa);
1739	if (pg) {
1740		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1741
1742		pvh = &md->mdpg_pvh;
1743		uncached = (pvh->pv_va & (PV_ALIAS|PV_NVC));
1744#ifdef DIAGNOSTIC
1745		if ((flags & VM_PROT_ALL) & ~prot)
1746			panic("pmap_enter: access_type exceeds prot");
1747#endif
1748		/*
1749		 * If we don't have the traphandler do it,
1750		 * set the ref/mod bits now.
1751		 */
1752		if (flags & VM_PROT_ALL)
1753			pvh->pv_va |= PV_REF;
1754		if (flags & VM_PROT_WRITE)
1755			pvh->pv_va |= PV_MOD;
1756
1757		/*
1758		 * make sure we have a pv entry ready if we need one.
1759		 */
1760		if (pvh->pv_pmap == NULL || (wasmapped && opa == pa)) {
1761			if (npv != NULL) {
1762				/* free it */
1763				npv->pv_next = freepv;
1764				freepv = npv;
1765				npv = NULL;
1766			}
1767			if (wasmapped && opa == pa) {
1768				dopv = FALSE;
1769			}
1770		} else if (npv == NULL) {
1771			/* use the pre-allocated pv */
1772			npv = freepv;
1773			freepv = freepv->pv_next;
1774		}
1775		ENTER_STAT(managed);
1776	} else {
1777		ENTER_STAT(unmanaged);
1778		dopv = FALSE;
1779		if (npv != NULL) {
1780			/* free it */
1781			npv->pv_next = freepv;
1782			freepv = npv;
1783			npv = NULL;
1784		}
1785	}
1786
1787#ifndef NO_VCACHE
1788	if (pa & PMAP_NVC)
1789#endif
1790		uncached = 1;
1791	if (uncached) {
1792		ENTER_STAT(ci);
1793	}
1794	tte.data = TSB_DATA(0, size, pa, pm == pmap_kernel(),
1795		flags & VM_PROT_WRITE, !(pa & PMAP_NC),
1796		uncached, 1, pa & PMAP_LITTLE);
1797#ifdef HWREF
1798	if (prot & VM_PROT_WRITE)
1799		tte.data |= TLB_REAL_W;
1800	if (prot & VM_PROT_EXECUTE)
1801		tte.data |= TLB_EXEC;
1802#else
1803	/* If it needs ref accounting do nothing. */
1804	if (!(flags & VM_PROT_READ)) {
1805		mutex_exit(&pmap_lock);
1806		goto out;
1807	}
1808#endif
1809	if (flags & VM_PROT_EXECUTE) {
1810		if ((flags & (VM_PROT_READ|VM_PROT_WRITE)) == 0)
1811			tte.data |= TLB_EXEC_ONLY|TLB_EXEC;
1812		else
1813			tte.data |= TLB_EXEC;
1814	}
1815	if (wired)
1816		tte.data |= TLB_TSB_LOCK;
1817	ptp = 0;
1818
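	/*
	 * pseg_set() reports how the spare page "ptp" was consumed: 0x4 means
	 * it became an L3 page table, 0x2 an L2 page table, and 0x1 means a
	 * (new) spare page is still needed, so allocate one and retry.
	 */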
1819 retry:
1820	i = pseg_set(pm, va, tte.data, ptp);
1821	if (i & 4) {
1822		/* ptp used as L3 */
1823		KASSERT(ptp != 0);
1824		KASSERT((i & 3) == 0);
1825		ptpg = PHYS_TO_VM_PAGE(ptp);
1826		if (ptpg) {
1827			ptpg->offset = (uint64_t)va & (0xfffffLL << 23);
1828			TAILQ_INSERT_TAIL(&pm->pm_obj.memq, ptpg, listq.queue);
1829		} else {
1830			KASSERT(pm == pmap_kernel());
1831		}
1832	}
1833	if (i & 2) {
1834		/* ptp used as L2 */
1835		KASSERT(ptp != 0);
1836		KASSERT((i & 4) == 0);
1837		ptpg = PHYS_TO_VM_PAGE(ptp);
1838		if (ptpg) {
1839			ptpg->offset = (((uint64_t)va >> 43) & 0x3ffLL) << 13;
1840			TAILQ_INSERT_TAIL(&pm->pm_obj.memq, ptpg, listq.queue);
1841		} else {
1842			KASSERT(pm == pmap_kernel());
1843		}
1844	}
1845	if (i & 1) {
1846		KASSERT((i & 4) == 0);
1847		ptp = 0;
1848		if (!pmap_get_page(&ptp)) {
1849			mutex_exit(&pmap_lock);
1850			if (flags & PMAP_CANFAIL) {
1851				if (npv != NULL) {
1852					/* free it */
1853					npv->pv_next = freepv;
1854					freepv = npv;
1855				}
1856				error = ENOMEM;
1857				goto out;
1858			} else {
1859				panic("pmap_enter: no pages");
1860			}
1861		}
1862		ENTER_STAT(ptpneeded);
1863		goto retry;
1864	}
1865	if (ptp && i == 0) {
1866		/* We allocated a spare page but didn't use it.  Free it. */
1867		printf("pmap_enter: freeing unused page %llx\n",
1868		       (long long)ptp);
1869		pmap_free_page_noflush(ptp);
1870	}
1871	if (dopv) {
1872		pmap_enter_pv(pm, va, pa, pg, npv);
1873	}
1874
1875	mutex_exit(&pmap_lock);
1876#ifdef DEBUG
1877	i = ptelookup_va(va);
1878	if (pmapdebug & PDB_ENTER)
1879		prom_printf("pmap_enter: va=%08x data=%08x:%08x "
1880			"tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32),
1881			(int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]);
1882	if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) {
1883		prom_printf("pmap_enter: evicting entry tag=%x:%08x "
1884			"data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1885			(int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag,
1886			(int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data, i,
1887			&curcpu()->ci_tsb_dmmu[i]);
1888		prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1889			va, (int)(tte.data>>32), (int)tte.data, i,
1890			&curcpu()->ci_tsb_dmmu[i]);
1891	}
1892#endif
1893
1894	if (flags & (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)) {
1895
1896		/*
1897		 * preload the TSB with the new entry,
1898		 * since we're going to need it immediately anyway.
1899		 */
1900
1901		KASSERT(pmap_ctx(pm)>=0);
1902		i = ptelookup_va(va);
1903		tte.tag = TSB_TAG(0, pmap_ctx(pm), va);
1904		s = splhigh();
1905		if (wasmapped && pmap_is_on_mmu(pm)) {
1906			tsb_invalidate(va, pm);
1907		}
1908		if (flags & (VM_PROT_READ | VM_PROT_WRITE)) {
1909			curcpu()->ci_tsb_dmmu[i].tag = tte.tag;
1910			__asm volatile("" : : : "memory");
1911			curcpu()->ci_tsb_dmmu[i].data = tte.data;
1912		}
1913		if (flags & VM_PROT_EXECUTE) {
1914			curcpu()->ci_tsb_immu[i].tag = tte.tag;
1915			__asm volatile("" : : : "memory");
1916			curcpu()->ci_tsb_immu[i].data = tte.data;
1917		}
1918
1919		/*
1920		 * it's only necessary to flush the TLB if this page was
1921		 * previously mapped, but for some reason it's a lot faster
1922		 * for the fork+exit microbenchmark if we always do it.
1923		 */
1924
1925		KASSERT(pmap_ctx(pm)>=0);
1926#ifdef MULTIPROCESSOR
1927		if (wasmapped && pmap_is_on_mmu(pm))
1928			tlb_flush_pte(va, pm);
1929		else
1930			sp_tlb_flush_pte(va, pmap_ctx(pm));
1931#else
1932		tlb_flush_pte(va, pm);
1933#endif
1934		splx(s);
1935	} else if (wasmapped && pmap_is_on_mmu(pm)) {
1936		/* Force reload -- protections may be changed */
1937		KASSERT(pmap_ctx(pm)>=0);
1938		tsb_invalidate(va, pm);
1939		tlb_flush_pte(va, pm);
1940	}
1941
1942	/* We will let the fast mmu miss interrupt load the new translation */
1943	pv_check();
1944 out:
1945	/* Catch up on deferred frees. */
1946	for (; freepv != NULL; freepv = npv) {
1947		npv = freepv->pv_next;
1948		pool_cache_put(&pmap_pv_cache, freepv);
1949	}
1950	return error;
1951}
1952
1953void
1954pmap_remove_all(struct pmap *pm)
1955{
1956#ifdef MULTIPROCESSOR
1957	struct cpu_info *ci;
1958	sparc64_cpuset_t pmap_cpus_active;
1959#endif
1960
1961	if (pm == pmap_kernel()) {
1962		return;
1963	}
1964	write_user_windows();
1965	pm->pm_refs = 0;
1966
1967	/*
1968	 * XXXMRG: pmap_destroy() does exactly the same dance here.
1969	 * surely one of them isn't necessary?
1970	 */
1971#ifdef MULTIPROCESSOR
1972	CPUSET_CLEAR(pmap_cpus_active);
1973	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
1974		/* XXXMRG: Move the lock inside one or both tests? */
1975		mutex_enter(&ci->ci_ctx_lock);
1976		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
1977			if (pm->pm_ctx[ci->ci_index] > 0) {
1978				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
1979				ctx_free(pm, ci);
1980			}
1981		}
1982		mutex_exit(&ci->ci_ctx_lock);
1983	}
1984#else
1985	if (pmap_ctx(pm)) {
1986		mutex_enter(&curcpu()->ci_ctx_lock);
1987		ctx_free(pm, curcpu());
1988		mutex_exit(&curcpu()->ci_ctx_lock);
1989	}
1990#endif
1991
1992	REMOVE_STAT(flushes);
1993	/*
1994	 * XXXMRG: couldn't we do something less severe here, and
1995	 * only flush the right context on each CPU?
1996	 */
1997	blast_dcache();
1998}
1999
2000/*
2001 * Remove the given range of mapping entries.
2002 */
2003void
2004pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva)
2005{
2006	int64_t data;
2007	paddr_t pa;
2008	struct vm_page *pg;
2009	pv_entry_t pv, freepv = NULL;
2010	int rv;
2011	bool flush = FALSE;
2012
2013	/*
	 * In here we should check each pseg and, if there are no more entries,
	 * free it.  It's just that linear scans of 8K pages get expensive.
2016	 */
2017
2018	KASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK);
2019	KASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata);
2020
2021	mutex_enter(&pmap_lock);
2022	DPRINTF(PDB_REMOVE, ("pmap_remove(pm=%p, va=%p, endva=%p):", pm,
2023			     (void *)(u_long)va, (void *)(u_long)endva));
2024	REMOVE_STAT(calls);
2025
2026	/* Now do the real work */
2027	for (; va < endva; va += PAGE_SIZE) {
2028#ifdef DIAGNOSTIC
2029		/*
2030		 * Is this part of the permanent 4MB mapping?
2031		 */
2032		if (pm == pmap_kernel() && va >= ktext &&
2033			va < roundup(ekdata, 4*MEG))
2034			panic("pmap_remove: va=%08llx in locked TLB",
2035			      (long long)va);
2036#endif
2037
2038		data = pseg_get(pm, va);
2039		if ((data & TLB_V) == 0) {
2040			continue;
2041		}
2042
2043		flush = TRUE;
2044		/* First remove the pv entry, if there is one */
2045		pa = data & TLB_PA_MASK;
2046		pg = PHYS_TO_VM_PAGE(pa);
2047		if (pg) {
2048			pv = pmap_remove_pv(pm, va, pg);
2049			if (pv != NULL) {
2050				/* free it */
2051				pv->pv_next = freepv;
2052				freepv = pv;
2053			}
2054		}
2055
2056		/*
2057		 * We need to flip the valid bit and
2058		 * clear the access statistics.
2059		 */
2060
2061		rv = pseg_set(pm, va, 0, 0);
2062		if (rv & 1)
2063			panic("pmap_remove: pseg_set needed spare, rv=%d!\n",
2064			    rv);
2065
2066		DPRINTF(PDB_REMOVE, (" clearing seg %x pte %x\n",
2067				     (int)va_to_seg(va), (int)va_to_pte(va)));
2068		REMOVE_STAT(removes);
2069
2070		if (pm != pmap_kernel() && !pmap_has_ctx(pm))
2071			continue;
2072
2073		/*
		 * If the pmap is being torn down, don't bother flushing;
		 * we have already done so.
2076		 */
2077
2078		if (!pm->pm_refs)
2079			continue;
2080
2081		/*
2082		 * Here we assume nothing can get into the TLB
2083		 * unless it has a PTE.
2084		 */
2085
2086		KASSERT(pmap_ctx(pm)>=0);
2087		tsb_invalidate(va, pm);
2088		REMOVE_STAT(tflushes);
2089		tlb_flush_pte(va, pm);
2090		dcache_flush_page_all(pa);
2091	}
2092	if (flush && pm->pm_refs)
2093		REMOVE_STAT(flushes);
2094	DPRINTF(PDB_REMOVE, ("\n"));
2095	pv_check();
2096	mutex_exit(&pmap_lock);
2097
2098	/* Catch up on deferred frees. */
2099	for (; freepv != NULL; freepv = pv) {
2100		pv = freepv->pv_next;
2101		pool_cache_put(&pmap_pv_cache, freepv);
2102	}
2103}
2104
2105/*
2106 * Change the protection on the specified range of this pmap.
2107 */
2108void
2109pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
2110{
2111	paddr_t pa;
2112	int64_t data;
2113	struct vm_page *pg;
2114	pv_entry_t pv;
2115	int rv;
2116
2117	KASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
2118	KASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);
2119
2120	if (prot == VM_PROT_NONE) {
2121		pmap_remove(pm, sva, eva);
2122		return;
2123	}
2124
2125	sva = trunc_page(sva);
2126	for (; sva < eva; sva += PAGE_SIZE) {
2127#ifdef DEBUG
2128		/*
2129		 * Is this part of the permanent 4MB mapping?
2130		 */
2131		if (pm == pmap_kernel() && sva >= ktext &&
2132		    sva < roundup(ekdata, 4 * MEG)) {
2133			prom_printf("pmap_protect: va=%08x in locked TLB\n",
2134			    sva);
2135			prom_abort();
2136			return;
2137		}
2138#endif
2139		DPRINTF(PDB_CHANGEPROT, ("pmap_protect: va %p\n",
2140		    (void *)(u_long)sva));
2141		data = pseg_get(pm, sva);
2142		if ((data & TLB_V) == 0) {
2143			continue;
2144		}
2145
2146		pa = data & TLB_PA_MASK;
2147		DPRINTF(PDB_CHANGEPROT|PDB_REF,
2148			("pmap_protect: va=%08x data=%08llx "
2149			 "seg=%08x pte=%08x\n",
2150			 (u_int)sva, (long long)pa, (int)va_to_seg(sva),
2151			 (int)va_to_pte(sva)));
2152
2153		pg = PHYS_TO_VM_PAGE(pa);
2154		if (pg) {
2155			struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2156
2157			/* Save REF/MOD info */
2158			pv = &md->mdpg_pvh;
2159			if (data & TLB_ACCESS)
2160				pv->pv_va |= PV_REF;
2161			if (data & TLB_MODIFY)
2162				pv->pv_va |= PV_MOD;
2163		}
2164
2165		/* Just do the pmap and TSB, not the pv_list */
2166		if ((prot & VM_PROT_WRITE) == 0)
2167			data &= ~(TLB_W|TLB_REAL_W);
2168		if ((prot & VM_PROT_EXECUTE) == 0)
2169			data &= ~(TLB_EXEC);
2170
2171		rv = pseg_set(pm, sva, data, 0);
2172		if (rv & 1)
2173			panic("pmap_protect: pseg_set needs spare! rv=%d\n",
2174			    rv);
2175
2176		if (pm != pmap_kernel() && !pmap_has_ctx(pm))
2177			continue;
2178
2179		KASSERT(pmap_ctx(pm)>=0);
2180		tsb_invalidate(sva, pm);
2181		tlb_flush_pte(sva, pm);
2182	}
2183	pv_check();
2184}
2185
2186/*
2187 * Extract the physical page address associated
2188 * with the given map/virtual_address pair.
2189 */
2190bool
2191pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap)
2192{
2193	paddr_t pa;
2194	int64_t data = 0;
2195
2196	if (pm == pmap_kernel() && va >= kdata && va < roundup(ekdata, 4*MEG)) {
2197		/* Need to deal w/locked TLB entry specially. */
2198		pa = pmap_kextract(va);
2199		DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n",
2200				      (u_long)va, (unsigned long long)pa));
2201		if (pap != NULL)
2202			*pap = pa;
2203		return TRUE;
2204	} else if (pm == pmap_kernel() && va >= ktext && va < ektext) {
2205		/* Need to deal w/locked TLB entry specially. */
2206		pa = pmap_kextract(va);
2207		DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n",
2208		    (u_long)va, (unsigned long long)pa));
2209		if (pap != NULL)
2210			*pap = pa;
2211		return TRUE;
2212	} else if (pm == pmap_kernel() && va >= INTSTACK && va < (INTSTACK + 64*KB)) {
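		/*
		 * The interrupt stack is part of the per-CPU area whose
		 * physical base is ci_paddr, so the physical address is
		 * just an offset from there.
		 */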
2213		pa = (paddr_t)(curcpu()->ci_paddr - INTSTACK + va);
2214		DPRINTF(PDB_EXTRACT, ("pmap_extract (intstack): va=%lx pa=%llx\n",
2215		    (u_long)va, (unsigned long long)pa));
2216		if (pap != NULL)
2217			*pap = pa;
2218		return TRUE;
2219	} else {
2220		data = pseg_get(pm, va);
2221		pa = data & TLB_PA_MASK;
2222#ifdef DEBUG
2223		if (pmapdebug & PDB_EXTRACT) {
2224			paddr_t npa = ldxa((vaddr_t)&pm->pm_segs[va_to_seg(va)],
2225					   ASI_PHYS_CACHED);
2226			printf("pmap_extract: va=%p segs[%ld]=%llx",
2227			       (void *)(u_long)va, (long)va_to_seg(va),
2228			       (unsigned long long)npa);
2229			if (npa) {
2230				npa = (paddr_t)
2231					ldxa((vaddr_t)&((paddr_t *)(u_long)npa)
2232					     [va_to_dir(va)],
2233					     ASI_PHYS_CACHED);
2234				printf(" segs[%ld][%ld]=%lx",
2235				       (long)va_to_seg(va),
2236				       (long)va_to_dir(va), (long)npa);
2237			}
2238			if (npa)	{
2239				npa = (paddr_t)
2240					ldxa((vaddr_t)&((paddr_t *)(u_long)npa)
2241					     [va_to_pte(va)],
2242					     ASI_PHYS_CACHED);
2243				printf(" segs[%ld][%ld][%ld]=%lx",
2244				       (long)va_to_seg(va),
2245				       (long)va_to_dir(va),
2246				       (long)va_to_pte(va), (long)npa);
2247			}
2248			printf(" pseg_get: %lx\n", (long)pa);
2249		}
2250#endif
2251	}
2252	if ((data & TLB_V) == 0)
2253		return (FALSE);
2254	if (pap != NULL)
2255		*pap = pa + (va & PGOFSET);
2256	return (TRUE);
2257}
2258
2259/*
2260 * Change protection on a kernel address.
2261 * This should only be called from MD code.
2262 */
2263void
2264pmap_kprotect(vaddr_t va, vm_prot_t prot)
2265{
2266	struct pmap *pm = pmap_kernel();
2267	int64_t data;
2268	int rv;
2269
2270	data = pseg_get(pm, va);
2271	KASSERT(data & TLB_V);
2272	if (prot & VM_PROT_WRITE) {
2273		data |= (TLB_W|TLB_REAL_W);
2274	} else {
2275		data &= ~(TLB_W|TLB_REAL_W);
2276	}
2277	rv = pseg_set(pm, va, data, 0);
2278	if (rv & 1)
2279		panic("pmap_kprotect: pseg_set needs spare! rv=%d", rv);
2280	KASSERT(pmap_ctx(pm)>=0);
2281	tsb_invalidate(va, pm);
2282	tlb_flush_pte(va, pm);
2283}
2284
2285/*
 * Return the number of bytes that pmap_dumpmmu() will dump.
2287 */
2288int
2289pmap_dumpsize(void)
2290{
2291	int	sz;
2292
2293	sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
2294	sz += kernel_tlb_slots * sizeof(struct cpu_kcore_4mbseg);
2295	sz += phys_installed_size * sizeof(phys_ram_seg_t);
2296
2297	return btodb(sz + DEV_BSIZE - 1);
2298}
2299
/*
 * Write the MMU contents to the dump device.
 * This gets appended to the end of a crash dump since
 * there is no in-core copy of the kernel's memory mappings.
 *
 * Write the core dump headers and MD data to the dump device.
 * We dump the following items:
 *
 *	kcore_seg_t		 (MI header defined in <sys/kcore.h>)
 *	cpu_kcore_hdr_t		 (MD header defined in <machine/kcore.h>)
 *	cpu_kcore_4mbseg[kernel_tlb_slots]   locked kernel 4MB mappings
 *	phys_ram_seg_t[phys_installed_size]  physical memory segments
 */
2312int
2313pmap_dumpmmu(int (*dump)(dev_t, daddr_t, void *, size_t), daddr_t blkno)
2314{
2315	kcore_seg_t	*kseg;
2316	cpu_kcore_hdr_t	*kcpu;
2317	phys_ram_seg_t	memseg;
2318	struct cpu_kcore_4mbseg ktlb;
2319	int	error = 0;
2320	int	i;
2321	int	buffer[dbtob(1) / sizeof(int)];
2322	int	*bp, *ep;
2323
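/*
 * EXPEDITE copies "n" bytes from "p" into buffer[] one 32-bit word at a
 * time, writing a full disk block to the dump device each time the buffer
 * fills and then restarting at the beginning of buffer[].
 */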
2324#define EXPEDITE(p,n) do {						\
2325	int *sp = (void *)(p);						\
2326	int sz = (n);							\
2327	while (sz > 0) {						\
2328		*bp++ = *sp++;						\
2329		if (bp >= ep) {						\
2330			error = (*dump)(dumpdev, blkno,			\
2331					(void *)buffer, dbtob(1));	\
2332			if (error != 0)					\
2333				return (error);				\
2334			++blkno;					\
2335			bp = buffer;					\
2336		}							\
2337		sz -= 4;						\
2338	}								\
2339} while (0)
2340
2341	/* Setup bookkeeping pointers */
2342	bp = buffer;
2343	ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];
2344
2345	/* Fill in MI segment header */
2346	kseg = (kcore_seg_t *)bp;
2347	CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2348	kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));
2349
2350	/* Fill in MD segment header (interpreted by MD part of libkvm) */
2351	kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
2352	kcpu->cputype = cputyp;
2353	kcpu->kernbase = (uint64_t)KERNBASE;
2354	kcpu->cpubase = (uint64_t)CPUINFO_VA;
2355
2356	/* Describe the locked text segment */
2357	kcpu->ktextbase = (uint64_t)ktext;
2358	kcpu->ktextp = (uint64_t)ktextp;
2359	kcpu->ktextsz = (uint64_t)ektext - ktext;
2360	if (kcpu->ktextsz > 4*MEG)
		kcpu->ktextsz = 0;	/* old version cannot work */
2362
2363	/* Describe locked data segment */
2364	kcpu->kdatabase = (uint64_t)kdata;
2365	kcpu->kdatap = (uint64_t)kdatap;
2366	kcpu->kdatasz = (uint64_t)ekdatap - kdatap;
2367
2368	/* new version of locked segments description */
2369	kcpu->newmagic = SPARC64_KCORE_NEWMAGIC;
2370	kcpu->num4mbsegs = kernel_tlb_slots;
2371	kcpu->off4mbsegs = ALIGN(sizeof(cpu_kcore_hdr_t));
2372
2373	/* description of per-cpu mappings */
2374	kcpu->numcpuinfos = sparc_ncpus;
2375	kcpu->percpusz = 64 * 1024;	/* used to be 128k for some time */
2376	kcpu->thiscpu = cpu_number();	/* which cpu is doing this dump */
2377	kcpu->cpusp = cpu0paddr - 64 * 1024 * sparc_ncpus;
2378
2379	/* Now the memsegs */
2380	kcpu->nmemseg = phys_installed_size;
2381	kcpu->memsegoffset = kcpu->off4mbsegs
2382		+ kernel_tlb_slots * sizeof(struct cpu_kcore_4mbseg);
2383
2384	/* Now we need to point this at our kernel pmap. */
2385	kcpu->nsegmap = STSZ;
2386	kcpu->segmapoffset = (uint64_t)pmap_kernel()->pm_physaddr;
2387
2388	/* Note: we have assumed everything fits in buffer[] so far... */
2389	bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));
2390
2391	/* write locked kernel 4MB TLBs */
2392	for (i = 0; i < kernel_tlb_slots; i++) {
2393		ktlb.va = kernel_tlbs[i].te_va;
2394		ktlb.pa = kernel_tlbs[i].te_pa;
2395		EXPEDITE(&ktlb, sizeof(ktlb));
2396	}
2397
2398	/* write memsegs */
2399	for (i = 0; i < phys_installed_size; i++) {
2400		memseg.start = phys_installed[i].start;
2401		memseg.size = phys_installed[i].size;
2402		EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
2403	}
2404
2405	if (bp != buffer)
2406		error = (*dump)(dumpdev, blkno++, (void *)buffer, dbtob(1));
2407
2408	return (error);
2409}
2410
2411/*
2412 * Determine (non)existence of physical page
2413 */
2414int
2415pmap_pa_exists(paddr_t pa)
2416{
2417	int i;
2418
2419	/* Just go through physical memory list & see if we're there */
2420	for (i = 0; i < phys_installed_size; i++) {
2421		if ((phys_installed[i].start <= pa) &&
2422				(phys_installed[i].start +
2423				 phys_installed[i].size >= pa))
2424			return 1;
2425	}
2426	return 0;
2427}
2428
2429/*
2430 * Lookup the appropriate TSB entry.
2431 *
2432 * Here is the full official pseudo code:
2433 *
2434 */
2435
2436#ifdef NOTYET
2437int64 GenerateTSBPointer(
2438 	int64 va,		/* Missing VA			*/
2439 	PointerType type,	/* 8K_POINTER or 16K_POINTER	*/
2440 	int64 TSBBase,		/* TSB Register[63:13] << 13	*/
2441 	Boolean split,		/* TSB Register[12]		*/
 	int TSBsize)		/* TSB Register[2:0]		*/
2443{
2444 	int64 vaPortion;
2445 	int64 TSBBaseMask;
2446 	int64 splitMask;
2447
2448	/* TSBBaseMask marks the bits from TSB Base Reg		*/
2449	TSBBaseMask = 0xffffffffffffe000 <<
2450		(split? (TSBsize + 1) : TSBsize);
2451
2452	/* Shift va towards lsb appropriately and		*/
2453	/* zero out the original va page offset			*/
2454	vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) &
2455		0xfffffffffffffff0;
2456
2457	if (split) {
2458		/* There's only one bit in question for split	*/
2459		splitMask = 1 << (13 + TSBsize);
2460		if (type == 8K_POINTER)
2461			/* Make sure we're in the lower half	*/
2462			vaPortion &= ~splitMask;
2463		else
2464			/* Make sure we're in the upper half	*/
2465			vaPortion |= splitMask;
2466	}
2467	return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
2468}
2469#endif
2470/*
2471 * Of course, since we are not using a split TSB or variable page sizes,
2472 * we can optimize this a bit.
2473 *
2474 * The following only works for a unified 8K TSB.  It will find the slot
2475 * for that particular va and return it.  IT MAY BE FOR ANOTHER MAPPING!
2476 */
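/*
 * Worked example (a sketch, assuming TSBENTS == 512 << tsbsize and
 * sizeof(pte_t) == 16): masking (va >> 9) with ~0xf and ~TSBBASEMASK and
 * then dividing by sizeof(pte_t) reduces to
 *
 *	index = (va >> 13) & (TSBENTS - 1)
 *
 * i.e. the 8K page number of the VA, modulo the number of TSB entries.
 */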
2477int
2478ptelookup_va(vaddr_t va)
2479{
2480	long tsbptr;
2481#define TSBBASEMASK	(0xffffffffffffe000LL << tsbsize)
2482
2483	tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK);
2484	return (tsbptr / sizeof(pte_t));
2485}
2486
2487/*
2488 * Do whatever is needed to sync the MOD/REF flags
2489 */
2490
2491bool
2492pmap_clear_modify(struct vm_page *pg)
2493{
2494	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2495	pv_entry_t pv;
2496	int rv;
2497	int changed = 0;
2498#ifdef DEBUG
2499	int modified = 0;
2500
2501	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify(%p)\n", pg));
2502
2503	modified = pmap_is_modified(pg);
2504#endif
2505	mutex_enter(&pmap_lock);
2506	/* Clear all mappings */
2507	pv = &md->mdpg_pvh;
2508#ifdef DEBUG
2509	if (pv->pv_va & PV_MOD)
2510		pv->pv_va |= PV_WE;	/* Remember this was modified */
2511#endif
2512	if (pv->pv_va & PV_MOD) {
2513		changed |= 1;
2514		pv->pv_va &= ~PV_MOD;
2515	}
2516#ifdef DEBUG
2517	if (pv->pv_next && !pv->pv_pmap) {
2518		printf("pmap_clear_modify: npv but no pmap for pv %p\n", pv);
2519		Debugger();
2520	}
2521#endif
2522	if (pv->pv_pmap != NULL) {
2523		for (; pv; pv = pv->pv_next) {
2524			int64_t data;
2525			struct pmap *pmap = pv->pv_pmap;
2526			vaddr_t va = pv->pv_va & PV_VAMASK;
2527
2528			/* First clear the mod bit in the PTE and make it R/O */
2529			data = pseg_get(pmap, va);
2530			KASSERT(data & TLB_V);
2531			/* Need to both clear the modify and write bits */
2532			if (data & TLB_MODIFY)
2533				changed |= 1;
2534#ifdef HWREF
2535			data &= ~(TLB_MODIFY|TLB_W);
2536#else
2537			data &= ~(TLB_MODIFY|TLB_W|TLB_REAL_W);
2538#endif
2539			rv = pseg_set(pmap, va, data, 0);
2540			if (rv & 1)
2541				printf("pmap_clear_modify: pseg_set needs"
2542				    " spare! rv=%d\n", rv);
2543			if (pmap_is_on_mmu(pmap)) {
2544				KASSERT(pmap_ctx(pmap)>=0);
2545				tsb_invalidate(va, pmap);
2546				tlb_flush_pte(va, pmap);
2547			}
2548			/* Then clear the mod bit in the pv */
2549			if (pv->pv_va & PV_MOD) {
2550				changed |= 1;
2551				pv->pv_va &= ~PV_MOD;
2552			}
2553		}
2554	}
2555	pv_check();
2556	mutex_exit(&pmap_lock);
2557#ifdef DEBUG
2558	if (pmap_is_modified(pg)) {
2559		printf("pmap_clear_modify(): %p still modified!\n", pg);
2560		Debugger();
2561	}
2562	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify: pg %p %s\n", pg,
2563	    (changed ? "was modified" : "was not modified")));
2564	if (modified != changed) {
2565		printf("pmap_clear_modify: modified %d changed %d\n",
2566		       modified, changed);
2567		Debugger();
2568	} else return (modified);
2569#endif
2570	return (changed);
2571}
2572
2573bool
2574pmap_clear_reference(struct vm_page *pg)
2575{
2576	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2577	pv_entry_t pv;
2578	int rv;
2579	int changed = 0;
2580#ifdef DEBUG
2581	int referenced = 0;
2582#endif
2583
2584	mutex_enter(&pmap_lock);
2585#ifdef DEBUG
2586	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_reference(%p)\n", pg));
2587	referenced = pmap_is_referenced_locked(pg);
2588#endif
2589	/* Clear all references */
2590	pv = &md->mdpg_pvh;
2591	if (pv->pv_va & PV_REF) {
2592		changed |= 1;
2593		pv->pv_va &= ~PV_REF;
2594	}
2595#ifdef DEBUG
2596	if (pv->pv_next && !pv->pv_pmap) {
2597		printf("pmap_clear_reference: npv but no pmap for pv %p\n", pv);
2598		Debugger();
2599	}
2600#endif
2601	if (pv->pv_pmap != NULL) {
2602		for (; pv; pv = pv->pv_next) {
2603			int64_t data;
2604			struct pmap *pmap = pv->pv_pmap;
2605			vaddr_t va = pv->pv_va & PV_VAMASK;
2606
2607			data = pseg_get(pmap, va);
2608			KASSERT(data & TLB_V);
2609			DPRINTF(PDB_CHANGEPROT,
2610			    ("clearing ref pm:%p va:%p ctx:%lx data:%llx\n",
2611			     pmap, (void *)(u_long)va,
2612			     (u_long)pmap_ctx(pmap),
2613			     (long long)data));
2614#ifdef HWREF
2615			if (data & TLB_ACCESS) {
2616				changed |= 1;
2617				data &= ~TLB_ACCESS;
2618			}
2619#else
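			/*
			 * Without HWREF there is no access bit to check;
			 * the sign bit is the valid bit, so any live
			 * mapping counts as referenced, and we zap the PTE
			 * so the next access faults and re-loads it.
			 */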
2620			if (data < 0)
2621				changed |= 1;
2622			data = 0;
2623#endif
2624			rv = pseg_set(pmap, va, data, 0);
2625			if (rv & 1)
2626				panic("pmap_clear_reference: pseg_set needs"
2627				    " spare! rv=%d\n", rv);
2628			if (pmap_is_on_mmu(pmap)) {
2629				KASSERT(pmap_ctx(pmap)>=0);
2630				tsb_invalidate(va, pmap);
2631				tlb_flush_pte(va, pmap);
2632			}
2633			if (pv->pv_va & PV_REF) {
2634				changed |= 1;
2635				pv->pv_va &= ~PV_REF;
2636			}
2637		}
2638	}
2639	dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
2640	pv_check();
2641#ifdef DEBUG
2642	if (pmap_is_referenced_locked(pg)) {
2643		pv = &md->mdpg_pvh;
2644		printf("pmap_clear_reference(): %p still referenced "
2645			"(pmap = %p, ctx = %d)\n", pg, pv->pv_pmap,
2646			pv->pv_pmap ? pmap_ctx(pv->pv_pmap) : 0);
2647		Debugger();
2648	}
2649	DPRINTF(PDB_CHANGEPROT|PDB_REF,
2650	    ("pmap_clear_reference: pg %p %s\n", pg,
2651	     (changed ? "was referenced" : "was not referenced")));
2652	if (referenced != changed) {
2653		printf("pmap_clear_reference: referenced %d changed %d\n",
2654		       referenced, changed);
2655		Debugger();
2656	} else {
2657		mutex_exit(&pmap_lock);
2658		return (referenced);
2659	}
2660#endif
2661	mutex_exit(&pmap_lock);
2662	return (changed);
2663}
2664
2665bool
2666pmap_is_modified(struct vm_page *pg)
2667{
2668	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2669	pv_entry_t pv, npv;
2670	bool res = false;
2671
2672	/* Check if any mapping has been modified */
2673	pv = &md->mdpg_pvh;
2674	if (pv->pv_va & PV_MOD)
2675		res = true;
2676#ifdef HWREF
2677#ifdef DEBUG
2678	if (pv->pv_next && !pv->pv_pmap) {
2679		printf("pmap_is_modified: npv but no pmap for pv %p\n", pv);
2680		Debugger();
2681	}
2682#endif
2683	if (!res && pv->pv_pmap != NULL) {
2684		mutex_enter(&pmap_lock);
2685		for (npv = pv; !res && npv && npv->pv_pmap;
2686		     npv = npv->pv_next) {
2687			int64_t data;
2688
2689			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2690			KASSERT(data & TLB_V);
2691			if (data & TLB_MODIFY)
2692				res = true;
2693
2694			/* Migrate modify info to head pv */
2695			if (npv->pv_va & PV_MOD) {
2696				res = true;
2697				npv->pv_va &= ~PV_MOD;
2698			}
2699		}
2700		/* Save modify info */
2701		if (res)
2702			pv->pv_va |= PV_MOD;
2703#ifdef DEBUG
2704		if (res)
2705			pv->pv_va |= PV_WE;
2706#endif
2707		mutex_exit(&pmap_lock);
2708	}
2709#endif
2710
2711	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_is_modified(%p) = %d\n", pg,
2712	    res));
2713	pv_check();
2714	return res;
2715}
2716
2717/*
 * Variant of pmap_is_referenced() where the caller already holds pmap_lock
2719 */
2720static bool
2721pmap_is_referenced_locked(struct vm_page *pg)
2722{
2723	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2724	pv_entry_t pv, npv;
2725	bool res = false;
2726
2727	KASSERT(mutex_owned(&pmap_lock));
2728
2729	/* Check if any mapping has been referenced */
2730	pv = &md->mdpg_pvh;
2731	if (pv->pv_va & PV_REF)
2732		return true;
2733
2734#ifdef HWREF
2735#ifdef DEBUG
2736	if (pv->pv_next && !pv->pv_pmap) {
2737		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
2738		Debugger();
2739	}
2740#endif
2741	if (pv->pv_pmap == NULL)
2742		return false;
2743
2744	for (npv = pv; npv; npv = npv->pv_next) {
2745		int64_t data;
2746
2747		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2748		KASSERT(data & TLB_V);
2749		if (data & TLB_ACCESS)
2750			res = true;
2751
2752		/* Migrate ref info to head pv */
2753		if (npv->pv_va & PV_REF) {
2754			res = true;
2755			npv->pv_va &= ~PV_REF;
2756		}
2757	}
2758	/* Save ref info */
2759	if (res)
2760		pv->pv_va |= PV_REF;
2761#endif
2762
2763	DPRINTF(PDB_CHANGEPROT|PDB_REF,
2764		("pmap_is_referenced(%p) = %d\n", pg, res));
2765	pv_check();
2766	return res;
2767}
2768
2769bool
2770pmap_is_referenced(struct vm_page *pg)
2771{
2772	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2773	pv_entry_t pv;
2774	bool res = false;
2775
2776	/* Check if any mapping has been referenced */
2777	pv = &md->mdpg_pvh;
2778	if (pv->pv_va & PV_REF)
2779		return true;
2780
2781#ifdef HWREF
2782#ifdef DEBUG
2783	if (pv->pv_next && !pv->pv_pmap) {
2784		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
2785		Debugger();
2786	}
2787#endif
2788	if (pv->pv_pmap != NULL) {
2789		mutex_enter(&pmap_lock);
2790		res = pmap_is_referenced_locked(pg);
2791		mutex_exit(&pmap_lock);
2792	}
2793#endif
2794
2795	DPRINTF(PDB_CHANGEPROT|PDB_REF,
2796		("pmap_is_referenced(%p) = %d\n", pg, res));
2797	pv_check();
2798	return res;
2799}
2800
2801
2802
2803/*
2804 *	Routine:	pmap_unwire
2805 *	Function:	Clear the wired attribute for a map/virtual-address
2806 *			pair.
2807 *	In/out conditions:
2808 *			The mapping must already exist in the pmap.
2809 */
2810void
2811pmap_unwire(pmap_t pmap, vaddr_t va)
2812{
2813	int64_t data;
2814	int rv;
2815
2816	DPRINTF(PDB_MMU_STEAL, ("pmap_unwire(%p, %lx)\n", pmap, va));
2817
2818#ifdef DEBUG
2819	/*
2820	 * Is this part of the permanent 4MB mapping?
2821	 */
2822	if (pmap == pmap_kernel() && va >= ktext &&
2823		va < roundup(ekdata, 4*MEG)) {
2824		prom_printf("pmap_unwire: va=%08x in locked TLB\n", va);
2825		prom_abort();
2826		return;
2827	}
2828#endif
2829	data = pseg_get(pmap, va & PV_VAMASK);
2830	KASSERT(data & TLB_V);
2831	data &= ~TLB_TSB_LOCK;
2832	rv = pseg_set(pmap, va & PV_VAMASK, data, 0);
2833	if (rv & 1)
2834		panic("pmap_unwire: pseg_set needs spare! rv=%d\n", rv);
2835	pv_check();
2836}
2837
2838/*
2839 * Lower the protection on the specified physical page.
2840 *
2841 * Never enable writing as it will break COW
2842 */
2843
2844void
2845pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2846{
2847	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2848	int64_t clear, set;
2849	int64_t data = 0;
2850	int rv;
2851	pv_entry_t pv, npv, freepv = NULL;
2852	struct pmap *pmap;
2853	vaddr_t va;
2854	bool needflush = FALSE;
2855
2856	DPRINTF(PDB_CHANGEPROT,
2857	    ("pmap_page_protect: pg %p prot %x\n", pg, prot));
2858
2859	mutex_enter(&pmap_lock);
2860	pv = &md->mdpg_pvh;
2861	if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
2862		/* copy_on_write */
2863
2864		set = TLB_V;
2865		clear = TLB_REAL_W|TLB_W;
2866		if (VM_PROT_EXECUTE & prot)
2867			set |= TLB_EXEC;
2868		else
2869			clear |= TLB_EXEC;
2870		if (VM_PROT_EXECUTE == prot)
2871			set |= TLB_EXEC_ONLY;
2872
2873#ifdef DEBUG
2874		if (pv->pv_next && !pv->pv_pmap) {
2875			printf("pmap_page_protect: no pmap for pv %p\n", pv);
2876			Debugger();
2877		}
2878#endif
2879		if (pv->pv_pmap != NULL) {
2880			for (; pv; pv = pv->pv_next) {
2881				pmap = pv->pv_pmap;
2882				va = pv->pv_va & PV_VAMASK;
2883
2884				DPRINTF(PDB_CHANGEPROT | PDB_REF,
2885					("pmap_page_protect: "
2886					 "RO va %p of pg %p...\n",
2887					 (void *)(u_long)pv->pv_va, pg));
2888				data = pseg_get(pmap, va);
2889				KASSERT(data & TLB_V);
2890
2891				/* Save REF/MOD info */
2892				if (data & TLB_ACCESS)
2893					pv->pv_va |= PV_REF;
2894				if (data & TLB_MODIFY)
2895					pv->pv_va |= PV_MOD;
2896
2897				data &= ~clear;
2898				data |= set;
2899				rv = pseg_set(pmap, va, data, 0);
2900				if (rv & 1)
2901					panic("pmap_page_protect: "
2902					       "pseg_set needs spare! rv=%d\n",
2903					       rv);
2904				if (pmap_is_on_mmu(pmap)) {
2905					KASSERT(pmap_ctx(pmap)>=0);
2906					tsb_invalidate(va, pmap);
2907					tlb_flush_pte(va, pmap);
2908				}
2909			}
2910		}
2911	} else {
2912		/* remove mappings */
2913		DPRINTF(PDB_REMOVE,
2914			("pmap_page_protect: demapping pg %p\n", pg));
2915
2916		/* First remove the entire list of continuation pv's */
2917		for (npv = pv->pv_next; npv; npv = pv->pv_next) {
2918			pmap = npv->pv_pmap;
2919			va = npv->pv_va & PV_VAMASK;
2920
2921			/* We're removing npv from pv->pv_next */
2922			DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE,
2923				("pmap_page_protect: "
2924				 "demap va %p of pg %p in pmap %p...\n",
2925				 (void *)(u_long)va, pg, pmap));
2926
2927			/* clear the entry in the page table */
2928			data = pseg_get(pmap, va);
2929			KASSERT(data & TLB_V);
2930
2931			/* Save ref/mod info */
2932			if (data & TLB_ACCESS)
2933				pv->pv_va |= PV_REF;
2934			if (data & TLB_MODIFY)
2935				pv->pv_va |= PV_MOD;
2936			/* Clear mapping */
2937			rv = pseg_set(pmap, va, 0, 0);
2938			if (rv & 1)
2939				panic("pmap_page_protect: pseg_set needs"
2940				     " spare! rv=%d\n", rv);
2941			if (pmap_is_on_mmu(pmap)) {
2942				KASSERT(pmap_ctx(pmap)>=0);
2943				tsb_invalidate(va, pmap);
2944				tlb_flush_pte(va, pmap);
2945			}
2946			if (pmap->pm_refs > 0) {
2947				needflush = TRUE;
2948			}
2949
2950			/* free the pv */
2951			pv->pv_next = npv->pv_next;
2952			npv->pv_next = freepv;
2953			freepv = npv;
2954		}
2955
2956		/* Then remove the primary pv */
2957#ifdef DEBUG
2958		if (pv->pv_next && !pv->pv_pmap) {
2959			printf("pmap_page_protect: no pmap for pv %p\n", pv);
2960			Debugger();
2961		}
2962#endif
2963		if (pv->pv_pmap != NULL) {
2964			pmap = pv->pv_pmap;
2965			va = pv->pv_va & PV_VAMASK;
2966
2967			DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE,
2968				("pmap_page_protect: "
2969				 "demap va %p of pg %p from pm %p...\n",
2970				 (void *)(u_long)va, pg, pmap));
2971
2972			data = pseg_get(pmap, va);
2973			KASSERT(data & TLB_V);
2974			/* Save ref/mod info */
2975			if (data & TLB_ACCESS)
2976				pv->pv_va |= PV_REF;
2977			if (data & TLB_MODIFY)
2978				pv->pv_va |= PV_MOD;
2979			rv = pseg_set(pmap, va, 0, 0);
2980			if (rv & 1)
2981				panic("pmap_page_protect: pseg_set needs"
2982				    " spare! rv=%d\n", rv);
2983			if (pmap_is_on_mmu(pmap)) {
2984			    	KASSERT(pmap_ctx(pmap)>=0);
2985				tsb_invalidate(va, pmap);
2986				tlb_flush_pte(va, pmap);
2987			}
2988			if (pmap->pm_refs > 0) {
2989				needflush = TRUE;
2990			}
2991			npv = pv->pv_next;
2992			/* dump the first pv */
2993			if (npv) {
2994				/* First save mod/ref bits */
2995				pv->pv_pmap = npv->pv_pmap;
2996				pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va;
2997				pv->pv_next = npv->pv_next;
2998				npv->pv_next = freepv;
2999				freepv = npv;
3000			} else {
3001				pv->pv_pmap = NULL;
3002				pv->pv_next = NULL;
3003			}
3004		}
3005		if (needflush)
3006			dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
3007	}
3008	/* We should really only flush the pages we demapped. */
3009	pv_check();
3010	mutex_exit(&pmap_lock);
3011
3012	/* Catch up on deferred frees. */
3013	for (; freepv != NULL; freepv = npv) {
3014		npv = freepv->pv_next;
3015		pool_cache_put(&pmap_pv_cache, freepv);
3016	}
3017}
3018
3019#ifdef PMAP_COUNT_DEBUG
3020/*
3021 * count pages in pmap -- this can be slow.
3022 */
3023int
3024pmap_count_res(struct pmap *pm)
3025{
3026	int64_t data;
3027	paddr_t *pdir, *ptbl;
3028	int i, j, k, n;
3029
3030	/* Don't want one of these pages reused while we're reading it. */
3031	mutex_enter(&pmap_lock);
3032	n = 0;
3033	for (i = 0; i < STSZ; i++) {
3034		pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i],
3035					       ASI_PHYS_CACHED);
3036		if (pdir == NULL) {
3037			continue;
3038		}
3039		for (k = 0; k < PDSZ; k++) {
3040			ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k],
3041						       ASI_PHYS_CACHED);
3042			if (ptbl == NULL) {
3043				continue;
3044			}
3045			for (j = 0; j < PTSZ; j++) {
3046				data = (int64_t)ldxa((vaddr_t)&ptbl[j],
3047						     ASI_PHYS_CACHED);
3048				if (data & TLB_V)
3049					n++;
3050			}
3051		}
3052	}
3053	mutex_exit(&pmap_lock);
3054
3055	if (pm->pm_stats.resident_count != n)
3056		printf("pmap_count_resident: pm_stats = %ld, counted: %d\n",
3057		    pm->pm_stats.resident_count, n);
3058
3059	return n;
3060}
3061
3062/*
3063 * count wired pages in pmap -- this can be slow.
3064 */
3065int
3066pmap_count_wired(struct pmap *pm)
3067{
3068	int64_t data;
3069	paddr_t *pdir, *ptbl;
3070	int i, j, k, n;
3071
3072	/* Don't want one of these pages reused while we're reading it. */
3073	mutex_enter(&pmap_lock);	/* XXX uvmplock */
3074	n = 0;
3075	for (i = 0; i < STSZ; i++) {
3076		pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i],
3077					       ASI_PHYS_CACHED);
3078		if (pdir == NULL) {
3079			continue;
3080		}
3081		for (k = 0; k < PDSZ; k++) {
3082			ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k],
3083						       ASI_PHYS_CACHED);
3084			if (ptbl == NULL) {
3085				continue;
3086			}
3087			for (j = 0; j < PTSZ; j++) {
3088				data = (int64_t)ldxa((vaddr_t)&ptbl[j],
3089						     ASI_PHYS_CACHED);
3090				if (data & TLB_TSB_LOCK)
3091					n++;
3092			}
3093		}
3094	}
3095	mutex_exit(&pmap_lock);	/* XXX uvmplock */
3096
3097	if (pm->pm_stats.wired_count != n)
3098		printf("pmap_count_wired: pm_stats = %ld, counted: %d\n",
3099		    pm->pm_stats.wired_count, n);
3100
3101	return n;
3102}
3103#endif	/* PMAP_COUNT_DEBUG */
3104
3105void
3106pmap_procwr(struct proc *p, vaddr_t va, size_t len)
3107{
3108
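	/* We don't track which I$ lines were touched, so flush the whole thing. */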
3109	blast_icache();
3110}
3111
3112/*
3113 * Allocate a hardware context to the given pmap.
3114 */
3115static int
3116ctx_alloc(struct pmap *pm)
3117{
3118	int i, ctx;
3119
3120	KASSERT(pm != pmap_kernel());
3121	KASSERT(pm == curproc->p_vmspace->vm_map.pmap);
3122	mutex_enter(&curcpu()->ci_ctx_lock);
3123	ctx = curcpu()->ci_pmap_next_ctx++;
3124
3125	/*
3126	 * if we have run out of contexts, remove all user entries from
3127	 * the TSB, TLB and dcache and start over with context 1 again.
3128	 */
3129
3130	if (ctx == curcpu()->ci_numctx) {
3131		DPRINTF(PDB_CTX_ALLOC|PDB_CTX_FLUSHALL,
3132			("ctx_alloc: cpu%d run out of contexts %d\n",
3133			 cpu_number(), curcpu()->ci_numctx));
3134		write_user_windows();
3135		while (!LIST_EMPTY(&curcpu()->ci_pmap_ctxlist)) {
3136#ifdef MULTIPROCESSOR
3137			KASSERT(pmap_ctx(LIST_FIRST(&curcpu()->ci_pmap_ctxlist)) != 0);
3138#endif
3139			ctx_free(LIST_FIRST(&curcpu()->ci_pmap_ctxlist),
3140				 curcpu());
3141		}
3142		for (i = TSBENTS - 1; i >= 0; i--) {
3143			if (TSB_TAG_CTX(curcpu()->ci_tsb_dmmu[i].tag) != 0) {
3144				clrx(&curcpu()->ci_tsb_dmmu[i].data);
3145			}
3146			if (TSB_TAG_CTX(curcpu()->ci_tsb_immu[i].tag) != 0) {
3147				clrx(&curcpu()->ci_tsb_immu[i].data);
3148			}
3149		}
3150		sp_tlb_flush_all();
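		/* ctx 0 is reserved for the kernel, so user contexts restart at 1 */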
3151		ctx = 1;
3152		curcpu()->ci_pmap_next_ctx = 2;
3153	}
3154	curcpu()->ci_ctxbusy[ctx] = pm->pm_physaddr;
3155	LIST_INSERT_HEAD(&curcpu()->ci_pmap_ctxlist, pm, pm_list[cpu_number()]);
3156	pmap_ctx(pm) = ctx;
3157	mutex_exit(&curcpu()->ci_ctx_lock);
3158	DPRINTF(PDB_CTX_ALLOC, ("ctx_alloc: cpu%d allocated ctx %d\n",
3159		cpu_number(), ctx));
3160	return ctx;
3161}
3162
3163/*
3164 * Give away a context.
3165 */
3166static void
3167ctx_free(struct pmap *pm, struct cpu_info *ci)
3168{
3169	int oldctx;
3170	int cpunum;
3171
3172	KASSERT(mutex_owned(&ci->ci_ctx_lock));
3173
3174#ifdef MULTIPROCESSOR
3175	cpunum = ci->ci_index;
3176#else
	/* Give the compiler a hint. */
3178	cpunum = 0;
3179#endif
3180
3181	oldctx = pm->pm_ctx[cpunum];
3182	if (oldctx == 0)
3183		return;
3184
3185#ifdef DIAGNOSTIC
3186	if (pm == pmap_kernel())
3187		panic("ctx_free: freeing kernel context");
3188	if (ci->ci_ctxbusy[oldctx] == 0)
3189		printf("ctx_free: freeing free context %d\n", oldctx);
3190	if (ci->ci_ctxbusy[oldctx] != pm->pm_physaddr) {
3191		printf("ctx_free: freeing someone else's context\n "
3192		       "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n",
3193		       oldctx, (void *)(u_long)ci->ci_ctxbusy[oldctx], pm,
3194		       (void *)(u_long)pm->pm_physaddr);
3195		Debugger();
3196	}
3197#endif
3198	/* We should verify it has not been stolen and reallocated... */
3199	DPRINTF(PDB_CTX_ALLOC, ("ctx_free: cpu%d freeing ctx %d\n",
3200		cpu_number(), oldctx));
3201	ci->ci_ctxbusy[oldctx] = 0UL;
3202	pm->pm_ctx[cpunum] = 0;
3203	LIST_REMOVE(pm, pm_list[cpunum]);
3204}
3205
3206/*
3207 * Enter the pmap and virtual address into the
3208 * physical to virtual map table.
3209 *
3210 * We enter here with the pmap locked.
3211 */
3212
3213void
3214pmap_enter_pv(struct pmap *pmap, vaddr_t va, paddr_t pa, struct vm_page *pg,
3215	      pv_entry_t npv)
3216{
3217	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
3218	pv_entry_t pvh;
3219
3220	KASSERT(mutex_owned(&pmap_lock));
3221
3222	pvh = &md->mdpg_pvh;
3223	DPRINTF(PDB_ENTER, ("pmap_enter: pvh %p: was %lx/%p/%p\n",
3224	    pvh, pvh->pv_va, pvh->pv_pmap, pvh->pv_next));
3225	if (pvh->pv_pmap == NULL) {
3226
3227		/*
3228		 * No entries yet, use header as the first entry
3229		 */
3230		DPRINTF(PDB_ENTER, ("pmap_enter: first pv: pmap %p va %lx\n",
3231		    pmap, va));
3232		ENTER_STAT(firstpv);
3233		PV_SETVA(pvh, va);
3234		pvh->pv_pmap = pmap;
3235		pvh->pv_next = NULL;
3236		KASSERT(npv == NULL);
3237	} else {
3238		if (pg->loan_count == 0 && !(pvh->pv_va & PV_ALIAS)) {
3239
3240			/*
3241			 * There is at least one other VA mapping this page.
3242			 * Check if they are cache index compatible. If not
3243			 * remove all mappings, flush the cache and set page
3244			 * to be mapped uncached. Caching will be restored
3245			 * when pages are mapped compatible again.
3246			 */
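			/*
			 * (VA_ALIAS_MASK presumably covers the VA bits that
			 * index the virtually-indexed D$ beyond the page
			 * offset; mappings that differ in those bits would
			 * cache the same physical data in different lines.)
			 */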
3247			if ((pvh->pv_va ^ va) & VA_ALIAS_MASK) {
3248				pvh->pv_va |= PV_ALIAS;
3249				pmap_page_cache(pmap, pa, 0);
3250				ENTER_STAT(ci);
3251			}
3252		}
3253
3254		/*
3255		 * There is at least one other VA mapping this page.
3256		 * Place this entry after the header.
3257		 */
3258
3259		DPRINTF(PDB_ENTER, ("pmap_enter: new pv: pmap %p va %lx\n",
3260		    pmap, va));
3261		npv->pv_pmap = pmap;
3262		npv->pv_va = va & PV_VAMASK;
3263		npv->pv_next = pvh->pv_next;
3264		pvh->pv_next = npv;
3265
3266		if (!npv->pv_next) {
3267			ENTER_STAT(secondpv);
3268		}
3269	}
3270}
3271
3272/*
3273 * Remove a physical to virtual address translation.
3274 */
3275
3276pv_entry_t
3277pmap_remove_pv(struct pmap *pmap, vaddr_t va, struct vm_page *pg)
3278{
3279	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
3280	pv_entry_t pvh, npv, pv;
3281	int64_t data = 0;
3282
3283	KASSERT(mutex_owned(&pmap_lock));
3284
3285	pvh = &md->mdpg_pvh;
3286
3287	DPRINTF(PDB_REMOVE, ("pmap_remove_pv(pm=%p, va=%p, pg=%p)\n", pmap,
3288	    (void *)(u_long)va, pg));
3289	pv_check();
3290
3291	/*
3292	 * Remove page from the PV table.
3293	 * If it is the first entry on the list, it is actually
3294	 * in the header and we must copy the following entry up
3295	 * to the header.  Otherwise we must search the list for
3296	 * the entry.  In either case we free the now unused entry.
3297	 */
3298	if (pmap == pvh->pv_pmap && PV_MATCH(pvh, va)) {
3299		data = pseg_get(pvh->pv_pmap, pvh->pv_va & PV_VAMASK);
3300		KASSERT(data & TLB_V);
3301		npv = pvh->pv_next;
3302		if (npv) {
3303			/* First save mod/ref bits */
3304			pvh->pv_va = (pvh->pv_va & PV_MASK) | npv->pv_va;
3305			pvh->pv_next = npv->pv_next;
3306			pvh->pv_pmap = npv->pv_pmap;
3307		} else {
3308			pvh->pv_pmap = NULL;
3309			pvh->pv_next = NULL;
3310			pvh->pv_va &= (PV_REF|PV_MOD);
3311		}
3312		REMOVE_STAT(pvfirst);
3313	} else {
3314		for (pv = pvh, npv = pvh->pv_next; npv;
3315		     pv = npv, npv = npv->pv_next) {
3316			REMOVE_STAT(pvsearch);
3317			if (pmap == npv->pv_pmap && PV_MATCH(npv, va))
3318				break;
3319		}
3320		pv->pv_next = npv->pv_next;
3321		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
3322		KASSERT(data & TLB_V);
3323	}
3324
3325	/* Save ref/mod info */
3326	if (data & TLB_ACCESS)
3327		pvh->pv_va |= PV_REF;
3328	if (data & TLB_MODIFY)
3329		pvh->pv_va |= PV_MOD;
3330
3331	/* Check to see if the alias went away */
3332	if (pvh->pv_va & PV_ALIAS) {
3333		pvh->pv_va &= ~PV_ALIAS;
3334		for (pv = pvh; pv; pv = pv->pv_next) {
3335			if ((pv->pv_va ^ pvh->pv_va) & VA_ALIAS_MASK) {
3336				pvh->pv_va |= PV_ALIAS;
3337				break;
3338			}
3339		}
3340		if (!(pvh->pv_va & PV_ALIAS))
3341			pmap_page_cache(pmap, VM_PAGE_TO_PHYS(pg), 1);
3342	}
3343	pv_check();
3344	return npv;
3345}
3346
3347/*
3348 *	pmap_page_cache:
3349 *
3350 *	Change all mappings of a page to cached/uncached.
3351 */
3352void
3353pmap_page_cache(struct pmap *pm, paddr_t pa, int mode)
3354{
3355	struct vm_page *pg;
3356	struct vm_page_md *md;
3357	pv_entry_t pv;
3358	vaddr_t va;
3359	int rv;
3360
3361#if 0
3362	/*
3363	 * Why is this?
3364	 */
3365	if (CPU_ISSUN4US || CPU_ISSUN4V)
3366		return;
3367#endif
3368
3369	KASSERT(mutex_owned(&pmap_lock));
3370
	DPRINTF(PDB_ENTER, ("pmap_page_cache(%llx)\n",
3372	    (unsigned long long)pa));
3373	pg = PHYS_TO_VM_PAGE(pa);
3374	md = VM_PAGE_TO_MD(pg);
3375	pv = &md->mdpg_pvh;
3376	while (pv) {
3377		va = pv->pv_va & PV_VAMASK;
3378		if (pv->pv_va & PV_NC) {
3379			int64_t data;
3380
3381			/* Non-cached -- I/O mapping */
3382			data = pseg_get(pv->pv_pmap, va);
3383			KASSERT(data & TLB_V);
3384			rv = pseg_set(pv->pv_pmap, va,
3385				     data & ~(TLB_CV|TLB_CP), 0);
3386			if (rv & 1)
3387				panic("pmap_page_cache: pseg_set needs"
3388				     " spare! rv=%d\n", rv);
3389		} else if (mode && (!(pv->pv_va & PV_NVC))) {
3390			int64_t data;
3391
3392			/* Enable caching */
3393			data = pseg_get(pv->pv_pmap, va);
3394			KASSERT(data & TLB_V);
3395			rv = pseg_set(pv->pv_pmap, va, data | TLB_CV, 0);
3396			if (rv & 1)
3397				panic("pmap_page_cache: pseg_set needs"
3398				    " spare! rv=%d\n", rv);
3399		} else {
3400			int64_t data;
3401
3402			/* Disable caching */
3403			data = pseg_get(pv->pv_pmap, va);
3404			KASSERT(data & TLB_V);
3405			rv = pseg_set(pv->pv_pmap, va, data & ~TLB_CV, 0);
3406			if (rv & 1)
3407				panic("pmap_page_cache: pseg_set needs"
3408				    " spare! rv=%d\n", rv);
3409		}
3410		if (pmap_is_on_mmu(pv->pv_pmap)) {
3411			/* Force reload -- cache bits have changed */
3412			KASSERT(pmap_ctx(pv->pv_pmap)>=0);
3413			tsb_invalidate(va, pv->pv_pmap);
3414			tlb_flush_pte(va, pv->pv_pmap);
3415		}
3416		pv = pv->pv_next;
3417	}
3418}
3419
3420/*
3421 * Some routines to allocate and free PTPs.
3422 */
3423static int
3424pmap_get_page(paddr_t *p)
3425{
3426	struct vm_page *pg;
3427	paddr_t pa;
3428
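	/*
	 * Once UVM is up we take a zeroed managed page from the normal
	 * allocator; before that we must steal a physical page from the
	 * boot-time freelists and zero it ourselves.
	 */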
3429	if (uvm.page_init_done) {
3430		pg = uvm_pagealloc(NULL, 0, NULL,
3431		    UVM_PGA_ZERO | UVM_PGA_USERESERVE);
3432		if (pg == NULL)
3433			return (0);
3434		pa = VM_PAGE_TO_PHYS(pg);
3435	} else {
3436		if (!uvm_page_physget(&pa))
3437			return (0);
3438		pmap_zero_page(pa);
3439	}
3440	*p = pa;
3441	return (1);
3442}
3443
3444static void
3445pmap_free_page(paddr_t pa, sparc64_cpuset_t cs)
3446{
3447	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
3448
3449	dcache_flush_page_cpuset(pa, cs);
3450	uvm_pagefree(pg);
3451}
3452
3453static void
3454pmap_free_page_noflush(paddr_t pa)
3455{
3456	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
3457
3458	uvm_pagefree(pg);
3459}
3460
3461#ifdef DDB
3462
3463void db_dump_pv(db_expr_t, int, db_expr_t, const char *);
3464void
3465db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, const char *modif)
3466{
3467	struct vm_page *pg;
3468	struct vm_page_md *md;
3469	struct pv_entry *pv;
3470
3471	if (!have_addr) {
3472		db_printf("Need addr for pv\n");
3473		return;
3474	}
3475
3476	pg = PHYS_TO_VM_PAGE((paddr_t)addr);
3477	if (pg == NULL) {
3478		db_printf("page is not managed\n");
3479		return;
3480	}
3481	md = VM_PAGE_TO_MD(pg);
3482	for (pv = &md->mdpg_pvh; pv; pv = pv->pv_next)
3483		db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
3484			  pv, pv->pv_next, pv->pv_pmap,
3485			  (unsigned long long)pv->pv_va);
3486}
3487
3488#endif
3489
3490#ifdef DEBUG
3491/*
 * Test ref/modify handling.
 */
3493void pmap_testout(void);
3494void
3495pmap_testout(void)
3496{
3497	vaddr_t va;
3498	volatile int *loc;
3499	int val = 0;
3500	paddr_t pa;
3501	struct vm_page *pg;
3502	int ref, mod;
3503
3504	/* Allocate a page */
3505	va = (vaddr_t)(vmmap - PAGE_SIZE);
3506	KASSERT(va != 0);
3507	loc = (int*)va;
3508
3509	pmap_get_page(&pa);
3510	pg = PHYS_TO_VM_PAGE(pa);
3511	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3512	pmap_update(pmap_kernel());
3513
3514	/* Now clear reference and modify */
3515	ref = pmap_clear_reference(pg);
3516	mod = pmap_clear_modify(pg);
3517	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3518	       (void *)(u_long)va, (long)pa,
3519	       ref, mod);
3520
3521	/* Check it's properly cleared */
3522	ref = pmap_is_referenced(pg);
3523	mod = pmap_is_modified(pg);
3524	printf("Checking cleared page: ref %d, mod %d\n",
3525	       ref, mod);
3526
3527	/* Reference page */
3528	val = *loc;
3529
3530	ref = pmap_is_referenced(pg);
3531	mod = pmap_is_modified(pg);
3532	printf("Referenced page: ref %d, mod %d val %x\n",
3533	       ref, mod, val);
3534
3535	/* Now clear reference and modify */
3536	ref = pmap_clear_reference(pg);
3537	mod = pmap_clear_modify(pg);
3538	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3539	       (void *)(u_long)va, (long)pa,
3540	       ref, mod);
3541
3542	/* Modify page */
3543	*loc = 1;
3544
3545	ref = pmap_is_referenced(pg);
3546	mod = pmap_is_modified(pg);
3547	printf("Modified page: ref %d, mod %d\n",
3548	       ref, mod);
3549
3550	/* Now clear reference and modify */
3551	ref = pmap_clear_reference(pg);
3552	mod = pmap_clear_modify(pg);
3553	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3554	       (void *)(u_long)va, (long)pa,
3555	       ref, mod);
3556
3557	/* Check it's properly cleared */
3558	ref = pmap_is_referenced(pg);
3559	mod = pmap_is_modified(pg);
3560	printf("Checking cleared page: ref %d, mod %d\n",
3561	       ref, mod);
3562
3563	/* Modify page */
3564	*loc = 1;
3565
3566	ref = pmap_is_referenced(pg);
3567	mod = pmap_is_modified(pg);
3568	printf("Modified page: ref %d, mod %d\n",
3569	       ref, mod);
3570
3571	/* Check pmap_protect() */
3572	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_READ);
3573	pmap_update(pmap_kernel());
3574	ref = pmap_is_referenced(pg);
3575	mod = pmap_is_modified(pg);
3576	printf("pmap_protect(VM_PROT_READ): ref %d, mod %d\n",
3577	       ref, mod);
3578
3579	/* Now clear reference and modify */
3580	ref = pmap_clear_reference(pg);
3581	mod = pmap_clear_modify(pg);
3582	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3583	       (void *)(u_long)va, (long)pa,
3584	       ref, mod);
3585
3586	/* Modify page */
3587	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3588	pmap_update(pmap_kernel());
3589	*loc = 1;
3590
3591	ref = pmap_is_referenced(pg);
3592	mod = pmap_is_modified(pg);
3593	printf("Modified page: ref %d, mod %d\n",
3594	       ref, mod);
3595
3596	/* Check pmap_protect() */
3597	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_NONE);
3598	pmap_update(pmap_kernel());
3599	ref = pmap_is_referenced(pg);
3600	mod = pmap_is_modified(pg);
	printf("pmap_protect(VM_PROT_NONE): ref %d, mod %d\n",
3602	       ref, mod);
3603
3604	/* Now clear reference and modify */
3605	ref = pmap_clear_reference(pg);
3606	mod = pmap_clear_modify(pg);
3607	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3608	       (void *)(u_long)va, (long)pa,
3609	       ref, mod);
3610
3611	/* Modify page */
3612	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3613	pmap_update(pmap_kernel());
3614	*loc = 1;
3615
3616	ref = pmap_is_referenced(pg);
3617	mod = pmap_is_modified(pg);
3618	printf("Modified page: ref %d, mod %d\n",
3619	       ref, mod);
3620
	/* Check pmap_page_protect() */
3622	pmap_page_protect(pg, VM_PROT_READ);
3623	ref = pmap_is_referenced(pg);
3624	mod = pmap_is_modified(pg);
	printf("pmap_page_protect(VM_PROT_READ): ref %d, mod %d\n",
3626	       ref, mod);
3627
3628	/* Now clear reference and modify */
3629	ref = pmap_clear_reference(pg);
3630	mod = pmap_clear_modify(pg);
3631	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3632	       (void *)(u_long)va, (long)pa,
3633	       ref, mod);
3634
3635
3636	/* Modify page */
3637	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3638	pmap_update(pmap_kernel());
3639	*loc = 1;
3640
3641	ref = pmap_is_referenced(pg);
3642	mod = pmap_is_modified(pg);
3643	printf("Modified page: ref %d, mod %d\n",
3644	       ref, mod);
3645
	/* Check pmap_page_protect() */
3647	pmap_page_protect(pg, VM_PROT_NONE);
3648	ref = pmap_is_referenced(pg);
3649	mod = pmap_is_modified(pg);
	printf("pmap_page_protect(VM_PROT_NONE): ref %d, mod %d\n",
3651	       ref, mod);
3652
3653	/* Now clear reference and modify */
3654	ref = pmap_clear_reference(pg);
3655	mod = pmap_clear_modify(pg);
3656	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3657	       (void *)(u_long)va, (long)pa,
3658	       ref, mod);
3659
3660	/* Unmap page */
3661	pmap_remove(pmap_kernel(), va, va+1);
3662	pmap_update(pmap_kernel());
3663	ref = pmap_is_referenced(pg);
3664	mod = pmap_is_modified(pg);
3665	printf("Unmapped page: ref %d, mod %d\n", ref, mod);
3666
3667	/* Now clear reference and modify */
3668	ref = pmap_clear_reference(pg);
3669	mod = pmap_clear_modify(pg);
3670	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3671	       (void *)(u_long)va, (long)pa, ref, mod);
3672
3673	/* Check it's properly cleared */
3674	ref = pmap_is_referenced(pg);
3675	mod = pmap_is_modified(pg);
3676	printf("Checking cleared page: ref %d, mod %d\n",
3677	       ref, mod);
3678
3679	pmap_remove(pmap_kernel(), va, va+1);
3680	pmap_update(pmap_kernel());
3681	pmap_free_page(pa, cpus_active);
3682}
3683#endif
3684
3685void
3686pmap_update(struct pmap *pmap)
3687{
3688
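	/*
	 * pm_refs is zeroed by pmap_remove_all() while mappings are torn
	 * down in bulk; once the caller has finished, re-activate the pmap
	 * so it can be loaded on the MMU again.
	 */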
3689	if (pmap->pm_refs > 0) {
3690		return;
3691	}
3692	pmap->pm_refs = 1;
3693	pmap_activate_pmap(pmap);
3694}
3695
3696/*
3697 * pmap_copy_page()/pmap_zero_page()
3698 *
3699 * we make sure that the destination page is flushed from all D$'s
3700 * before we perform the copy/zero.
3701 */
3702extern int cold;
3703void
3704pmap_copy_page(paddr_t src, paddr_t dst)
3705{
3706
3707	if (!cold)
3708		dcache_flush_page_all(dst);
3709	pmap_copy_page_phys(src, dst);
3710}
3711
3712void
3713pmap_zero_page(paddr_t pa)
3714{
3715
3716	if (!cold)
3717		dcache_flush_page_all(pa);
3718	pmap_zero_page_phys(pa);
3719}
3720