1/*	$NetBSD$	*/
2
3/*-
4 * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center and by Chris G. Demetriou.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1992, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * the Systems Programming Group of the University of Utah Computer
39 * Science Department and Ralph Campbell.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)pmap.c	8.4 (Berkeley) 1/26/94
66 */
67
68#include <sys/cdefs.h>
69
70__KERNEL_RCSID(0, "$NetBSD$");
71
72/*
73 *	Manages physical address maps.
74 *
75 *	In addition to hardware address maps, this
76 *	module is called upon to provide software-use-only
77 *	maps which may or may not be stored in the same
78 *	form as hardware maps.  These pseudo-maps are
79 *	used to store intermediate results from copy
80 *	operations to and from address spaces.
81 *
82 *	Since the information managed by this module is
83 *	also stored by the logical address mapping module,
84 *	this module may throw away valid virtual-to-physical
85 *	mappings at almost any time.  However, invalidations
86 *	of virtual-to-physical mappings must be done as
87 *	requested.
88 *
89 *	In order to cope with hardware architectures which
90 *	make virtual-to-physical map invalidates expensive,
91 *	this module may delay invalidate or reduced protection
92 *	operations until such time as they are actually
93 *	necessary.  This module is given full information as
94 *	to which processors are currently using which maps,
95 *	and to when physical maps must be made correct.
96 */
97
98/* XXX simonb 2002/02/26
99 *
100 * MIPS3_PLUS is used to conditionally compile the r4k MMU support.
101 * This is bogus - for example, some IDT MIPS-II CPUs have r4k style
102 * MMUs (and 32-bit ones at that).
103 *
104 * On the other hand, it's not likely that we'll ever support the R6000
105 * (is it?), so maybe that can be an "if MIPS2 or greater" check.
106 *
107 * Also along these lines are using totally separate functions for
108 * r3k-style and r4k-style MMUs and removing all the MIPS_HAS_R4K_MMU
109 * checks in the current functions.
110 *
111 * These warnings probably applies to other files under sys/arch/mips.
112 */
113
114#include "opt_sysv.h"
115#include "opt_cputype.h"
116#include "opt_multiprocessor.h"
117#include "opt_mips_cache.h"
118
119#ifdef MULTIPROCESSOR
120#define PMAP_NO_PV_UNCACHED
121#endif
122
123#include <sys/param.h>
124#include <sys/systm.h>
125#include <sys/proc.h>
126#include <sys/cpu.h>
127#include <sys/malloc.h>
128#include <sys/buf.h>
129#include <sys/pool.h>
130#include <sys/atomic.h>
131#include <sys/mutex.h>
132#include <sys/atomic.h>
133#ifdef SYSVSHM
134#include <sys/shm.h>
135#endif
136#include <sys/socketvar.h>	/* XXX: for sock_loan_thresh */
137
138#include <uvm/uvm.h>
139
140#include <mips/cache.h>
141#include <mips/cpuregs.h>
142#include <mips/locore.h>
143#include <mips/pte.h>
144
145CTASSERT(MIPS_KSEG0_START < 0);
146CTASSERT((intptr_t)MIPS_PHYS_TO_KSEG0(0x1000) < 0);
147CTASSERT(MIPS_KSEG1_START < 0);
148CTASSERT((intptr_t)MIPS_PHYS_TO_KSEG1(0x1000) < 0);
149CTASSERT(MIPS_KSEG2_START < 0);
150CTASSERT(MIPS_MAX_MEM_ADDR < 0);
151CTASSERT(MIPS_RESERVED_ADDR < 0);
152CTASSERT((uint32_t)MIPS_KSEG0_START == 0x80000000);
153CTASSERT((uint32_t)MIPS_KSEG1_START == 0xa0000000);
154CTASSERT((uint32_t)MIPS_KSEG2_START == 0xc0000000);
155CTASSERT((uint32_t)MIPS_MAX_MEM_ADDR == 0xbe000000);
156CTASSERT((uint32_t)MIPS_RESERVED_ADDR == 0xbfc80000);
157CTASSERT(MIPS_KSEG0_P(MIPS_PHYS_TO_KSEG0(0)));
158CTASSERT(MIPS_KSEG1_P(MIPS_PHYS_TO_KSEG1(0)));
159
160#define	PMAP_COUNT(name)	(pmap_evcnt_##name.ev_count++ + 0)
161#define PMAP_COUNTER(name, desc) \
162static struct evcnt pmap_evcnt_##name = \
163	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap", desc); \
164EVCNT_ATTACH_STATIC(pmap_evcnt_##name)
165
166PMAP_COUNTER(remove_kernel_calls, "remove kernel calls");
167PMAP_COUNTER(remove_kernel_pages, "kernel pages unmapped");
168PMAP_COUNTER(remove_user_calls, "remove user calls");
169PMAP_COUNTER(remove_user_pages, "user pages unmapped");
170PMAP_COUNTER(remove_flushes, "remove cache flushes");
171PMAP_COUNTER(remove_tlb_ops, "remove tlb ops");
172PMAP_COUNTER(remove_pvfirst, "remove pv first");
173PMAP_COUNTER(remove_pvsearch, "remove pv search");
174
175PMAP_COUNTER(prefer_requests, "prefer requests");
176PMAP_COUNTER(prefer_adjustments, "prefer adjustments");
177
178PMAP_COUNTER(idlezeroed_pages, "pages idle zeroed");
179PMAP_COUNTER(zeroed_pages, "pages zeroed");
180PMAP_COUNTER(copied_pages, "pages copied");
181
182PMAP_COUNTER(kenter_pa, "kernel fast mapped pages");
183PMAP_COUNTER(kenter_pa_bad, "kernel fast mapped pages (bad color)");
184PMAP_COUNTER(kenter_pa_unmanaged, "kernel fast mapped unmanaged pages");
185PMAP_COUNTER(kremove_pages, "kernel fast unmapped pages");
186
187PMAP_COUNTER(page_cache_evictions, "pages changed to uncacheable");
188PMAP_COUNTER(page_cache_restorations, "pages changed to cacheable");
189
190PMAP_COUNTER(kernel_mappings_bad, "kernel pages mapped (bad color)");
191PMAP_COUNTER(user_mappings_bad, "user pages mapped (bad color)");
192PMAP_COUNTER(kernel_mappings, "kernel pages mapped");
193PMAP_COUNTER(user_mappings, "user pages mapped");
194PMAP_COUNTER(user_mappings_changed, "user mapping changed");
195PMAP_COUNTER(kernel_mappings_changed, "kernel mapping changed");
196PMAP_COUNTER(uncached_mappings, "uncached pages mapped");
197PMAP_COUNTER(unmanaged_mappings, "unmanaged pages mapped");
198PMAP_COUNTER(managed_mappings, "managed pages mapped");
199PMAP_COUNTER(mappings, "pages mapped");
200PMAP_COUNTER(remappings, "pages remapped");
201PMAP_COUNTER(unmappings, "pages unmapped");
202PMAP_COUNTER(primary_mappings, "page initial mappings");
203PMAP_COUNTER(primary_unmappings, "page final unmappings");
204PMAP_COUNTER(tlb_hit, "page mapping");
205
206PMAP_COUNTER(exec_mappings, "exec pages mapped");
207PMAP_COUNTER(exec_synced_mappings, "exec pages synced");
208PMAP_COUNTER(exec_synced_remove, "exec pages synced (PR)");
209PMAP_COUNTER(exec_synced_clear_modify, "exec pages synced (CM)");
210PMAP_COUNTER(exec_synced_page_protect, "exec pages synced (PP)");
211PMAP_COUNTER(exec_synced_protect, "exec pages synced (P)");
212PMAP_COUNTER(exec_uncached_page_protect, "exec pages uncached (PP)");
213PMAP_COUNTER(exec_uncached_clear_modify, "exec pages uncached (CM)");
214PMAP_COUNTER(exec_uncached_zero_page, "exec pages uncached (ZP)");
215PMAP_COUNTER(exec_uncached_copy_page, "exec pages uncached (CP)");
216PMAP_COUNTER(exec_uncached_remove, "exec pages uncached (PR)");
217
218PMAP_COUNTER(create, "creates");
219PMAP_COUNTER(reference, "references");
220PMAP_COUNTER(dereference, "dereferences");
221PMAP_COUNTER(destroy, "destroyed");
222PMAP_COUNTER(activate, "activations");
223PMAP_COUNTER(deactivate, "deactivations");
224PMAP_COUNTER(update, "updates");
225#ifdef MULTIPROCESSOR
226PMAP_COUNTER(shootdown_ipis, "shootdown IPIs");
227#endif
228PMAP_COUNTER(unwire, "unwires");
229PMAP_COUNTER(copy, "copies");
230PMAP_COUNTER(collect, "collects");
231PMAP_COUNTER(clear_modify, "clear_modifies");
232PMAP_COUNTER(protect, "protects");
233PMAP_COUNTER(page_protect, "page_protects");
234
235#define PDB_FOLLOW	0x0001
236#define PDB_INIT	0x0002
237#define PDB_ENTER	0x0004
238#define PDB_REMOVE	0x0008
239#define PDB_CREATE	0x0010
240#define PDB_PTPAGE	0x0020
241#define PDB_PVENTRY	0x0040
242#define PDB_BITS	0x0080
243#define PDB_COLLECT	0x0100
244#define PDB_PROTECT	0x0200
245#define PDB_TLBPID	0x0400
246#define PDB_PARANOIA	0x2000
247#define PDB_WIRING	0x4000
248#define PDB_PVDUMP	0x8000
249int pmapdebug = 0;
250
251#define PMAP_ASID_RESERVED 0
252
253CTASSERT(PMAP_ASID_RESERVED == 0);
254/*
255 * Initialize the kernel pmap.
256 */
257#ifdef MULTIPROCESSOR
258#define	PMAP_SIZE	offsetof(struct pmap, pm_pai[MAXCPUS])
259#else
260#define	PMAP_SIZE	sizeof(struct pmap)
261kmutex_t pmap_pvlist_mutex __aligned(COHERENCY_UNIT);
262#endif
263
264struct pmap_kernel kernel_pmap_store = {
265	.kernel_pmap = {
266		.pm_count = 1,
267		.pm_segtab = (void *)(MIPS_KSEG2_START + 0x1eadbeef),
268#ifdef MULTIPROCESSOR
269		.pm_active = 1,
270		.pm_onproc = 1,
271#endif
272	},
273};
274struct pmap * const kernel_pmap_ptr = &kernel_pmap_store.kernel_pmap;
275
276paddr_t mips_avail_start;	/* PA of first available physical page */
277paddr_t mips_avail_end;		/* PA of last available physical page */
278vaddr_t mips_virtual_end;	/* VA of last avail page (end of kernel AS) */
279vaddr_t iospace;		/* VA of start of I/O space, if needed  */
280vsize_t iospace_size = 0;	/* Size of (initial) range of I/O addresses */
281
282pt_entry_t	*Sysmap;		/* kernel pte table */
283unsigned int	Sysmapsize;		/* number of pte's in Sysmap */
284
285#ifdef PMAP_POOLPAGE_DEBUG
286struct poolpage_info {
287	vaddr_t base;
288	vaddr_t size;
289	vaddr_t hint;
290	pt_entry_t *sysmap;
291} poolpage;
292#endif
293
294static void pmap_pvlist_lock_init(void);
295
296/*
297 * The pools from which pmap structures and sub-structures are allocated.
298 */
299struct pool pmap_pmap_pool;
300struct pool pmap_pv_pool;
301
302#ifndef PMAP_PV_LOWAT
303#define	PMAP_PV_LOWAT	16
304#endif
305int		pmap_pv_lowat = PMAP_PV_LOWAT;
306
307bool		pmap_initialized = false;
308#define	PMAP_PAGE_COLOROK_P(a, b) \
309		((((int)(a) ^ (int)(b)) & pmap_page_colormask) == 0)
310u_int		pmap_page_colormask;
311
312#define PAGE_IS_MANAGED(pa)	(pmap_initialized && uvm_pageismanaged(pa))
313
314#define PMAP_IS_ACTIVE(pm)						\
315	((pm) == pmap_kernel() || 					\
316	 (pm) == curlwp->l_proc->p_vmspace->vm_map.pmap)
317
318/* Forward function declarations */
319void pmap_remove_pv(pmap_t, vaddr_t, struct vm_page *, bool);
320void pmap_enter_pv(pmap_t, vaddr_t, struct vm_page *, u_int *);
321pt_entry_t *pmap_pte(pmap_t, vaddr_t);
322
323/*
324 * PV table management functions.
325 */
326void	*pmap_pv_page_alloc(struct pool *, int);
327void	pmap_pv_page_free(struct pool *, void *);
328
329struct pool_allocator pmap_pv_page_allocator = {
330	pmap_pv_page_alloc, pmap_pv_page_free, 0,
331};
332
333#define	pmap_pv_alloc()		pool_get(&pmap_pv_pool, PR_NOWAIT)
334#define	pmap_pv_free(pv)	pool_put(&pmap_pv_pool, (pv))
335
336/*
337 * Misc. functions.
338 */
339
340static inline bool
341pmap_clear_mdpage_attributes(struct vm_page_md *md, u_int clear_attributes)
342{
343	volatile u_int * const attrp = &md->pvh_attrs;
344#ifdef MULTIPROCESSOR
345	for (;;) {
346		u_int old_attr = *attrp;
347		if ((old_attr & clear_attributes) == 0)
348			return false;
349		u_int new_attr = old_attr & ~clear_attributes;
350		if (old_attr == atomic_cas_uint(attrp, old_attr, new_attr))
351			return true;
352	}
353#else
354	u_int old_attr = *attrp;
355	if ((old_attr & clear_attributes) == 0)
356		return false;
357	*attrp &= ~clear_attributes;
358	return true;
359#endif
360}
361
362static inline void
363pmap_set_mdpage_attributes(struct vm_page_md *md, u_int set_attributes)
364{
365#ifdef MULTIPROCESSOR
366	atomic_or_uint(&md->pvh_attrs, set_attributes);
367#else
368	md->pvh_attrs |= set_attributes;
369#endif
370}
371
372static inline void
373pmap_page_syncicache(struct vm_page *pg)
374{
375	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
376#ifdef MULTIPROCESSOR
377	pv_entry_t pv = &md->pvh_first;
378	uint32_t onproc = 0;
379	(void)PG_MD_PVLIST_LOCK(md, false);
380	if (pv->pv_pmap != NULL) {
381		for (; pv != NULL; pv = pv->pv_next) {
382			onproc |= pv->pv_pmap->pm_onproc;
383			if (onproc == cpus_running)
384				break;
385		}
386	}
387	PG_MD_PVLIST_UNLOCK(md);
388	kpreempt_disable();
389	pmap_tlb_syncicache(md->pvh_first.pv_va, onproc);
390	kpreempt_enable();
391#else
392	if (MIPS_HAS_R4K_MMU) {
393		if (PG_MD_CACHED_P(md)) {
394			mips_icache_sync_range_index(
395			    md->pvh_first.pv_va, PAGE_SIZE);
396		}
397	} else {
398		mips_icache_sync_range(MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(pg)),
399		    PAGE_SIZE);
400	}
401#endif
402}
403
404static vaddr_t
405pmap_map_ephemeral_page(struct vm_page *pg, int prot, pt_entry_t *old_pt_entry_p)
406{
407	const paddr_t pa = VM_PAGE_TO_PHYS(pg);
408	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
409	pv_entry_t pv = &md->pvh_first;
410
411#ifdef _LP64
412	vaddr_t va = MIPS_PHYS_TO_XKPHYS_CACHED(pa);
413#else
414	vaddr_t va;
415	if (pa <= MIPS_PHYS_MASK) {
416		va = MIPS_PHYS_TO_KSEG0(pa);
417	} else {
418		KASSERT(pmap_initialized);
419		/*
420		 * Make sure to use a congruent mapping to the last mapped
421		 * address so we don't have to worry about virtual aliases.
422		 */
423		kpreempt_disable();
424		struct cpu_info * const ci = curcpu();
425
426		va = (prot & VM_PROT_WRITE ? ci->ci_pmap_dstbase : ci->ci_pmap_srcbase)
427		    + mips_cache_indexof(MIPS_CACHE_VIRTUAL_ALIAS ? pv->pv_va : pa);
428		*old_pt_entry_p = *kvtopte(va);
429		pmap_kenter_pa(va, pa, prot, 0);
430	}
431#endif /* _LP64 */
432	if (MIPS_CACHE_VIRTUAL_ALIAS) {
433		/*
434		 * If we are forced to use an incompatible alias, flush the
435		 * page from the cache so we will copy the correct contents.
436		 */
437		(void)PG_MD_PVLIST_LOCK(md, false);
438		if (PG_MD_CACHED_P(md)
439		    && mips_cache_badalias(pv->pv_va, va))
440			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
441		if (pv->pv_pmap == NULL)
442			pv->pv_va = va;
443		PG_MD_PVLIST_UNLOCK(md);
444	}
445
446	return va;
447}
448
449static void
450pmap_unmap_ephemeral_page(struct vm_page *pg, vaddr_t va,
451	pt_entry_t old_pt_entry)
452{
453	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
454	pv_entry_t pv = &md->pvh_first;
455
456	(void)PG_MD_PVLIST_LOCK(md, false);
457	if (MIPS_CACHE_VIRTUAL_ALIAS
458	    && (PG_MD_UNCACHED_P(md)
459		|| (pv->pv_pmap != NULL
460		    && mips_cache_badalias(pv->pv_va, va)))) {
461		/*
462		 * If this page was previously uncached or we had to use an
463		 * incompatible alias and it has a valid mapping, flush it
464		 * from the cache.
465		 */
466		mips_dcache_wbinv_range(va, PAGE_SIZE);
467	}
468	PG_MD_PVLIST_UNLOCK(md);
469#ifndef _LP64
470	/*
471	 * If we had to map using a page table entry, unmap it now.
472	 */
473	if (va >= VM_MIN_KERNEL_ADDRESS) {
474		pmap_kremove(va, PAGE_SIZE);
475		if (mips_pg_v(old_pt_entry.pt_entry)) {
476			*kvtopte(va) = old_pt_entry;
477			pmap_tlb_update_addr(pmap_kernel(), va,
478			    old_pt_entry.pt_entry, false);
479		}
480		kpreempt_enable();
481	}
482#endif
483}
484
485/*
486 *	Bootstrap the system enough to run with virtual memory.
487 *	firstaddr is the first unused kseg0 address (not page aligned).
488 */
489void
490pmap_bootstrap(void)
491{
492	vsize_t bufsz;
493
494	pmap_page_colormask = (uvmexp.ncolors -1) << PAGE_SHIFT;
495
496	pmap_tlb_info_init(&pmap_tlb0_info);		/* init the lock */
497
498	/*
499	 * Compute the number of pages kmem_arena will have.
500	 */
501	kmeminit_nkmempages();
502
503	/*
504	 * Figure out how many PTE's are necessary to map the kernel.
505	 * We also reserve space for kmem_alloc_pageable() for vm_fork().
506	 */
507
508	/* Get size of buffer cache and set an upper limit */
509	buf_setvalimit((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 8);
510	bufsz = buf_memcalc();
511	buf_setvalimit(bufsz);
512
513	Sysmapsize = (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
514	    bufsz + 16 * NCARGS + pager_map_size + iospace_size) / NBPG +
515	    (maxproc * UPAGES) + nkmempages;
516#ifdef DEBUG
517	{
518		extern int kmem_guard_depth;
519		Sysmapsize += kmem_guard_depth;
520	}
521#endif
522
523#ifdef SYSVSHM
524	Sysmapsize += shminfo.shmall;
525#endif
526#ifdef KSEG2IOBUFSIZE
527	Sysmapsize += (KSEG2IOBUFSIZE >> PGSHIFT);
528#endif
529#ifdef PMAP_POOLPAGE_DEBUG
530	poolpage.size = nkmempages + MCLBYTES * nmbclusters;
531	Sysmapsize += poolpage.size;
532#endif
533#ifdef _LP64
534	/*
535	 * If we are using tmpfs, then we might want to use a great deal of
536	 * our memory with it.  Make sure we have enough VM to do that.
537	 */
538	Sysmapsize += physmem;
539#else
540	/* XXX: else runs out of space on 256MB sbmips!! */
541	Sysmapsize += 20000;
542#endif
543	/* Rounup to a even number of pte page tables */
544	Sysmapsize = (Sysmapsize + NPTEPG - 1) & -NPTEPG;
545
546	/*
547	 * Initialize `FYI' variables.	Note we're relying on
548	 * the fact that BSEARCH sorts the vm_physmem[] array
549	 * for us.  Must do this before uvm_pageboot_alloc()
550	 * can be called.
551	 */
552	mips_avail_start = ptoa(VM_PHYSMEM_PTR(0)->start);
553	mips_avail_end = ptoa(VM_PHYSMEM_PTR(vm_nphysseg - 1)->end);
554	mips_virtual_end = VM_MIN_KERNEL_ADDRESS + (vaddr_t)Sysmapsize * NBPG;
555
556#ifndef _LP64
557	/* Need space for I/O (not in K1SEG) ? */
558
559	if (mips_virtual_end > VM_MAX_KERNEL_ADDRESS) {
560		mips_virtual_end = VM_MAX_KERNEL_ADDRESS;
561		Sysmapsize =
562		    (VM_MAX_KERNEL_ADDRESS -
563		     (VM_MIN_KERNEL_ADDRESS + iospace_size)) / NBPG;
564	}
565
566	if (iospace_size) {
567		iospace = mips_virtual_end - iospace_size;
568#ifdef DEBUG
569		printf("io: %#"PRIxVADDR".%#"PRIxVADDR" %#"PRIxVADDR"\n",
570		    iospace, iospace_size, mips_virtual_end);
571#endif
572	}
573#endif
574	pmap_pvlist_lock_init();
575
576	/*
577	 * Now actually allocate the kernel PTE array (must be done
578	 * after virtual_end is initialized).
579	 */
580	Sysmap = (pt_entry_t *)
581	    uvm_pageboot_alloc(sizeof(pt_entry_t) * Sysmapsize);
582
583#ifdef PMAP_POOLPAGE_DEBUG
584	mips_virtual_end -= poolpage.size;
585	poolpage.base = mips_virtual_end;
586	poolpage.sysmap = Sysmap + atop(poolpage.size);
587#endif
588	/*
589	 * Initialize the pools.
590	 */
591	pool_init(&pmap_pmap_pool, PMAP_SIZE, 0, 0, 0, "pmappl",
592	    &pool_allocator_nointr, IPL_NONE);
593	pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl",
594	    &pmap_pv_page_allocator, IPL_NONE);
595
596	tlb_set_asid(0);
597
598#ifdef MIPS3_PLUS	/* XXX mmu XXX */
599	/*
600	 * The R4?00 stores only one copy of the Global bit in the
601	 * translation lookaside buffer for each 2 page entry.
602	 * Thus invalid entries must have the Global bit set so
603	 * when Entry LO and Entry HI G bits are anded together
604	 * they will produce a global bit to store in the tlb.
605	 */
606	if (MIPS_HAS_R4K_MMU) {
607		u_int i;
608		pt_entry_t *spte;
609
610		for (i = 0, spte = Sysmap; i < Sysmapsize; i++, spte++)
611			spte->pt_entry = MIPS3_PG_G;
612	}
613#endif	/* MIPS3_PLUS */
614}
615
616/*
617 * Define the initial bounds of the kernel virtual address space.
618 */
619void
620pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
621{
622
623	*vstartp = VM_MIN_KERNEL_ADDRESS;	/* kernel is in K0SEG */
624	*vendp = trunc_page(mips_virtual_end);	/* XXX need pmap_growkernel() */
625}
626
627/*
628 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
629 * This function allows for early dynamic memory allocation until the virtual
630 * memory system has been bootstrapped.  After that point, either kmem_alloc
631 * or malloc should be used.  This function works by stealing pages from the
632 * (to be) managed page pool, then implicitly mapping the pages (by using
633 * their k0seg addresses) and zeroing them.
634 *
635 * It may be used once the physical memory segments have been pre-loaded
636 * into the vm_physmem[] array.  Early memory allocation MUST use this
637 * interface!  This cannot be used after vm_page_startup(), and will
638 * generate a panic if tried.
639 *
640 * Note that this memory will never be freed, and in essence it is wired
641 * down.
642 *
643 * We must adjust *vstartp and/or *vendp iff we use address space
644 * from the kernel virtual address range defined by pmap_virtual_space().
645 */
646vaddr_t
647pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
648{
649	u_int npgs;
650	paddr_t pa;
651	vaddr_t va;
652
653	size = round_page(size);
654	npgs = atop(size);
655
656	for (u_int bank = 0; bank < vm_nphysseg; bank++) {
657		struct vm_physseg * const seg = VM_PHYSMEM_PTR(bank);
658		if (uvm.page_init_done == true)
659			panic("pmap_steal_memory: called _after_ bootstrap");
660
661		printf("%s: seg %u: %#"PRIxPADDR" %#"PRIxPADDR" %#"PRIxPADDR" %#"PRIxPADDR"\n",
662		    __func__, bank,
663		    seg->avail_start, seg->start,
664		    seg->avail_end, seg->end);
665
666		if (seg->avail_start != seg->start
667		    || seg->avail_start >= seg->avail_end) {
668			printf("%s: seg %u: bad start\n", __func__, bank);
669			continue;
670		}
671
672		if (seg->avail_end - seg->avail_start < npgs) {
673			printf("%s: seg %u: too small for %u pages\n",
674			    __func__, bank, npgs);
675			continue;
676		}
677
678		/*
679		 * There are enough pages here; steal them!
680		 */
681		pa = ptoa(seg->avail_start);
682		seg->avail_start += npgs;
683		seg->start += npgs;
684
685		/*
686		 * Have we used up this segment?
687		 */
688		if (seg->avail_start == seg->end) {
689			if (vm_nphysseg == 1)
690				panic("pmap_steal_memory: out of memory!");
691
692			/* Remove this segment from the list. */
693			vm_nphysseg--;
694			for (u_int x = bank; x < vm_nphysseg; x++) {
695				/* structure copy */
696				VM_PHYSMEM_PTR_SWAP(x, x + 1);
697			}
698		}
699
700#ifdef _LP64
701		/*
702		 * Use the same CCA as used to access KSEG0 for XKPHYS.
703		 */
704		uint32_t v;
705
706		__asm __volatile("mfc0 %0,$%1"
707		    : "=r"(v)
708		    : "n"(MIPS_COP_0_CONFIG));
709
710		va = MIPS_PHYS_TO_XKPHYS(v & MIPS3_CONFIG_K0_MASK, pa);
711#else
712		if (pa + size > MIPS_PHYS_MASK + 1)
713			panic("pmap_steal_memory: pa %"PRIxPADDR
714			    " can not be mapped into KSEG0", pa);
715		va = MIPS_PHYS_TO_KSEG0(pa);
716#endif
717
718		memset((void *)va, 0, size);
719		return va;
720	}
721
722	/*
723	 * If we got here, there was no memory left.
724	 */
725	panic("pmap_steal_memory: no memory to steal");
726}
727
728/*
729 *	Initialize the pmap module.
730 *	Called by vm_init, to initialize any structures that the pmap
731 *	system needs to map virtual memory.
732 */
733void
734pmap_init(void)
735{
736#ifdef DEBUG
737	if (pmapdebug & (PDB_FOLLOW|PDB_INIT))
738		printf("pmap_init()\n");
739#endif
740
741	/*
742	 * Initialize the segtab lock.
743	 */
744	mutex_init(&pmap_segtab_lock, MUTEX_DEFAULT, IPL_HIGH);
745
746	/*
747	 * Set a low water mark on the pv_entry pool, so that we are
748	 * more likely to have these around even in extreme memory
749	 * starvation.
750	 */
751	pool_setlowat(&pmap_pv_pool, pmap_pv_lowat);
752
753	/*
754	 * Now it is safe to enable pv entry recording.
755	 */
756	pmap_initialized = true;
757
758#ifndef _LP64
759	/*
760	 * If we have more memory than can be mapped by KSEG0, we need to
761	 * allocate enough VA so we can map pages with the right color
762	 * (to avoid cache alias problems).
763	 */
764	if (mips_avail_end > MIPS_KSEG1_START - MIPS_KSEG0_START) {
765		curcpu()->ci_pmap_dstbase = uvm_km_alloc(kernel_map,
766		    uvmexp.ncolors * PAGE_SIZE, 0, UVM_KMF_VAONLY);
767		KASSERT(curcpu()->ci_pmap_dstbase);
768		curcpu()->ci_pmap_srcbase = uvm_km_alloc(kernel_map,
769		    uvmexp.ncolors * PAGE_SIZE, 0, UVM_KMF_VAONLY);
770		KASSERT(curcpu()->ci_pmap_srcbase);
771	}
772#endif
773
774#ifdef MIPS3
775	if (MIPS_HAS_R4K_MMU) {
776		/*
777		 * XXX
778		 * Disable sosend_loan() in src/sys/kern/uipc_socket.c
779		 * on MIPS3 CPUs to avoid possible virtual cache aliases
780		 * and uncached mappings in pmap_enter_pv().
781		 *
782		 * Ideally, read only shared mapping won't cause aliases
783		 * so pmap_enter_pv() should handle any shared read only
784		 * mappings without uncached ops like ARM pmap.
785		 *
786		 * On the other hand, R4000 and R4400 have the virtual
787		 * coherency exceptions which will happen even on read only
788		 * mappings, so we always have to disable sosend_loan()
789		 * on such CPUs.
790		 */
791		sock_loan_thresh = -1;
792	}
793#endif
794}
795
796/*
797 *	Create and return a physical map.
798 *
799 *	If the size specified for the map
800 *	is zero, the map is an actual physical
801 *	map, and may be referenced by the
802 *	hardware.
803 *
804 *	If the size specified is non-zero,
805 *	the map will be used in software only, and
806 *	is bounded by that size.
807 */
808pmap_t
809pmap_create(void)
810{
811	pmap_t pmap;
812
813	PMAP_COUNT(create);
814#ifdef DEBUG
815	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
816		printf("pmap_create()\n");
817#endif
818
819	pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
820	memset(pmap, 0, PMAP_SIZE);
821
822	pmap->pm_count = 1;
823
824	pmap_segtab_init(pmap);
825
826	return pmap;
827}
828
829/*
830 *	Retire the given physical map from service.
831 *	Should only be called if the map contains
832 *	no valid mappings.
833 */
834void
835pmap_destroy(pmap_t pmap)
836{
837#ifdef DEBUG
838	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
839		printf("pmap_destroy(%p)\n", pmap);
840#endif
841	if (atomic_dec_uint_nv(&pmap->pm_count) > 0) {
842		PMAP_COUNT(dereference);
843		return;
844	}
845
846	KASSERT(pmap->pm_count == 0);
847	PMAP_COUNT(destroy);
848	kpreempt_disable();
849	pmap_tlb_asid_release_all(pmap);
850	pmap_segtab_destroy(pmap);
851
852	pool_put(&pmap_pmap_pool, pmap);
853	kpreempt_enable();
854}
855
856/*
857 *	Add a reference to the specified pmap.
858 */
859void
860pmap_reference(pmap_t pmap)
861{
862
863#ifdef DEBUG
864	if (pmapdebug & PDB_FOLLOW)
865		printf("pmap_reference(%p)\n", pmap);
866#endif
867	if (pmap != NULL) {
868		atomic_inc_uint(&pmap->pm_count);
869	}
870	PMAP_COUNT(reference);
871}
872
873/*
874 *	Make a new pmap (vmspace) active for the given process.
875 */
876void
877pmap_activate(struct lwp *l)
878{
879	pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
880
881	PMAP_COUNT(activate);
882
883	kpreempt_disable();
884	pmap_tlb_asid_acquire(pmap, l);
885	if (l == curlwp) {
886		pmap_segtab_activate(pmap, l);
887	}
888	kpreempt_enable();
889}
890
891/*
892 *	Make a previously active pmap (vmspace) inactive.
893 */
894void
895pmap_deactivate(struct lwp *l)
896{
897	PMAP_COUNT(deactivate);
898
899	kpreempt_disable();
900	KASSERT(l == curlwp || l->l_cpu == curlwp->l_cpu);
901#ifdef _LP64
902	curcpu()->ci_pmap_segtab = (void *)(MIPS_KSEG2_START + 0x1eadbeef);
903	curcpu()->ci_pmap_seg0tab = NULL;
904#else
905	curcpu()->ci_pmap_seg0tab = (void *)(MIPS_KSEG2_START + 0x1eadbeef);
906#endif
907	pmap_tlb_asid_deactivate(l->l_proc->p_vmspace->vm_map.pmap);
908	kpreempt_enable();
909}
910
911void
912pmap_update(struct pmap *pm)
913{
914	PMAP_COUNT(update);
915
916	kpreempt_disable();
917#ifdef MULTIPROCESSOR
918	u_int pending = atomic_swap_uint(&pm->pm_shootdown_pending, 0);
919	if (pending && pmap_tlb_shootdown_bystanders(pm))
920		PMAP_COUNT(shootdown_ipis);
921#endif
922	/*
923	 * If pmap_remove_all was called, we deactivated ourselves and nuked
924	 * our ASID.  Now we have to reactivate ourselves.
925	 */
926	if (__predict_false(pm->pm_flags & PMAP_DEFERRED_ACTIVATE)) {
927		pm->pm_flags ^= PMAP_DEFERRED_ACTIVATE;
928		pmap_tlb_asid_acquire(pm, curlwp);
929		pmap_segtab_activate(pm, curlwp);
930	}
931	kpreempt_enable();
932}
933
934/*
935 *	Remove the given range of addresses from the specified map.
936 *
937 *	It is assumed that the start and end are properly
938 *	rounded to the page size.
939 */
940
941static bool
942pmap_pte_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, pt_entry_t *pte,
943	uintptr_t flags)
944{
945#ifdef DEBUG
946	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) {
947		printf("%s: %p, %"PRIxVADDR", %"PRIxVADDR", %p, %"PRIxPTR"\n",
948		   __func__, pmap, sva, eva, pte, flags);
949	}
950#endif
951	KASSERT(kpreempt_disabled());
952
953	for (; sva < eva; sva += NBPG, pte++) {
954		struct vm_page *pg;
955		uint32_t pt_entry = pte->pt_entry;
956		if (!mips_pg_v(pt_entry))
957			continue;
958		PMAP_COUNT(remove_user_pages);
959		if (mips_pg_wired(pt_entry))
960			pmap->pm_stats.wired_count--;
961		pmap->pm_stats.resident_count--;
962		pg = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pt_entry));
963		if (pg) {
964			pmap_remove_pv(pmap, sva, pg,
965			   pt_entry & mips_pg_m_bit());
966		}
967		pte->pt_entry = mips_pg_nv_bit();
968		/*
969		 * Flush the TLB for the given address.
970		 */
971		pmap_tlb_invalidate_addr(pmap, sva);
972	}
973	return false;
974}
975
976void
977pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
978{
979	struct vm_page *pg;
980
981#ifdef DEBUG
982	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
983		printf("pmap_remove(%p, %#"PRIxVADDR", %#"PRIxVADDR")\n", pmap, sva, eva);
984#endif
985
986	kpreempt_disable();
987	if (pmap == pmap_kernel()) {
988		/* remove entries from kernel pmap */
989		PMAP_COUNT(remove_kernel_calls);
990#ifdef PARANOIADIAG
991		if (sva < VM_MIN_KERNEL_ADDRESS || eva >= mips_virtual_end)
992			panic("pmap_remove: kva not in range");
993#endif
994		pt_entry_t *pte = kvtopte(sva);
995		for (; sva < eva; sva += NBPG, pte++) {
996			uint32_t pt_entry = pte->pt_entry;
997			if (!mips_pg_v(pt_entry))
998				continue;
999			PMAP_COUNT(remove_kernel_pages);
1000			if (mips_pg_wired(pt_entry))
1001				pmap->pm_stats.wired_count--;
1002			pmap->pm_stats.resident_count--;
1003			pg = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pt_entry));
1004			if (pg)
1005				pmap_remove_pv(pmap, sva, pg, false);
1006			if (MIPS_HAS_R4K_MMU)
1007				/* See above about G bit */
1008				pte->pt_entry = MIPS3_PG_NV | MIPS3_PG_G;
1009			else
1010				pte->pt_entry = MIPS1_PG_NV;
1011
1012			/*
1013			 * Flush the TLB for the given address.
1014			 */
1015			pmap_tlb_invalidate_addr(pmap, sva);
1016		}
1017		kpreempt_enable();
1018		return;
1019	}
1020
1021	PMAP_COUNT(remove_user_calls);
1022#ifdef PARANOIADIAG
1023	if (eva > VM_MAXUSER_ADDRESS)
1024		panic("pmap_remove: uva not in range");
1025	if (PMAP_IS_ACTIVE(pmap)) {
1026		struct pmap_asid_info * const pai = PMAP_PAI(pmap, curcpu());
1027		uint32_t asid;
1028
1029		__asm volatile("mfc0 %0,$10; nop" : "=r"(asid));
1030		asid = (MIPS_HAS_R4K_MMU) ? (asid & 0xff) : (asid & 0xfc0) >> 6;
1031		if (asid != pai->pai_asid) {
1032			panic("inconsistency for active TLB flush: %d <-> %d",
1033			    asid, pai->pai_asid);
1034		}
1035	}
1036#endif
1037#ifdef PMAP_FAULTINFO
1038	curpcb->pcb_faultinfo.pfi_faultaddr = 0;
1039	curpcb->pcb_faultinfo.pfi_repeats = 0;
1040	curpcb->pcb_faultinfo.pfi_faultpte = NULL;
1041#endif
1042	pmap_pte_process(pmap, sva, eva, pmap_pte_remove, 0);
1043	kpreempt_enable();
1044}
1045
1046/*
1047 *	pmap_page_protect:
1048 *
1049 *	Lower the permission for all mappings to a given page.
1050 */
1051void
1052pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1053{
1054	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1055	pv_entry_t pv;
1056	vaddr_t va;
1057
1058	PMAP_COUNT(page_protect);
1059#ifdef DEBUG
1060	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
1061	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
1062		printf("pmap_page_protect(%#"PRIxPADDR", %x)\n",
1063		    VM_PAGE_TO_PHYS(pg), prot);
1064#endif
1065	switch (prot) {
1066	case VM_PROT_READ|VM_PROT_WRITE:
1067	case VM_PROT_ALL:
1068		break;
1069
1070	/* copy_on_write */
1071	case VM_PROT_READ:
1072	case VM_PROT_READ|VM_PROT_EXECUTE:
1073		(void)PG_MD_PVLIST_LOCK(md, false);
1074		pv = &md->pvh_first;
1075		/*
1076		 * Loop over all current mappings setting/clearing as apropos.
1077		 */
1078		if (pv->pv_pmap != NULL) {
1079			while (pv != NULL) {
1080				const pmap_t pmap = pv->pv_pmap;
1081				const uint16_t gen = PG_MD_PVLIST_GEN(md);
1082				va = pv->pv_va;
1083				PG_MD_PVLIST_UNLOCK(md);
1084				pmap_protect(pmap, va, va + PAGE_SIZE, prot);
1085				KASSERT(pv->pv_pmap == pmap);
1086				pmap_update(pmap);
1087				if (gen != PG_MD_PVLIST_LOCK(md, false)) {
1088					pv = &md->pvh_first;
1089				} else {
1090					pv = pv->pv_next;
1091				}
1092			}
1093		}
1094		PG_MD_PVLIST_UNLOCK(md);
1095		break;
1096
1097	/* remove_all */
1098	default:
1099		/*
1100		 * Do this first so that for each unmapping, pmap_remove_pv
1101		 * won't try to sync the icache.
1102		 */
1103		if (pmap_clear_mdpage_attributes(md, PG_MD_EXECPAGE)) {
1104			PMAP_COUNT(exec_uncached_page_protect);
1105		}
1106		(void)PG_MD_PVLIST_LOCK(md, false);
1107		pv = &md->pvh_first;
1108		while (pv->pv_pmap != NULL) {
1109			const pmap_t pmap = pv->pv_pmap;
1110			va = pv->pv_va;
1111			PG_MD_PVLIST_UNLOCK(md);
1112			pmap_remove(pmap, va, va + PAGE_SIZE);
1113			pmap_update(pmap);
1114			(void)PG_MD_PVLIST_LOCK(md, false);
1115		}
1116		PG_MD_PVLIST_UNLOCK(md);
1117	}
1118}
1119
1120static bool
1121pmap_pte_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, pt_entry_t *pte,
1122	uintptr_t flags)
1123{
1124	const uint32_t pg_mask = ~(mips_pg_m_bit() | mips_pg_ro_bit());
1125	const uint32_t p = (flags & VM_PROT_WRITE) ? mips_pg_rw_bit() : mips_pg_ro_bit();
1126	KASSERT(kpreempt_disabled());
1127	/*
1128	 * Change protection on every valid mapping within this segment.
1129	 */
1130	for (; sva < eva; sva += NBPG, pte++) {
1131		uint32_t pt_entry = pte->pt_entry;
1132		if (!mips_pg_v(pt_entry))
1133			continue;
1134		struct vm_page *pg;
1135		pg = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pt_entry));
1136		if (pg && (pt_entry & mips_pg_m_bit())) {
1137			struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1138			if (MIPS_HAS_R4K_MMU
1139			    && MIPS_CACHE_VIRTUAL_ALIAS
1140			    && PG_MD_CACHED_P(md))
1141				mips_dcache_wbinv_range_index(sva, PAGE_SIZE);
1142			if (PG_MD_EXECPAGE_P(md)) {
1143				KASSERT(md->pvh_first.pv_pmap != NULL);
1144				if (PG_MD_CACHED_P(md)) {
1145					pmap_page_syncicache(pg);
1146					PMAP_COUNT(exec_synced_protect);
1147				}
1148			}
1149		}
1150		pt_entry = (pt_entry & pg_mask) | p;
1151		pte->pt_entry = pt_entry;
1152		/*
1153		 * Update the TLB if needed.
1154		 */
1155		pmap_tlb_update_addr(pmap, sva, pt_entry, true);
1156	}
1157	return false;
1158}
1159
1160/*
1161 *	Set the physical protection on the
1162 *	specified range of this map as requested.
1163 */
1164void
1165pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1166{
1167	const uint32_t pg_mask = ~(mips_pg_m_bit() | mips_pg_ro_bit());
1168	pt_entry_t *pte;
1169	u_int p;
1170
1171	PMAP_COUNT(protect);
1172#ifdef DEBUG
1173	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1174		printf("pmap_protect(%p, %#"PRIxVADDR", %#"PRIxVADDR", %x)\n",
1175		    pmap, sva, eva, prot);
1176#endif
1177	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1178		pmap_remove(pmap, sva, eva);
1179		return;
1180	}
1181
1182	p = (prot & VM_PROT_WRITE) ? mips_pg_rw_bit() : mips_pg_ro_bit();
1183
1184	kpreempt_disable();
1185	if (pmap == pmap_kernel()) {
1186		/*
1187		 * Change entries in kernel pmap.
1188		 * This will trap if the page is writable (in order to set
1189		 * the dirty bit) even if the dirty bit is already set. The
1190		 * optimization isn't worth the effort since this code isn't
1191		 * executed much. The common case is to make a user page
1192		 * read-only.
1193		 */
1194#ifdef PARANOIADIAG
1195		if (sva < VM_MIN_KERNEL_ADDRESS || eva >= mips_virtual_end)
1196			panic("pmap_protect: kva not in range");
1197#endif
1198		pte = kvtopte(sva);
1199		for (; sva < eva; sva += NBPG, pte++) {
1200			uint32_t pt_entry = pte->pt_entry;
1201			if (!mips_pg_v(pt_entry))
1202				continue;
1203			if (MIPS_HAS_R4K_MMU && (pt_entry & mips_pg_m_bit()))
1204				mips_dcache_wb_range(sva, PAGE_SIZE);
1205			pt_entry &= (pt_entry & pg_mask) | p;
1206			pte->pt_entry = pt_entry;
1207			pmap_tlb_update_addr(pmap, sva, pt_entry, true);
1208		}
1209		kpreempt_enable();
1210		return;
1211	}
1212
1213#ifdef PARANOIADIAG
1214	if (eva > VM_MAXUSER_ADDRESS)
1215		panic("pmap_protect: uva not in range");
1216	if (PMAP_IS_ACTIVE(pmap)) {
1217		struct pmap_asid_info * const pai = PMAP_PAI(pmap, curcpu());
1218		uint32_t asid;
1219
1220		__asm volatile("mfc0 %0,$10; nop" : "=r"(asid));
1221		asid = (MIPS_HAS_R4K_MMU) ? (asid & 0xff) : (asid & 0xfc0) >> 6;
1222		if (asid != pai->pai_asid) {
1223			panic("inconsistency for active TLB update: %d <-> %d",
1224			    asid, pai->pai_asid);
1225		}
1226	}
1227#endif
1228
1229	/*
1230	 * Change protection on every valid mapping within this segment.
1231	 */
1232	pmap_pte_process(pmap, sva, eva, pmap_pte_protect, p);
1233	kpreempt_enable();
1234}
1235
1236/*
1237 * XXXJRT -- need a version for each cache type.
1238 */
1239void
1240pmap_procwr(struct proc *p, vaddr_t va, size_t len)
1241{
1242#ifdef MIPS1
1243	pmap_t pmap;
1244
1245	pmap = p->p_vmspace->vm_map.pmap;
1246#endif /* MIPS1 */
1247
1248	if (MIPS_HAS_R4K_MMU) {
1249#ifdef MIPS3_PLUS	/* XXX mmu XXX */
1250		/*
1251		 * XXX
1252		 * shouldn't need to do this for physical d$?
1253		 * should need to do this for virtual i$ if prot == EXEC?
1254		 */
1255		if (p == curlwp->l_proc
1256		    && mips_cache_info.mci_pdcache_way_mask < PAGE_SIZE)
1257		    /* XXX check icache mask too? */
1258			mips_icache_sync_range(va, len);
1259		else
1260			mips_icache_sync_range_index(va, len);
1261#endif /* MIPS3_PLUS */	/* XXX mmu XXX */
1262	} else {
1263#ifdef MIPS1
1264		pt_entry_t *pte;
1265		unsigned entry;
1266
1267		kpreempt_disable();
1268		if (pmap == pmap_kernel()) {
1269			pte = kvtopte(va);
1270		} else {
1271			pte = pmap_pte_lookup(pmap, va);
1272		}
1273		entry = pte->pt_entry;
1274		kpreempt_enable();
1275		if (!mips_pg_v(entry))
1276			return;
1277
1278		/*
1279		 * XXXJRT -- Wrong -- since page is physically-indexed, we
1280		 * XXXJRT need to loop.
1281		 */
1282		mips_icache_sync_range(
1283		    MIPS_PHYS_TO_KSEG0(mips1_tlbpfn_to_paddr(entry)
1284		    + (va & PGOFSET)),
1285		    len);
1286#endif /* MIPS1 */
1287	}
1288}
1289
1290/*
1291 *	Return RO protection of page.
1292 */
1293bool
1294pmap_is_page_ro_p(pmap_t pmap, vaddr_t va, uint32_t entry)
1295{
1296
1297	return (entry & mips_pg_ro_bit()) != 0;
1298}
1299
1300#if defined(MIPS3_PLUS) && !defined(MIPS3_NO_PV_UNCACHED)	/* XXX mmu XXX */
1301/*
1302 *	pmap_page_cache:
1303 *
1304 *	Change all mappings of a managed page to cached/uncached.
1305 */
1306static void
1307pmap_page_cache(struct vm_page *pg, bool cached)
1308{
1309	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1310	const uint32_t newmode = cached ? MIPS3_PG_CACHED : MIPS3_PG_UNCACHED;
1311
1312	KASSERT(kpreempt_disabled());
1313#ifdef DEBUG
1314	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1315		printf("pmap_page_uncache(%#"PRIxPADDR")\n", VM_PAGE_TO_PHYS(pg));
1316#endif
1317
1318	if (cached) {
1319		pmap_clear_mdpage_attributes(md, PG_MD_UNCACHED);
1320		PMAP_COUNT(page_cache_restorations);
1321	} else {
1322		pmap_set_mdpage_attributes(md, PG_MD_UNCACHED);
1323		PMAP_COUNT(page_cache_evictions);
1324	}
1325
1326	KASSERT(PG_MD_PVLIST_LOCKED_P(md));
1327	KASSERT(kpreempt_disabled());
1328	for (pv_entry_t pv = &md->pvh_first;
1329	     pv != NULL;
1330	     pv = pv->pv_next) {
1331		pmap_t pmap = pv->pv_pmap;
1332		vaddr_t va = pv->pv_va;
1333		pt_entry_t *pte;
1334		uint32_t pt_entry;
1335
1336		KASSERT(pmap != NULL);
1337		KASSERT(!MIPS_KSEG0_P(va));
1338		KASSERT(!MIPS_KSEG1_P(va));
1339#ifdef _LP64
1340		KASSERT(!MIPS_XKPHYS_P(va));
1341#endif
1342		if (pmap == pmap_kernel()) {
1343			/*
1344			 * Change entries in kernel pmap.
1345			 */
1346			pte = kvtopte(va);
1347		} else {
1348			pte = pmap_pte_lookup(pmap, va);
1349			if (pte == NULL)
1350				continue;
1351		}
1352		pt_entry = pte->pt_entry;
1353		if (pt_entry & MIPS3_PG_V) {
1354			pt_entry = (pt_entry & ~MIPS3_PG_CACHEMODE) | newmode;
1355			pte->pt_entry = pt_entry;
1356			pmap_tlb_update_addr(pmap, va, pt_entry, true);
1357		}
1358	}
1359}
1360#endif	/* MIPS3_PLUS && !MIPS3_NO_PV_UNCACHED */
1361
1362/*
1363 *	Insert the given physical page (p) at
1364 *	the specified virtual address (v) in the
1365 *	target physical map with the protection requested.
1366 *
1367 *	If specified, the page will be wired down, meaning
1368 *	that the related pte can not be reclaimed.
1369 *
1370 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1371 *	or lose information.  That is, this routine must actually
1372 *	insert this page into the given map NOW.
1373 */
1374int
1375pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1376{
1377	pt_entry_t *pte;
1378	u_int npte;
1379	struct vm_page *pg;
1380	bool wired = (flags & PMAP_WIRED) != 0;
1381#if defined(_MIPS_PADDR_T_64BIT) || defined(_LP64)
1382	bool cached = true;
1383	bool prefetch = false;
1384#endif
1385
1386#ifdef DEBUG
1387	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1388		printf("pmap_enter(%p, %#"PRIxVADDR", %#"PRIxPADDR", %x, %x)\n",
1389		    pmap, va, pa, prot, wired);
1390#endif
1391	const bool good_color = PMAP_PAGE_COLOROK_P(pa, va);
1392	if (pmap == pmap_kernel()) {
1393		PMAP_COUNT(kernel_mappings);
1394		if (!good_color)
1395			PMAP_COUNT(kernel_mappings_bad);
1396#if defined(DEBUG) || defined(DIAGNOSTIC) || defined(PARANOIADIAG)
1397		if (va < VM_MIN_KERNEL_ADDRESS || va >= mips_virtual_end)
1398			panic("pmap_enter: kva %#"PRIxVADDR"too big", va);
1399#endif
1400	} else {
1401		PMAP_COUNT(user_mappings);
1402		if (!good_color)
1403			PMAP_COUNT(user_mappings_bad);
1404#if defined(DEBUG) || defined(DIAGNOSTIC) || defined(PARANOIADIAG)
1405		if (va >= VM_MAXUSER_ADDRESS)
1406			panic("pmap_enter: uva %#"PRIxVADDR" too big", va);
1407#endif
1408	}
1409#ifdef PARANOIADIAG
1410#if defined(cobalt) || defined(newsmips) || defined(pmax) /* otherwise ok */
1411	if (pa & 0x80000000)	/* this is not error in general. */
1412		panic("pmap_enter: pa");
1413#endif
1414
1415	if (!(prot & VM_PROT_READ))
1416		panic("pmap_enter: prot");
1417#endif
1418
1419#if defined(_MIPS_PADDR_T_64BIT) || defined(_LP64)
1420	if (flags & PMAP_NOCACHE)
1421		cached = 0;
1422
1423	if (pa & PGC_NOCACHE) {
1424		cached = false;
1425		pa &= ~PGC_NOCACHE;
1426	}
1427	if (pa & PGC_PREFETCH) {
1428		prefetch = true;
1429		pa &= ~PGC_PREFETCH;
1430	}
1431#endif
1432	pg = PHYS_TO_VM_PAGE(pa);
1433
1434	if (pg) {
1435		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1436
1437		/* Set page referenced/modified status based on flags */
1438		if (flags & VM_PROT_WRITE)
1439			pmap_set_mdpage_attributes(md, PG_MD_MODIFIED|PG_MD_REFERENCED);
1440		else if (flags & VM_PROT_ALL)
1441			pmap_set_mdpage_attributes(md, PG_MD_REFERENCED);
1442		if (!(prot & VM_PROT_WRITE))
1443			/*
1444			 * If page is not yet referenced, we could emulate this
1445			 * by not setting the page valid, and setting the
1446			 * referenced status in the TLB fault handler, similar
1447			 * to how page modified status is done for UTLBmod
1448			 * exceptions.
1449			 */
1450			npte = mips_pg_ropage_bit();
1451		else {
1452#if defined(_MIPS_PADDR_T_64BIT) || defined(_LP64)
1453			if (cached == false) {
1454				if (PG_MD_MODIFIED_P(md)) {
1455					npte = mips_pg_rwncpage_bit();
1456				} else {
1457					npte = mips_pg_cwncpage_bit();
1458				}
1459				PMAP_COUNT(uncached_mappings);
1460			} else
1461#endif
1462			 {
1463				if (PG_MD_MODIFIED_P(md)) {
1464					npte = mips_pg_rwpage_bit();
1465				} else {
1466					npte = mips_pg_cwpage_bit();
1467				}
1468			}
1469		}
1470		PMAP_COUNT(managed_mappings);
1471	} else {
1472		/*
1473		 * Assumption: if it is not part of our managed memory
1474		 * then it must be device memory which may be volatile.
1475		 */
1476		if (MIPS_HAS_R4K_MMU) {
1477#if defined(_MIPS_PADDR_T_64BIT) || defined(_LP64)
1478			u_int cca = PMAP_CCA_FOR_PA(pa);
1479			if (prefetch) cca = mips_options.mips3_cca_devmem;
1480			npte = MIPS3_PG_IOPAGE(cca) &
1481			    ~MIPS3_PG_G;
1482#else
1483			npte = MIPS3_PG_IOPAGE(PMAP_CCA_FOR_PA(pa)) &
1484			    ~MIPS3_PG_G;
1485#endif
1486		} else {
1487			npte = (prot & VM_PROT_WRITE) ?
1488			    (MIPS1_PG_D | MIPS1_PG_N) :
1489			    (MIPS1_PG_RO | MIPS1_PG_N);
1490		}
1491		PMAP_COUNT(unmanaged_mappings);
1492	}
1493
1494#if 0
1495	/*
1496	 * The only time we need to flush the cache is if we
1497	 * execute from a physical address and then change the data.
1498	 * This is the best place to do this.
1499	 * pmap_protect() and pmap_remove() are mostly used to switch
1500	 * between R/W and R/O pages.
1501	 * NOTE: we only support cache flush for read only text.
1502	 */
1503#ifdef MIPS1
1504	if (!MIPS_HAS_R4K_MMU
1505	    && pg != NULL
1506	    && prot == (VM_PROT_READ | VM_PROT_EXECUTE)) {
1507		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1508		PMAP_COUNT(enter_exec_mapping);
1509		if (!PG_MD_EXECPAGE_P(md)) {
1510			mips_icache_sync_range(MIPS_PHYS_TO_KSEG0(pa),
1511			    PAGE_SIZE);
1512			pmap_set_mdpage_attributes(md, PG_MD_EXECPAGE);
1513			PMAP_COUNT(exec_syncicache_entry);
1514		}
1515	}
1516#endif
1517#endif
1518
1519	kpreempt_disable();
1520	if (pmap == pmap_kernel()) {
1521		if (pg)
1522			pmap_enter_pv(pmap, va, pg, &npte);
1523
1524		/* enter entries into kernel pmap */
1525		pte = kvtopte(va);
1526
1527		if (MIPS_HAS_R4K_MMU)
1528			npte |= mips3_paddr_to_tlbpfn(pa) | MIPS3_PG_G;
1529		else
1530			npte |= mips1_paddr_to_tlbpfn(pa) |
1531			    MIPS1_PG_V | MIPS1_PG_G;
1532
1533		if (wired) {
1534			pmap->pm_stats.wired_count++;
1535			npte |= mips_pg_wired_bit();
1536		}
1537		if (mips_pg_v(pte->pt_entry)
1538		    && mips_tlbpfn_to_paddr(pte->pt_entry) != pa) {
1539			pmap_remove(pmap, va, va + NBPG);
1540			PMAP_COUNT(kernel_mappings_changed);
1541		}
1542		bool resident = mips_pg_v(pte->pt_entry);
1543		if (!resident)
1544			pmap->pm_stats.resident_count++;
1545		pte->pt_entry = npte;
1546
1547		/*
1548		 * Update the same virtual address entry.
1549		 */
1550		pmap_tlb_update_addr(pmap, va, npte, resident);
1551		kpreempt_enable();
1552		return 0;
1553	}
1554
1555	pte = pmap_pte_reserve(pmap, va, flags);
1556	if (__predict_false(pte == NULL)) {
1557		kpreempt_enable();
1558		return ENOMEM;
1559	}
1560
1561	/* Done after case that may sleep/return. */
1562	if (pg)
1563		pmap_enter_pv(pmap, va, pg, &npte);
1564
1565	/*
1566	 * Now validate mapping with desired protection/wiring.
1567	 * Assume uniform modified and referenced status for all
1568	 * MIPS pages in a MACH page.
1569	 */
1570
1571	if (MIPS_HAS_R4K_MMU)
1572		npte |= mips3_paddr_to_tlbpfn(pa);
1573	else
1574		npte |= mips1_paddr_to_tlbpfn(pa) | MIPS1_PG_V;
1575
1576	if (wired) {
1577		pmap->pm_stats.wired_count++;
1578		npte |= mips_pg_wired_bit();
1579	}
1580#if defined(DEBUG)
1581	if (pmapdebug & PDB_ENTER) {
1582		printf("pmap_enter: %p: %#"PRIxVADDR": new pte %#x (pa %#"PRIxPADDR")", pmap, va, npte, pa);
1583		printf("\n");
1584	}
1585#endif
1586
1587#ifdef PARANOIADIAG
1588	if (PMAP_IS_ACTIVE(pmap)) {
1589		uint32_t asid;
1590
1591		__asm volatile("mfc0 %0,$10; nop" : "=r"(asid));
1592		asid = (MIPS_HAS_R4K_MMU) ? (asid & 0xff) : (asid & 0xfc0) >> 6;
1593		if (asid != pai->pai_asid) {
1594			panic("inconsistency for active TLB update: %u <-> %u",
1595			    asid, pai->pai_asid);
1596		}
1597	}
1598#endif
1599
1600	if (mips_pg_v(pte->pt_entry) &&
1601	    mips_tlbpfn_to_paddr(pte->pt_entry) != pa) {
1602#ifdef PMAP_FAULTINFO
1603		struct pcb_faultinfo tmp_fi = curpcb->pcb_faultinfo;
1604#endif
1605		pmap_remove(pmap, va, va + NBPG);
1606#ifdef PMAP_FAULTINFO
1607		curpcb->pcb_faultinfo = tmp_fi;
1608#endif
1609		PMAP_COUNT(user_mappings_changed);
1610	}
1611
1612	KASSERT(mips_pg_v(npte));
1613	bool resident = mips_pg_v(pte->pt_entry);
1614	if (!resident)
1615		pmap->pm_stats.resident_count++;
1616#ifdef PMAP_FAULTINFO
1617	if (curpcb->pcb_faultinfo.pfi_faultpte == pte
1618	    && curpcb->pcb_faultinfo.pfi_repeats > 1) {
1619		printf("%s(%#"PRIxVADDR", %#"PRIxPADDR"): changing pte@%p from %#x to %#x\n",
1620		    __func__, va, pa, pte, pte->pt_entry, npte);
1621		cpu_Debugger();
1622	}
1623#endif
1624	pte->pt_entry = npte;
1625
1626	pmap_tlb_update_addr(pmap, va, npte, resident);
1627	kpreempt_enable();
1628
1629	if (pg != NULL && (prot == (VM_PROT_READ | VM_PROT_EXECUTE))) {
1630		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1631#ifdef DEBUG
1632		if (pmapdebug & PDB_ENTER)
1633			printf("pmap_enter: flush I cache va %#"PRIxVADDR" (%#"PRIxPADDR")\n",
1634			    va - NBPG, pa);
1635#endif
1636		PMAP_COUNT(exec_mappings);
1637		if (!PG_MD_EXECPAGE_P(md) && PG_MD_CACHED_P(md)) {
1638			pmap_page_syncicache(pg);
1639			pmap_set_mdpage_attributes(md, PG_MD_EXECPAGE);
1640			PMAP_COUNT(exec_synced_mappings);
1641		}
1642	}
1643
1644	return 0;
1645}
1646
1647void
1648pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1649{
1650	const bool managed = PAGE_IS_MANAGED(pa);
1651	pt_entry_t *pte;
1652	u_int npte;
1653
1654#ifdef DEBUG
1655	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1656		printf("pmap_kenter_pa(%#"PRIxVADDR", %#"PRIxPADDR", %x)\n", va, pa, prot);
1657#endif
1658	PMAP_COUNT(kenter_pa);
1659	if (!PMAP_PAGE_COLOROK_P(pa, va) && managed)
1660		PMAP_COUNT(kenter_pa_bad);
1661
1662	if (!managed)
1663		PMAP_COUNT(kenter_pa_unmanaged);
1664
1665	if (MIPS_HAS_R4K_MMU) {
1666		npte = mips3_paddr_to_tlbpfn(pa)
1667		    | ((prot & VM_PROT_WRITE) ? MIPS3_PG_D : MIPS3_PG_RO)
1668		    | (managed ? MIPS3_PG_CACHED : MIPS3_PG_UNCACHED)
1669		    | MIPS3_PG_WIRED | MIPS3_PG_V | MIPS3_PG_G;
1670	} else {
1671		npte = mips1_paddr_to_tlbpfn(pa)
1672		    | ((prot & VM_PROT_WRITE) ? MIPS1_PG_D : MIPS1_PG_RO)
1673		    | (managed ? 0 : MIPS1_PG_N)
1674		    | MIPS1_PG_WIRED | MIPS1_PG_V | MIPS1_PG_G;
1675	}
1676	kpreempt_disable();
1677	pte = kvtopte(va);
1678	KASSERT(!mips_pg_v(pte->pt_entry));
1679	pte->pt_entry = npte;
1680	pmap_tlb_update_addr(pmap_kernel(), va, npte, false);
1681	kpreempt_enable();
1682}
1683
1684void
1685pmap_kremove(vaddr_t va, vsize_t len)
1686{
1687#ifdef DEBUG
1688	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE))
1689		printf("pmap_kremove(%#"PRIxVADDR", %#"PRIxVSIZE")\n", va, len);
1690#endif
1691
1692	const uint32_t new_pt_entry =
1693	    (MIPS_HAS_R4K_MMU ? MIPS3_PG_NV | MIPS3_PG_G : MIPS1_PG_NV);
1694
1695	kpreempt_disable();
1696	pt_entry_t *pte = kvtopte(va);
1697	for (vaddr_t eva = va + len; va < eva; va += PAGE_SIZE, pte++) {
1698		uint32_t pt_entry = pte->pt_entry;
1699		if (!mips_pg_v(pt_entry)) {
1700			continue;
1701		}
1702
1703		PMAP_COUNT(kremove_pages);
1704		if (MIPS_HAS_R4K_MMU && MIPS_CACHE_VIRTUAL_ALIAS) {
1705			struct vm_page * const pg =
1706			    PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pt_entry));
1707			if (pg != NULL) {
1708				struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1709				(void)PG_MD_PVLIST_LOCK(md, false);
1710				pv_entry_t pv = &md->pvh_first;
1711				if (pv->pv_pmap == NULL) {
1712					pv->pv_va = va;
1713				} else if (PG_MD_CACHED_P(md)
1714				    && mips_cache_badalias(pv->pv_va, va)) {
1715					mips_dcache_wbinv_range(va, PAGE_SIZE);
1716				}
1717				PG_MD_PVLIST_UNLOCK(md);
1718			}
1719		}
1720
1721		pte->pt_entry = new_pt_entry;
1722		pmap_tlb_invalidate_addr(pmap_kernel(), va);
1723	}
1724	kpreempt_enable();
1725}
1726
1727void
1728pmap_remove_all(struct pmap *pmap)
1729{
1730	KASSERT(pmap != pmap_kernel());
1731
1732	kpreempt_disable();
1733	/*
1734	 * Free all of our ASIDs which means we can skip doing all the
1735	 * tlb_invalidate_addrs().
1736	 */
1737#ifdef MULTIPROCESSOR
1738	const uint32_t cpu_mask = 1 << cpu_index(curcpu());
1739	KASSERT((pmap->pm_onproc & ~cpu_mask) == 0);
1740	if (pmap->pm_onproc & cpu_mask)
1741		pmap_tlb_asid_deactivate(pmap);
1742#endif
1743	pmap_tlb_asid_release_all(pmap);
1744	pmap->pm_flags |= PMAP_DEFERRED_ACTIVATE;
1745
1746#ifdef PMAP_FAULTINFO
1747	curpcb->pcb_faultinfo.pfi_faultaddr = 0;
1748	curpcb->pcb_faultinfo.pfi_repeats = 0;
1749	curpcb->pcb_faultinfo.pfi_faultpte = NULL;
1750#endif
1751	kpreempt_enable();
1752}
1753/*
1754 *	Routine:	pmap_unwire
1755 *	Function:	Clear the wired attribute for a map/virtual-address
1756 *			pair.
1757 *	In/out conditions:
1758 *			The mapping must already exist in the pmap.
1759 */
1760void
1761pmap_unwire(pmap_t pmap, vaddr_t va)
1762{
1763	pt_entry_t *pte;
1764
1765	PMAP_COUNT(unwire);
1766#ifdef DEBUG
1767	if (pmapdebug & (PDB_FOLLOW|PDB_WIRING))
1768		printf("pmap_unwire(%p, %#"PRIxVADDR")\n", pmap, va);
1769#endif
1770	/*
1771	 * Don't need to flush the TLB since PG_WIRED is only in software.
1772	 */
1773	kpreempt_disable();
1774	if (pmap == pmap_kernel()) {
1775		/* change entries in kernel pmap */
1776#ifdef PARANOIADIAG
1777		if (va < VM_MIN_KERNEL_ADDRESS || va >= virtual_end)
1778			panic("pmap_unwire");
1779#endif
1780		pte = kvtopte(va);
1781	} else {
1782		pte = pmap_pte_lookup(pmap, va);
1783#ifdef DIAGNOSTIC
1784		if (pte == NULL)
1785			panic("pmap_unwire: pmap %p va %#"PRIxVADDR" invalid STE",
1786			    pmap, va);
1787#endif
1788	}
1789
1790#ifdef DIAGNOSTIC
1791	if (mips_pg_v(pte->pt_entry) == 0)
1792		panic("pmap_unwire: pmap %p va %#"PRIxVADDR" invalid PTE",
1793		    pmap, va);
1794#endif
1795
1796	if (mips_pg_wired(pte->pt_entry)) {
1797		pte->pt_entry &= ~mips_pg_wired_bit();
1798		pmap->pm_stats.wired_count--;
1799	}
1800#ifdef DIAGNOSTIC
1801	else {
1802		printf("pmap_unwire: wiring for pmap %p va %#"PRIxVADDR" "
1803		    "didn't change!\n", pmap, va);
1804	}
1805#endif
1806	kpreempt_enable();
1807}
1808
1809/*
1810 *	Routine:	pmap_extract
1811 *	Function:
1812 *		Extract the physical page address associated
1813 *		with the given map/virtual_address pair.
1814 */
1815bool
1816pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
1817{
1818	paddr_t pa;
1819	pt_entry_t *pte;
1820
1821#ifdef DEBUG
1822	if (pmapdebug & PDB_FOLLOW)
1823		printf("pmap_extract(%p, %#"PRIxVADDR") -> ", pmap, va);
1824#endif
1825	if (pmap == pmap_kernel()) {
1826		if (MIPS_KSEG0_P(va)) {
1827			pa = MIPS_KSEG0_TO_PHYS(va);
1828			goto done;
1829		}
1830#ifdef _LP64
1831		if (MIPS_XKPHYS_P(va)) {
1832			pa = MIPS_XKPHYS_TO_PHYS(va);
1833			goto done;
1834		}
1835#endif
1836#ifdef DIAGNOSTIC
1837		if (MIPS_KSEG1_P(va))
1838			panic("pmap_extract: kseg1 address %#"PRIxVADDR"", va);
1839#endif
1840		if (va >= mips_virtual_end)
1841			panic("pmap_extract: illegal kernel mapped address %#"PRIxVADDR"", va);
1842		pte = kvtopte(va);
1843		kpreempt_disable();
1844	} else {
1845		kpreempt_disable();
1846		if (!(pte = pmap_pte_lookup(pmap, va))) {
1847#ifdef DEBUG
1848			if (pmapdebug & PDB_FOLLOW)
1849				printf("not in segmap\n");
1850#endif
1851			kpreempt_enable();
1852			return false;
1853		}
1854	}
1855	if (!mips_pg_v(pte->pt_entry)) {
1856#ifdef DEBUG
1857		if (pmapdebug & PDB_FOLLOW)
1858			printf("PTE not valid\n");
1859#endif
1860		kpreempt_enable();
1861		return false;
1862	}
1863	pa = mips_tlbpfn_to_paddr(pte->pt_entry) | (va & PGOFSET);
1864	kpreempt_enable();
1865done:
1866	if (pap != NULL) {
1867		*pap = pa;
1868	}
1869#ifdef DEBUG
1870	if (pmapdebug & PDB_FOLLOW)
1871		printf("pa %#"PRIxPADDR"\n", pa);
1872#endif
1873	return true;
1874}
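
/*
 * Illustrative sketch of the expected calling pattern for pmap_extract();
 * the variables and the caller shown here are hypothetical:
 *
 *	paddr_t pa;
 *
 *	if (pmap_extract(pmap_kernel(), va, &pa)) {
 *		// va is mapped; pa now holds the physical address
 *	} else {
 *		// no valid mapping for va in this pmap
 *	}
 */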
1875
1876/*
1877 *	Copy the range specified by src_addr/len
1878 *	from the source map to the range dst_addr/len
1879 *	in the destination map.
1880 *
1881 *	This routine is only advisory and need not do anything.
1882 */
1883void
1884pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vaddr_t dst_addr, vsize_t len,
1885    vaddr_t src_addr)
1886{
1887
1888	PMAP_COUNT(copy);
1889#ifdef DEBUG
1890	if (pmapdebug & PDB_FOLLOW)
1891		printf("pmap_copy(%p, %p, %#"PRIxVADDR", %#"PRIxVSIZE", %#"PRIxVADDR")\n",
1892		    dst_pmap, src_pmap, dst_addr, len, src_addr);
1893#endif
1894}
1895
1896/*
1897 *	pmap_zero_page zeros the specified page.
1898 */
1899void
1900pmap_zero_page(paddr_t dst_pa)
1901{
1902	vaddr_t dst_va;
1903	pt_entry_t dst_tmp;
1904
1905#ifdef DEBUG
1906	if (pmapdebug & PDB_FOLLOW)
1907		printf("pmap_zero_page(%#"PRIxPADDR")\n", dst_pa);
1908#endif
1909	PMAP_COUNT(zeroed_pages);
1910
1911	struct vm_page *dst_pg = PHYS_TO_VM_PAGE(dst_pa);
1912
1913	dst_va = pmap_map_ephemeral_page(dst_pg, VM_PROT_READ|VM_PROT_WRITE, &dst_tmp);
1914
1915	mips_pagezero((void *)dst_va);
1916
1917	pmap_unmap_ephemeral_page(dst_pg, dst_va, dst_tmp);
1918}
1919
1920/*
1921 *	pmap_copy_page copies the specified page.
1922 */
1923void
1924pmap_copy_page(paddr_t src_pa, paddr_t dst_pa)
1925{
1926	vaddr_t src_va, dst_va;
1927	pt_entry_t src_tmp, dst_tmp;
1928#ifdef DEBUG
1929	if (pmapdebug & PDB_FOLLOW)
1930		printf("pmap_copy_page(%#"PRIxPADDR", %#"PRIxPADDR")\n", src_pa, dst_pa);
1931#endif
1932	struct vm_page *src_pg = PHYS_TO_VM_PAGE(src_pa);
1933	struct vm_page *dst_pg = PHYS_TO_VM_PAGE(dst_pa);
1934
1935	PMAP_COUNT(copied_pages);
1936
1937	src_va = pmap_map_ephemeral_page(src_pg, VM_PROT_READ, &src_tmp);
1938	dst_va = pmap_map_ephemeral_page(dst_pg, VM_PROT_READ|VM_PROT_WRITE, &dst_tmp);
1939
1940	mips_pagecopy((void *)dst_va, (void *)src_va);
1941
1942	pmap_unmap_ephemeral_page(dst_pg, dst_va, dst_tmp);
1943	pmap_unmap_ephemeral_page(src_pg, src_va, src_tmp);
1944}
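
/*
 * Sketch of the ephemeral-mapping pattern used by pmap_zero_page() and
 * pmap_copy_page() above, for a hypothetical routine that needs temporary
 * access to an arbitrary physical page; the helper name and the fill value
 * are illustrative only:
 *
 *	static void
 *	example_touch_page(paddr_t pa)
 *	{
 *		struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
 *		pt_entry_t tmp;
 *		vaddr_t va;
 *
 *		va = pmap_map_ephemeral_page(pg, VM_PROT_READ|VM_PROT_WRITE,
 *		    &tmp);
 *		memset((void *)va, 0xa5, PAGE_SIZE);	// use the mapping
 *		pmap_unmap_ephemeral_page(pg, va, tmp);
 *	}
 */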
1945
1946/*
1947 *	pmap_clear_reference:
1948 *
1949 *	Clear the reference bit on the specified physical page.
1950 */
1951bool
1952pmap_clear_reference(struct vm_page *pg)
1953{
1954	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1955#ifdef DEBUG
1956	if (pmapdebug & PDB_FOLLOW)
1957		printf("pmap_clear_reference(%#"PRIxPADDR")\n",
1958		    VM_PAGE_TO_PHYS(pg));
1959#endif
1960	return pmap_clear_mdpage_attributes(md, PG_MD_REFERENCED);
1961}
1962
1963/*
1964 *	pmap_is_referenced:
1965 *
1966 *	Return whether or not the specified physical page is referenced
1967 *	by any physical maps.
1968 */
1969bool
1970pmap_is_referenced(struct vm_page *pg)
1971{
1972	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1973
1974	return PG_MD_REFERENCED_P(md);
1975}
1976
1977/*
1978 *	Clear the modify bits on the specified physical page.
1979 */
1980bool
1981pmap_clear_modify(struct vm_page *pg)
1982{
1983	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1984	pv_entry_t pv = &md->pvh_first;
1985	pv_entry_t pv_next;
1986	uint16_t gen;
1987
1988	PMAP_COUNT(clear_modify);
1989#ifdef DEBUG
1990	if (pmapdebug & PDB_FOLLOW)
1991		printf("pmap_clear_modify(%#"PRIxPADDR")\n", VM_PAGE_TO_PHYS(pg));
1992#endif
1993	if (PG_MD_EXECPAGE_P(md)) {
1994		if (pv->pv_pmap == NULL) {
1995			pmap_clear_mdpage_attributes(md, PG_MD_EXECPAGE);
1996			PMAP_COUNT(exec_uncached_clear_modify);
1997		} else {
1998			pmap_page_syncicache(pg);
1999			PMAP_COUNT(exec_synced_clear_modify);
2000		}
2001	}
2002	if (!pmap_clear_mdpage_attributes(md, PG_MD_MODIFIED))
2003		return false;
2004	if (pv->pv_pmap == NULL) {
2005		return true;
2006	}
2007
2008	/*
2009	 * remove write access from any pages that are dirty
2010	 * so we can tell if they are written to again later.
2011	 * flush the VAC first if there is one.
2012	 */
2013	kpreempt_disable();
2014	gen = PG_MD_PVLIST_LOCK(md, false);
2015	for (; pv != NULL; pv = pv_next) {
2016		pmap_t pmap = pv->pv_pmap;
2017		vaddr_t va = pv->pv_va;
2018		pt_entry_t *pte;
2019		uint32_t pt_entry;
2020		pv_next = pv->pv_next;
2021		if (pmap == pmap_kernel()) {
2022			pte = kvtopte(va);
2023		} else {
2024			pte = pmap_pte_lookup(pmap, va);
2025			KASSERT(pte);
2026		}
2027		pt_entry = pte->pt_entry & ~mips_pg_m_bit();
2028		if (pte->pt_entry == pt_entry) {
2029			continue;
2030		}
2031		KASSERT(mips_pg_v(pt_entry));
		/*
		 * XXX: presumably this pushes any dirty lines out of a
		 * virtually-aliased cache before write access is revoked,
		 * so a later writeback cannot bypass the modified tracking.
		 */
2035		if (MIPS_HAS_R4K_MMU && MIPS_CACHE_VIRTUAL_ALIAS) {
2036			if (PMAP_IS_ACTIVE(pmap)) {
2037				mips_dcache_wbinv_range(va, PAGE_SIZE);
2038			} else {
2039				mips_dcache_wbinv_range_index(va, PAGE_SIZE);
2040			}
2041		}
2042		pte->pt_entry = pt_entry;
2043		PG_MD_PVLIST_UNLOCK(md);
2044		pmap_tlb_invalidate_addr(pmap, va);
2045		pmap_update(pmap);
2046		if (__predict_false(gen != PG_MD_PVLIST_LOCK(md, false))) {
2047			/*
2048			 * The list changed!  So restart from the beginning.
2049			 */
2050			pv_next = &md->pvh_first;
2051		}
2052	}
2053	PG_MD_PVLIST_UNLOCK(md);
2054	kpreempt_enable();
2055	return true;
2056}
2057
2058/*
2059 *	pmap_is_modified:
2060 *
2061 *	Return whether or not the specified physical page is modified
2062 *	by any physical maps.
2063 */
2064bool
2065pmap_is_modified(struct vm_page *pg)
2066{
2067	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2068
2069	return PG_MD_MODIFIED_P(md);
2070}
2071
2072/*
2073 *	pmap_set_modified:
2074 *
2075 *	Sets the page modified reference bit for the specified page.
2076 */
2077void
2078pmap_set_modified(paddr_t pa)
2079{
2080	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
2081	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2082	pmap_set_mdpage_attributes(md, PG_MD_MODIFIED | PG_MD_REFERENCED);
2083}
2084
2085/******************** pv_entry management ********************/
2086
2087static void
2088pmap_check_pvlist(struct vm_page_md *md)
2089{
2090#ifdef PARANOIADIAG
	pv_entry_t pv = &md->pvh_first;
2092	if (pv->pv_pmap != NULL) {
2093		for (; pv != NULL; pv = pv->pv_next) {
2094			KASSERT(!MIPS_KSEG0_P(pv->pv_va));
2095			KASSERT(!MIPS_KSEG1_P(pv->pv_va));
2096#ifdef _LP64
2097			KASSERT(!MIPS_XKPHYS_P(pv->pv_va));
2098#endif
2099		}
2100		pv = &md->pvh_first;
2101	}
2102#endif /* PARANOIADIAG */
2103}
2104
2105/*
2106 * Enter the pmap and virtual address into the
2107 * physical to virtual map table.
2108 */
2109void
2110pmap_enter_pv(pmap_t pmap, vaddr_t va, struct vm_page *pg, u_int *npte)
2111{
2112	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2113	pv_entry_t pv, npv, apv;
	uint16_t gen;
2115
2116	KASSERT(kpreempt_disabled());
2117	KASSERT(!MIPS_KSEG0_P(va));
2118	KASSERT(!MIPS_KSEG1_P(va));
2119#ifdef _LP64
2120	KASSERT(!MIPS_XKPHYS_P(va));
2121#endif
2122
2123	apv = NULL;
2124	pv = &md->pvh_first;
2125#ifdef DEBUG
2126	if (pmapdebug & PDB_ENTER)
		printf("pmap_enter_pv: pv %p: was %#"PRIxVADDR"/%p/%p\n",
2128		    pv, pv->pv_va, pv->pv_pmap, pv->pv_next);
2129#endif
2130	gen = PG_MD_PVLIST_LOCK(md, true);
2131	pmap_check_pvlist(md);
2132#if defined(MIPS3_NO_PV_UNCACHED) || defined(MULTIPROCESSOR)
2133again:
2134#endif
2135	if (pv->pv_pmap == NULL) {
2136		KASSERT(pv->pv_next == NULL);
2137		/*
2138		 * No entries yet, use header as the first entry
2139		 */
2140#ifdef DEBUG
2141		if (pmapdebug & PDB_PVENTRY)
2142			printf("pmap_enter_pv: first pv: pmap %p va %#"PRIxVADDR"\n",
2143			    pmap, va);
2144#endif
2145		PMAP_COUNT(primary_mappings);
2146		PMAP_COUNT(mappings);
2147		pmap_clear_mdpage_attributes(md, PG_MD_UNCACHED);
2148		pv->pv_pmap = pmap;
2149		pv->pv_va = va;
2150	} else {
2151#if defined(MIPS3_PLUS) && !defined(MULTIPROCESSOR) /* XXX mmu XXX */
2152		if (MIPS_CACHE_VIRTUAL_ALIAS) {
2153			/*
2154			 * There is at least one other VA mapping this page.
2155			 * Check if they are cache index compatible.
2156			 */
2157
2158#if defined(MIPS3_NO_PV_UNCACHED)
2159
2160			/*
2161			 * Instead of mapping uncached, which some platforms
2162			 * cannot support, remove the mapping from the pmap.
2163			 * When this address is touched again, the uvm will
2164			 * fault it in.  Because of this, each page will only
2165			 * be mapped with one index at any given time.
2166			 */
2167
2168			if (mips_cache_badalias(pv->pv_va, va)) {
2169				for (npv = pv; npv; npv = npv->pv_next) {
2170					pmap_remove(npv->pv_pmap, npv->pv_va,
2171					    npv->pv_va + PAGE_SIZE);
2172					pmap_update(npv->pv_pmap);
2173					goto again;
2174				}
2175			}
2176#else	/* !MIPS3_NO_PV_UNCACHED */
2177			if (PG_MD_CACHED_P(md)) {
2178				/*
2179				 * If this page is cached, then all mappings
2180				 * have the same cache alias so we only need
2181				 * to check the first page to see if it's
2182				 * incompatible with the new mapping.
2183				 *
2184				 * If the mappings are incompatible, map this
2185				 * page as uncached and re-map all the current
2186				 * mapping as uncached until all pages can
2187				 * share the same cache index again.
2188				 */
2189				if (mips_cache_badalias(pv->pv_va, va)) {
2190					pmap_page_cache(pg, false);
2191					mips_dcache_wbinv_range_index(
2192					    pv->pv_va, PAGE_SIZE);
2193					*npte = (*npte &
2194					    ~MIPS3_PG_CACHEMODE) |
2195					    MIPS3_PG_UNCACHED;
2196					PMAP_COUNT(page_cache_evictions);
2197				}
2198			} else {
2199				*npte = (*npte & ~MIPS3_PG_CACHEMODE) |
2200				    MIPS3_PG_UNCACHED;
2201				PMAP_COUNT(page_cache_evictions);
2202			}
2203#endif	/* !MIPS3_NO_PV_UNCACHED */
2204		}
2205#endif /* MIPS3_PLUS && !MULTIPROCESSOR */
2206
2207		/*
2208		 * There is at least one other VA mapping this page.
2209		 * Place this entry after the header.
2210		 *
2211		 * Note: the entry may already be in the table if
2212		 * we are only changing the protection bits.
2213		 */
2214
2215		for (npv = pv; npv; npv = npv->pv_next) {
2216			if (pmap == npv->pv_pmap && va == npv->pv_va) {
2217#ifdef PARANOIADIAG
2218				pt_entry_t *pte;
2219				uint32_t pt_entry;
2220
2221				if (pmap == pmap_kernel()) {
2222					pt_entry = kvtopte(va)->pt_entry;
2223				} else {
2224					pte = pmap_pte_lookup(pmap, va);
2225					if (pte) {
2226						pt_entry = pte->pt_entry;
2227					} else
2228						pt_entry = 0;
2229				}
2230				if (!mips_pg_v(pt_entry) ||
2231				    mips_tlbpfn_to_paddr(pt_entry) !=
2232				    VM_PAGE_TO_PHYS(pg))
2233					printf(
2234		"pmap_enter_pv: found va %#"PRIxVADDR" pa %#"PRIxPADDR" in pv_table but != %x\n",
2235					    va, VM_PAGE_TO_PHYS(pg),
2236					    pt_entry);
2237#endif
2238				PMAP_COUNT(remappings);
2239				PG_MD_PVLIST_UNLOCK(md);
2240				if (__predict_false(apv != NULL))
2241					pmap_pv_free(apv);
2242				return;
2243			}
2244		}
2245#ifdef DEBUG
2246		if (pmapdebug & PDB_PVENTRY)
2247			printf("pmap_enter_pv: new pv: pmap %p va %#"PRIxVADDR"\n",
2248			    pmap, va);
2249#endif
2250		if (__predict_true(apv == NULL)) {
2251#if defined(MULTIPROCESSOR) || !defined(_LP64) || defined(PMAP_POOLPAGE_DEBUG) || defined(LOCKDEBUG)
2252			/*
2253			 * To allocate a PV, we have to release the PVLIST lock
2254			 * so get the page generation.  We allocate the PV, and
2255			 * then reacquire the lock.
2256			 */
2257			PG_MD_PVLIST_UNLOCK(md);
2258#endif
2259			apv = (pv_entry_t)pmap_pv_alloc();
2260			if (apv == NULL)
2261				panic("pmap_enter_pv: pmap_pv_alloc() failed");
2262#if defined(MULTIPROCESSOR) || !defined(_LP64) || defined(PMAP_POOLPAGE_DEBUG) || defined(LOCKDEBUG)
2263#ifdef MULTIPROCESSOR
2264			/*
2265			 * If the generation has changed, then someone else
2266			 * tinkered with this page so we should
2267			 * start over.
2268			 */
2269			uint16_t oldgen = gen;
2270#endif
2271			gen = PG_MD_PVLIST_LOCK(md, true);
2272#ifdef MULTIPROCESSOR
2273			if (gen != oldgen)
2274				goto again;
2275#endif
2276#endif
2277		}
2278		npv = apv;
2279		apv = NULL;
2280		npv->pv_va = va;
2281		npv->pv_pmap = pmap;
2282		npv->pv_next = pv->pv_next;
2283		pv->pv_next = npv;
2284		PMAP_COUNT(mappings);
2285	}
2286	pmap_check_pvlist(md);
2287	PG_MD_PVLIST_UNLOCK(md);
2288	if (__predict_false(apv != NULL))
2289		pmap_pv_free(apv);
2290}
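
/*
 * Worked example of the alias check used above (the cache geometry here is
 * an assumption for illustration, not a statement about any particular CPU):
 * with 4KB pages and a virtually-indexed data cache whose per-way index
 * spans 8KB, the alias mask is 0x1000, so
 *
 *	mips_cache_badalias(0x00400000, 0x00401000)	-> true  (bit 12 differs)
 *	mips_cache_badalias(0x00400000, 0x00402000)	-> false (same cache index)
 *
 * Only mappings in the first case force the page uncached (or force the
 * conflicting mapping to be removed when MIPS3_NO_PV_UNCACHED is defined).
 */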
2291
2292/*
2293 * Remove a physical to virtual address translation.
2294 * If cache was inhibited on this page, and there are no more cache
2295 * conflicts, restore caching.
2296 * Flush the cache if the last page is removed (should always be cached
2297 * at this point).
2298 */
2299void
2300pmap_remove_pv(pmap_t pmap, vaddr_t va, struct vm_page *pg, bool dirty)
2301{
2302	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2303	pv_entry_t pv, npv;
2304	bool last;
2305
2306#ifdef DEBUG
2307	if (pmapdebug & (PDB_FOLLOW|PDB_PVENTRY))
2308		printf("pmap_remove_pv(%p, %#"PRIxVADDR", %#"PRIxPADDR")\n", pmap, va,
2309		    VM_PAGE_TO_PHYS(pg));
2310#endif
2311	KASSERT(kpreempt_disabled());
2312	pv = &md->pvh_first;
2313
2314	(void)PG_MD_PVLIST_LOCK(md, true);
2315	pmap_check_pvlist(md);
2316
2317	/*
2318	 * If it is the first entry on the list, it is actually
2319	 * in the header and we must copy the following entry up
2320	 * to the header.  Otherwise we must search the list for
2321	 * the entry.  In either case we free the now unused entry.
2322	 */
2323
2324	last = false;
2325	if (pmap == pv->pv_pmap && va == pv->pv_va) {
2326		npv = pv->pv_next;
2327		if (npv) {
2328			*pv = *npv;
2329			KASSERT(pv->pv_pmap != NULL);
2330		} else {
2331			pmap_clear_mdpage_attributes(md, PG_MD_UNCACHED);
2332			pv->pv_pmap = NULL;
2333			last = true;	/* Last mapping removed */
2334		}
2335		PMAP_COUNT(remove_pvfirst);
2336	} else {
2337		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
2338			PMAP_COUNT(remove_pvsearch);
2339			if (pmap == npv->pv_pmap && va == npv->pv_va)
2340				break;
2341		}
2342		if (npv) {
2343			pv->pv_next = npv->pv_next;
2344		}
2345	}
2346#ifdef MIPS3_PLUS	/* XXX mmu XXX */
2347#ifndef MIPS3_NO_PV_UNCACHED
2348	if (MIPS_HAS_R4K_MMU && PG_MD_UNCACHED_P(md)) {
2349		/*
2350		 * Page is currently uncached, check if alias mapping has been
2351		 * removed.  If it was, then reenable caching.
2352		 */
2353		pv = &md->pvh_first;
		pv_entry_t pv0 = pv->pv_next;
		for (; pv0; pv0 = pv0->pv_next) {
			if (mips_cache_badalias(pv->pv_va, pv0->pv_va))
				break;
		}
		if (pv0 == NULL)
2359			pmap_page_cache(pg, true);
2360	}
2361#endif
2362#endif	/* MIPS3_PLUS */
2363
2364	pmap_check_pvlist(md);
2365	PG_MD_PVLIST_UNLOCK(md);
2366
2367	/*
2368	 * Free the pv_entry if needed.
2369	 */
2370	if (npv)
2371		pmap_pv_free(npv);
2372	if (PG_MD_EXECPAGE_P(md) && dirty) {
2373		if (last) {
2374			/*
2375			 * If this was the page's last mapping, we no longer
2376			 * care about its execness.
2377			 */
2378			pmap_clear_mdpage_attributes(md, PG_MD_EXECPAGE);
2379			PMAP_COUNT(exec_uncached_remove);
2380		} else {
2381			/*
2382			 * Someone still has it mapped as an executable page
2383			 * so we must sync it.
2384			 */
2385			pmap_page_syncicache(pg);
2386			PMAP_COUNT(exec_synced_remove);
2387		}
2388	}
2389#ifdef MIPS3_PLUS	/* XXX mmu XXX */
2390	if (MIPS_HAS_R4K_MMU && last)	/* XXX why */
2391		mips_dcache_wbinv_range_index(va, PAGE_SIZE);
2392#endif	/* MIPS3_PLUS */
2393}
2394
2395#ifdef MULTIPROCESSOR
2396struct pmap_pvlist_info {
2397	kmutex_t *pli_locks[PAGE_SIZE / 32];
2398	volatile u_int pli_lock_refs[PAGE_SIZE / 32];
2399	volatile u_int pli_lock_index;
2400	u_int pli_lock_mask;
2401} pmap_pvlist_info;
2402
2403static void
2404pmap_pvlist_lock_init(void)
2405{
2406	struct pmap_pvlist_info * const pli = &pmap_pvlist_info;
2407	const vaddr_t lock_page = uvm_pageboot_alloc(PAGE_SIZE);
2408	vaddr_t lock_va = lock_page;
2409	size_t cache_line_size = mips_cache_info.mci_pdcache_line_size;
2410	if (sizeof(kmutex_t) > cache_line_size) {
2411		cache_line_size = roundup2(sizeof(kmutex_t), cache_line_size);
2412	}
2413	const size_t nlocks = PAGE_SIZE / cache_line_size;
2414	KASSERT((nlocks & (nlocks - 1)) == 0);
2415	/*
2416	 * Now divide the page into a number of mutexes, one per cacheline.
2417	 */
2418	for (size_t i = 0; i < nlocks; lock_va += cache_line_size, i++) {
2419		kmutex_t * const lock = (kmutex_t *)lock_va;
2420		mutex_init(lock, MUTEX_DEFAULT, IPL_VM);
2421		pli->pli_locks[i] = lock;
2422	}
2423	pli->pli_lock_mask = nlocks - 1;
2424}
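
/*
 * For example (illustrative numbers): with a PAGE_SIZE of 4096 and a 32-byte
 * primary data cache line, the bootstrap page above yields 4096/32 = 128
 * mutexes and a pli_lock_mask of 127; if sizeof(kmutex_t) were larger than a
 * cache line, the stride is rounded up so that no two locks share a line.
 */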
2425
2426uint16_t
2427pmap_pvlist_lock(struct vm_page_md *md, bool list_change)
2428{
2429	struct pmap_pvlist_info * const pli = &pmap_pvlist_info;
2430	kmutex_t *lock = md->pvh_lock;
	uint16_t gen;
2432
2433	/*
2434	 * Allocate a lock on an as-needed basis.  This will hopefully give us
2435	 * semi-random distribution not based on page color.
2436	 */
2437	if (__predict_false(lock == NULL)) {
2438		size_t locknum = atomic_add_int_nv(&pli->pli_lock_index, 37);
2439		size_t lockid = locknum & pli->pli_lock_mask;
2440		kmutex_t * const new_lock = pli->pli_locks[lockid];
2441		/*
2442		 * Set the lock.  If some other thread already did, just use
2443		 * the one they assigned.
2444		 */
2445		lock = atomic_cas_ptr(&md->pvh_lock, NULL, new_lock);
2446		if (lock == NULL) {
2447			lock = new_lock;
2448			atomic_inc_uint(&pli->pli_lock_refs[lockid]);
2449		}
2450	}
2451
2452	/*
2453	 * Now finally lock the pvlists.
2454	 */
2455	mutex_spin_enter(lock);
2456
2457	/*
2458	 * If the locker will be changing the list, increment the high 16 bits
2459	 * of attrs so we use that as a generation number.
2460	 */
2461	gen = PG_MD_PVLIST_GEN(md);		/* get old value */
2462	if (list_change)
2463		atomic_add_int(&md->pvh_attrs, 0x10000);
2464
2465	/*
2466	 * Return the generation number.
2467	 */
2468	return gen;
2469}
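
/*
 * Sketch of the generation-number pattern callers of this lock rely on
 * (see pmap_clear_modify() and pmap_enter_pv() for the real uses); the
 * middle of the sequence is hypothetical:
 *
 *	uint16_t gen = PG_MD_PVLIST_LOCK(md, false);
 *	...
 *	PG_MD_PVLIST_UNLOCK(md);
 *	// do work that cannot be done while holding the pvlist lock
 *	if (gen != PG_MD_PVLIST_LOCK(md, false)) {
 *		// the pv list changed while unlocked: restart the walk
 *	}
 *	PG_MD_PVLIST_UNLOCK(md);
 */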
2470#else
2471static void
2472pmap_pvlist_lock_init(void)
2473{
2474	mutex_init(&pmap_pvlist_mutex, MUTEX_DEFAULT, IPL_VM);
2475}
2476#endif /* MULTIPROCESSOR */
2477
2478/*
2479 * pmap_pv_page_alloc:
2480 *
2481 *	Allocate a page for the pv_entry pool.
2482 */
2483void *
2484pmap_pv_page_alloc(struct pool *pp, int flags)
2485{
2486	struct vm_page * const pg = PMAP_ALLOC_POOLPAGE(UVM_PGA_USERESERVE);
2487	if (pg == NULL)
2488		return NULL;
2489
2490	return (void *)mips_pmap_map_poolpage(VM_PAGE_TO_PHYS(pg));
2491}
2492
2493/*
2494 * pmap_pv_page_free:
2495 *
2496 *	Free a pv_entry pool page.
2497 */
2498void
2499pmap_pv_page_free(struct pool *pp, void *v)
2500{
2501	vaddr_t va = (vaddr_t)v;
2502	paddr_t pa;
2503
2504#ifdef _LP64
2505	KASSERT(MIPS_XKPHYS_P(va));
2506	pa = MIPS_XKPHYS_TO_PHYS(va);
2507#else
2508	KASSERT(MIPS_KSEG0_P(va));
2509	pa = MIPS_KSEG0_TO_PHYS(va);
2510#endif
2511#ifdef MIPS3_PLUS
2512	if (MIPS_CACHE_VIRTUAL_ALIAS)
2513		mips_dcache_inv_range(va, PAGE_SIZE);
2514#endif
2515	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
2516	KASSERT(pg != NULL);
2517	pmap_clear_mdpage_attributes(VM_PAGE_TO_MD(pg), PG_MD_POOLPAGE);
2518	uvm_pagefree(pg);
2519}
2520
2521pt_entry_t *
2522pmap_pte(pmap_t pmap, vaddr_t va)
2523{
2524	pt_entry_t *pte;
2525
2526	if (pmap == pmap_kernel())
2527		pte = kvtopte(va);
2528	else
2529		pte = pmap_pte_lookup(pmap, va);
2530	return pte;
2531}
2532
2533#ifdef MIPS3_PLUS	/* XXX mmu XXX */
2534/*
2535 * Find first virtual address >= *vap that doesn't cause
2536 * a cache alias conflict.
2537 */
2538void
2539pmap_prefer(vaddr_t foff, vaddr_t *vap, vsize_t sz, int td)
2540{
2541	const struct mips_cache_info * const mci = &mips_cache_info;
2542	vaddr_t	va;
2543	vsize_t d;
2544	vsize_t prefer_mask = ptoa(uvmexp.colormask);
2545
2546	PMAP_COUNT(prefer_requests);
2547
2548	if (MIPS_HAS_R4K_MMU) {
2549		prefer_mask |= mci->mci_cache_prefer_mask;
2550	}
2551
2552	if (prefer_mask) {
2553		va = *vap;
2554
2555		d = foff - va;
2556		d &= prefer_mask;
2557		if (d) {
2558			if (td)
				*vap = trunc_page(va - ((-d) & prefer_mask));
2560			else
2561				*vap = round_page(va + d);
2562			PMAP_COUNT(prefer_adjustments);
2563		}
2564	}
2565}
2566#endif	/* MIPS3_PLUS */
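
/*
 * Worked example for pmap_prefer() above (the mask is an assumption for
 * illustration): with a combined prefer_mask of 0x7fff, foff = 0x3000 and
 * *vap = 0x10000000 give d = 0x3000, so the hint is rounded up to
 * 0x10003000 and the new mapping shares its cache index (and page color)
 * with offset 0x3000 of the backing object.
 */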
2567
2568struct vm_page *
2569mips_pmap_alloc_poolpage(int flags)
2570{
2571	/*
	 * On 32-bit kernels, we must make sure that we only allocate pages
	 * that can be mapped via KSEG0.  On 64-bit kernels, try to allocate
	 * from the first 4GB.  If all memory is in KSEG0 / the first 4GB, we
	 * can just use the default freelist; otherwise we must use the pool
	 * page freelist.
2576	 */
2577	if (mips_poolpage_vmfreelist != VM_FREELIST_DEFAULT)
2578		return uvm_pagealloc_strat(NULL, 0, NULL, flags,
2579		    UVM_PGA_STRAT_ONLY, mips_poolpage_vmfreelist);
2580
2581	return uvm_pagealloc(NULL, 0, NULL, flags);
2582}
2583
2584vaddr_t
2585mips_pmap_map_poolpage(paddr_t pa)
2586{
2587	vaddr_t va;
2588
2589	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
2590	KASSERT(pg);
2591	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2592	pmap_set_mdpage_attributes(md, PG_MD_POOLPAGE);
2593
2594#ifdef PMAP_POOLPAGE_DEBUG
2595	KASSERT((poolpage.hint & MIPS_CACHE_ALIAS_MASK) == 0);
2596	vaddr_t va_offset = poolpage.hint + mips_cache_indexof(pa);
2597	pt_entry_t *pte = poolpage.sysmap + atop(va_offset);
2598	const size_t va_inc = MIPS_CACHE_ALIAS_MASK + PAGE_SIZE;
2599	const size_t pte_inc = atop(va_inc);
2600
2601	for (; va_offset < poolpage.size;
2602	     va_offset += va_inc, pte += pte_inc) {
2603		if (!mips_pg_v(pte->pt_entry))
2604			break;
2605	}
2606	if (va_offset >= poolpage.size) {
2607		for (va_offset -= poolpage.size, pte -= atop(poolpage.size);
2608		     va_offset < poolpage.hint;
2609		     va_offset += va_inc, pte += pte_inc) {
2610			if (!mips_pg_v(pte->pt_entry))
2611				break;
2612		}
2613	}
2614	KASSERT(!mips_pg_v(pte->pt_entry));
2615	va = poolpage.base + va_offset;
2616	poolpage.hint = roundup2(va_offset + 1, va_inc);
2617	pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE, 0);
2618#else
2619#ifdef _LP64
2620	KASSERT(mips_options.mips3_xkphys_cached);
2621	va = MIPS_PHYS_TO_XKPHYS_CACHED(pa);
2622#else
2623	if (pa > MIPS_PHYS_MASK)
2624		panic("mips_pmap_map_poolpage: "
		    "pa %#"PRIxPADDR" cannot be mapped into KSEG0", pa);
2626
2627	va = MIPS_PHYS_TO_KSEG0(pa);
2628#endif
2629#endif
2630#if !defined(_LP64) || defined(PMAP_POOLPAGE_DEBUG)
2631	if (MIPS_CACHE_VIRTUAL_ALIAS) {
2632		/*
2633		 * If this page was last mapped with an address that might
2634		 * cause aliases, flush the page from the cache.
2635		 */
2636		(void)PG_MD_PVLIST_LOCK(md, false);
2637		pv_entry_t pv = &md->pvh_first;
2638		vaddr_t last_va = pv->pv_va;
2639		KASSERT(pv->pv_pmap == NULL);
2640		pv->pv_va = va;
2641		if (PG_MD_CACHED_P(md) && mips_cache_badalias(last_va, va))
2642			mips_dcache_wbinv_range_index(last_va, PAGE_SIZE);
2643		PG_MD_PVLIST_UNLOCK(md);
2644	}
2645#endif
2646	return va;
2647}
2648
2649paddr_t
2650mips_pmap_unmap_poolpage(vaddr_t va)
2651{
2652	paddr_t pa;
2653#ifdef PMAP_POOLPAGE_DEBUG
2654	KASSERT(poolpage.base <= va && va < poolpage.base + poolpage.size);
2655	pa = mips_tlbpfn_to_paddr(kvtopte(va)->pt_entry);
2656#elif defined(_LP64)
2657	KASSERT(MIPS_XKPHYS_P(va));
2658	pa = MIPS_XKPHYS_TO_PHYS(va);
2659#else
2660	KASSERT(MIPS_KSEG0_P(va));
2661	pa = MIPS_KSEG0_TO_PHYS(va);
2662#endif
2663	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2664	KASSERT(pg);
2665	pmap_clear_mdpage_attributes(VM_PAGE_TO_MD(pg), PG_MD_POOLPAGE);
2666#if defined(MIPS3_PLUS)
2667	if (MIPS_CACHE_VIRTUAL_ALIAS) {
2668		/*
2669		 * We've unmapped a poolpage.  Its contents are irrelevant.
2670		 */
2671		mips_dcache_inv_range(va, PAGE_SIZE);
2672	}
2673#endif
2674#ifdef PMAP_POOLPAGE_DEBUG
2675	pmap_kremove(va, PAGE_SIZE);
2676#endif
2677	return pa;
2678}
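
/*
 * Sketch of how the pool page hooks above are expected to pair up.  The
 * caller shown here is hypothetical; the real consumers are pool backends
 * such as pmap_pv_page_alloc()/pmap_pv_page_free() above:
 *
 *	struct vm_page *pg = mips_pmap_alloc_poolpage(UVM_PGA_USERESERVE);
 *	if (pg != NULL) {
 *		vaddr_t va = mips_pmap_map_poolpage(VM_PAGE_TO_PHYS(pg));
 *		// ... use the page through va ...
 *		paddr_t pa = mips_pmap_unmap_poolpage(va);
 *		uvm_pagefree(PHYS_TO_VM_PAGE(pa));
 *	}
 */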
2679
2680
2681
2682/******************** page table page management ********************/
2683
2684/* TO BE DONE */
2685