1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
38 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
39 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
40 */
41
42/*
43 *	Manages physical address maps.
44 *
45 *	Since the information managed by this module is
46 *	also stored by the logical address mapping module,
47 *	this module may throw away valid virtual-to-physical
48 *	mappings at almost any time.  However, invalidations
49 *	of virtual-to-physical mappings must be done as
50 *	requested.
51 *
52 *	In order to cope with hardware architectures which
53 *	make virtual-to-physical map invalidates expensive,
54 *	this module may delay invalidation or reduced-protection
55 *	operations until such time as they are actually
56 *	necessary.  This module is given full information as
57 *	to which processors are currently using which maps,
58 *	and to when physical maps must be made correct.
59 */
60
61#include <sys/cdefs.h>
62__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 241287 2012-10-06 19:33:52Z alc $");
63
64#include "opt_ddb.h"
65#include "opt_pmap.h"
66
67#include <sys/param.h>
68#include <sys/systm.h>
69#include <sys/lock.h>
70#include <sys/mman.h>
71#include <sys/msgbuf.h>
72#include <sys/mutex.h>
73#include <sys/pcpu.h>
74#include <sys/proc.h>
75#include <sys/rwlock.h>
76#include <sys/sched.h>
77#ifdef SMP
78#include <sys/smp.h>
79#else
80#include <sys/cpuset.h>
81#endif
82#include <sys/sysctl.h>
83#include <sys/vmmeter.h>
84
85#ifdef DDB
86#include <ddb/ddb.h>
87#endif
88
89#include <vm/vm.h>
90#include <vm/vm_param.h>
91#include <vm/vm_kern.h>
92#include <vm/vm_page.h>
93#include <vm/vm_map.h>
94#include <vm/vm_object.h>
95#include <vm/vm_extern.h>
96#include <vm/vm_pageout.h>
97#include <vm/vm_pager.h>
98#include <vm/uma.h>
99
100#include <machine/cache.h>
101#include <machine/md_var.h>
102#include <machine/tlb.h>
103
104#undef PMAP_DEBUG
105
106#if !defined(DIAGNOSTIC)
107#define	PMAP_INLINE __inline
108#else
109#define	PMAP_INLINE
110#endif
111
112#ifdef PV_STATS
113#define PV_STAT(x)	do { x ; } while (0)
114#else
115#define PV_STAT(x)	do { } while (0)
116#endif
117
118/*
119 * Get PDEs and PTEs for user/kernel address space
120 */
121#define	pmap_seg_index(v)	(((v) >> SEGSHIFT) & (NPDEPG - 1))
122#define	pmap_pde_index(v)	(((v) >> PDRSHIFT) & (NPDEPG - 1))
123#define	pmap_pte_index(v)	(((v) >> PAGE_SHIFT) & (NPTEPG - 1))
124#define	pmap_pde_pindex(v)	((v) >> PDRSHIFT)
125
126#ifdef __mips_n64
127#define	NUPDE			(NPDEPG * NPDEPG)
128#define	NUSERPGTBLS		(NUPDE + NPDEPG)
129#else
130#define	NUPDE			(NPDEPG)
131#define	NUSERPGTBLS		(NUPDE)
132#endif
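/*
 * Illustration of the lookup structure implied by the macros above (the
 * exact shift values depend on PAGE_SHIFT and on whether the kernel is
 * built for n64 or o32):
 *
 *	n64:	pm_segtab[pmap_seg_index(va)]		(segment table)
 *		    -> pde[pmap_pde_index(va)]		(page directory)
 *			-> pte[pmap_pte_index(va)]	(page table)
 *	o32:	pm_segtab[pmap_seg_index(va)]		(page directory)
 *		    -> pte[pmap_pte_index(va)]		(page table)
 *
 * pmap_pte() below performs exactly this walk.
 */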
133
134#define	is_kernel_pmap(x)	((x) == kernel_pmap)
135
136struct pmap kernel_pmap_store;
137pd_entry_t *kernel_segmap;
138
139vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
140vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
141
142static int nkpt;
143unsigned pmap_max_asid;		/* max ASID supported by the system */
144
145#define	PMAP_ASID_RESERVED	0
146
147vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
148
149static void pmap_asid_alloc(pmap_t pmap);
150
151/*
152 * Isolate the global pv list lock from data and other locks to prevent false
153 * sharing within the cache.
154 */
155static struct {
156	struct rwlock	lock;
157	char		padding[CACHE_LINE_SIZE - sizeof(struct rwlock)];
158} pvh_global __aligned(CACHE_LINE_SIZE);
159
160#define	pvh_global_lock	pvh_global.lock
161
162/*
163 * Data for the pv entry allocation mechanism
164 */
165static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
166static int pv_entry_count;
167
168static void free_pv_chunk(struct pv_chunk *pc);
169static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
170static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
171static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
172static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
173static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
174    vm_offset_t va);
175static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
176    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
177static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
178    pd_entry_t pde);
179static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
180static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
181static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
182    vm_offset_t va, vm_page_t m);
183static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte);
184static void pmap_invalidate_all(pmap_t pmap);
185static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
186static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m);
187
188static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
189static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
190static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t);
191static pt_entry_t init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot);
192
193static void pmap_invalidate_page_action(void *arg);
194static void pmap_invalidate_range_action(void *arg);
195static void pmap_update_page_action(void *arg);
196
197#ifndef __mips_n64
198/*
199 * This structure is for high memory (memory above 512MB on 32-bit systems) support.
200 * The highmem area does not have a KSEG0 mapping, and we need a mechanism to
201 * do temporary per-CPU mappings for pmap_zero_page, pmap_copy_page etc.
202 *
203 * At bootup, we reserve 2 virtual pages per CPU for mapping highmem pages. To
204 * access a highmem physical address on a CPU, we map the physical address to
205 * the reserved virtual address for the CPU in the kernel pagetable.  This is
206 * done with interrupts disabled (although a spinlock and sched_pin would be
207 * sufficient).
208 */
209struct local_sysmaps {
210	vm_offset_t	base;
211	uint32_t	saved_intr;
212	uint16_t	valid1, valid2;
213};
214static struct local_sysmaps sysmap_lmem[MAXCPU];
215
216static __inline void
217pmap_alloc_lmem_map(void)
218{
219	int i;
220
221	for (i = 0; i < MAXCPU; i++) {
222		sysmap_lmem[i].base = virtual_avail;
223		virtual_avail += PAGE_SIZE * 2;
224		sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
225	}
226}
227
228static __inline vm_offset_t
229pmap_lmem_map1(vm_paddr_t phys)
230{
231	struct local_sysmaps *sysm;
232	pt_entry_t *pte, npte;
233	vm_offset_t va;
234	uint32_t intr;
235	int cpu;
236
237	intr = intr_disable();
238	cpu = PCPU_GET(cpuid);
239	sysm = &sysmap_lmem[cpu];
240	sysm->saved_intr = intr;
241	va = sysm->base;
242	npte = TLBLO_PA_TO_PFN(phys) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
243	pte = pmap_pte(kernel_pmap, va);
244	*pte = npte;
245	sysm->valid1 = 1;
246	return (va);
247}
248
249static __inline vm_offset_t
250pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
251{
252	struct local_sysmaps *sysm;
253	pt_entry_t *pte, npte;
254	vm_offset_t va1, va2;
255	uint32_t intr;
256	int cpu;
257
258	intr = intr_disable();
259	cpu = PCPU_GET(cpuid);
260	sysm = &sysmap_lmem[cpu];
261	sysm->saved_intr = intr;
262	va1 = sysm->base;
263	va2 = sysm->base + PAGE_SIZE;
264	npte = TLBLO_PA_TO_PFN(phys1) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
265	pte = pmap_pte(kernel_pmap, va1);
266	*pte = npte;
267	npte = TLBLO_PA_TO_PFN(phys2) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
268	pte = pmap_pte(kernel_pmap, va2);
269	*pte = npte;
270	sysm->valid1 = 1;
271	sysm->valid2 = 1;
272	return (va1);
273}
274
275static __inline void
276pmap_lmem_unmap(void)
277{
278	struct local_sysmaps *sysm;
279	pt_entry_t *pte;
280	int cpu;
281
282	cpu = PCPU_GET(cpuid);
283	sysm = &sysmap_lmem[cpu];
284	pte = pmap_pte(kernel_pmap, sysm->base);
285	*pte = PTE_G;
286	tlb_invalidate_address(kernel_pmap, sysm->base);
287	sysm->valid1 = 0;
288	if (sysm->valid2) {
289		pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);
290		*pte = PTE_G;
291		tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE);
292		sysm->valid2 = 0;
293	}
294	intr_restore(sysm->saved_intr);
295}
296#else  /* __mips_n64 */
297
298static __inline void
299pmap_alloc_lmem_map(void)
300{
301}
302
303static __inline vm_offset_t
304pmap_lmem_map1(vm_paddr_t phys)
305{
306
307	return (0);
308}
309
310static __inline vm_offset_t
311pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
312{
313
314	return (0);
315}
316
317static __inline vm_offset_t
318pmap_lmem_unmap(void)
319{
320
321	return (0);
322}
323#endif /* !__mips_n64 */
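/*
 * Typical use of the helpers above (a sketch): map a physical page that
 * is not direct-mappable, operate on it through the per-CPU window and
 * tear the mapping down again.  pmap_lmem_map1() disables interrupts and
 * pmap_lmem_unmap() restores them, so the window stays private to the
 * current CPU:
 *
 *	va = pmap_lmem_map1(pa);
 *	bzero((caddr_t)va, PAGE_SIZE);
 *	pmap_lmem_unmap();
 *
 * pmap_zero_page() and pmap_copy_page() fall back to this pattern when a
 * page cannot be reached through KSEG0/XKPHYS.
 */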
324
325/*
326 * Page table entry lookup routines.
327 */
328static __inline pd_entry_t *
329pmap_segmap(pmap_t pmap, vm_offset_t va)
330{
331
332	return (&pmap->pm_segtab[pmap_seg_index(va)]);
333}
334
335#ifdef __mips_n64
336static __inline pd_entry_t *
337pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
338{
339	pd_entry_t *pde;
340
341	pde = (pd_entry_t *)*pdpe;
342	return (&pde[pmap_pde_index(va)]);
343}
344
345static __inline pd_entry_t *
346pmap_pde(pmap_t pmap, vm_offset_t va)
347{
348	pd_entry_t *pdpe;
349
350	pdpe = pmap_segmap(pmap, va);
351	if (*pdpe == NULL)
352		return (NULL);
353
354	return (pmap_pdpe_to_pde(pdpe, va));
355}
356#else
357static __inline pd_entry_t *
358pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
359{
360
361	return (pdpe);
362}
363
364static __inline
365pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va)
366{
367
368	return (pmap_segmap(pmap, va));
369}
370#endif
371
372static __inline pt_entry_t *
373pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
374{
375	pt_entry_t *pte;
376
377	pte = (pt_entry_t *)*pde;
378	return (&pte[pmap_pte_index(va)]);
379}
380
381pt_entry_t *
382pmap_pte(pmap_t pmap, vm_offset_t va)
383{
384	pd_entry_t *pde;
385
386	pde = pmap_pde(pmap, va);
387	if (pde == NULL || *pde == NULL)
388		return (NULL);
389
390	return (pmap_pde_to_pte(pde, va));
391}
392
393vm_offset_t
394pmap_steal_memory(vm_size_t size)
395{
396	vm_paddr_t bank_size, pa;
397	vm_offset_t va;
398
399	size = round_page(size);
400	bank_size = phys_avail[1] - phys_avail[0];
401	while (size > bank_size) {
402		int i;
403
404		for (i = 0; phys_avail[i + 2]; i += 2) {
405			phys_avail[i] = phys_avail[i + 2];
406			phys_avail[i + 1] = phys_avail[i + 3];
407		}
408		phys_avail[i] = 0;
409		phys_avail[i + 1] = 0;
410		if (!phys_avail[0])
411			panic("pmap_steal_memory: out of memory");
412		bank_size = phys_avail[1] - phys_avail[0];
413	}
414
415	pa = phys_avail[0];
416	phys_avail[0] += size;
417	if (MIPS_DIRECT_MAPPABLE(pa) == 0)
418		panic("Out of memory below 512Meg?");
419	va = MIPS_PHYS_TO_DIRECT(pa);
420	bzero((caddr_t)va, size);
421	return (va);
422}
423
424/*
425 * Bootstrap the system enough to run with virtual memory.  This
426 * assumes that the phys_avail array has been initialized.
427 */
428static void
429pmap_create_kernel_pagetable(void)
430{
431	int i, j;
432	vm_offset_t ptaddr;
433	pt_entry_t *pte;
434#ifdef __mips_n64
435	pd_entry_t *pde;
436	vm_offset_t pdaddr;
437	int npt, npde;
438#endif
439
440	/*
441	 * Allocate segment table for the kernel
442	 */
443	kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);
444
445	/*
446	 * Allocate second level page tables for the kernel
447	 */
448#ifdef __mips_n64
449	npde = howmany(NKPT, NPDEPG);
450	pdaddr = pmap_steal_memory(PAGE_SIZE * npde);
451#endif
452	nkpt = NKPT;
453	ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt);
454
455	/*
456	 * The R[4-7]?00 stores only one copy of the Global bit in the
457	 * translation lookaside buffer for each pair of pages.  Thus invalid
458	 * entries must have the Global bit set so that when the EntryLo0 and
459	 * EntryLo1 G bits are ANDed together they produce a global bit to
460	 * store in the TLB.
461	 */
462	for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++)
463		*pte = PTE_G;
464
465#ifdef __mips_n64
466	for (i = 0,  npt = nkpt; npt > 0; i++) {
467		kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE);
468		pde = (pd_entry_t *)kernel_segmap[i];
469
470		for (j = 0; j < NPDEPG && npt > 0; j++, npt--)
471			pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE);
472	}
473#else
474	for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++)
475		kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE));
476#endif
477
478	PMAP_LOCK_INIT(kernel_pmap);
479	kernel_pmap->pm_segtab = kernel_segmap;
480	CPU_FILL(&kernel_pmap->pm_active);
481	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
482	kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
483	kernel_pmap->pm_asid[0].gen = 0;
484	kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE;
485}
486
487void
488pmap_bootstrap(void)
489{
490	int i;
491	int need_local_mappings = 0;
492
493	/* Sort. */
494again:
495	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
496		/*
497		 * Keep the memory aligned on page boundary.
498		 */
499		phys_avail[i] = round_page(phys_avail[i]);
500		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
501
502		if (i < 2)
503			continue;
504		if (phys_avail[i - 2] > phys_avail[i]) {
505			vm_paddr_t ptemp[2];
506
507			ptemp[0] = phys_avail[i + 0];
508			ptemp[1] = phys_avail[i + 1];
509
510			phys_avail[i + 0] = phys_avail[i - 2];
511			phys_avail[i + 1] = phys_avail[i - 1];
512
513			phys_avail[i - 2] = ptemp[0];
514			phys_avail[i - 1] = ptemp[1];
515			goto again;
516		}
517	}
518
519       	/*
520	 * On 32-bit systems, we may have memory which cannot be mapped directly.
521	 * This memory will need temporary mapping before it can be
522	 * accessed.
523	 */
524	if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1] - 1))
525		need_local_mappings = 1;
526
527	/*
528	 * Copy the phys_avail[] array before we start stealing memory from it.
529	 */
530	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
531		physmem_desc[i] = phys_avail[i];
532		physmem_desc[i + 1] = phys_avail[i + 1];
533	}
534
535	Maxmem = atop(phys_avail[i - 1]);
536
537	if (bootverbose) {
538		printf("Physical memory chunk(s):\n");
539		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
540			vm_paddr_t size;
541
542			size = phys_avail[i + 1] - phys_avail[i];
543			printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
544			    (uintmax_t) phys_avail[i],
545			    (uintmax_t) phys_avail[i + 1] - 1,
546			    (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
547		}
548		printf("Maxmem is 0x%0jx\n", ptoa((uintmax_t)Maxmem));
549	}
550	/*
551	 * Steal the message buffer from the beginning of memory.
552	 */
553	msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize);
554	msgbufinit(msgbufp, msgbufsize);
555
556	/*
557	 * Steal thread0 kstack.
558	 */
559	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
560
561	virtual_avail = VM_MIN_KERNEL_ADDRESS;
562	virtual_end = VM_MAX_KERNEL_ADDRESS;
563
564#ifdef SMP
565	/*
566	 * Steal some virtual address space to map the pcpu area.
567	 */
568	virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2);
569	pcpup = (struct pcpu *)virtual_avail;
570	virtual_avail += PAGE_SIZE * 2;
571
572	/*
573	 * Initialize the wired TLB entry mapping the pcpu region for
574	 * the BSP at 'pcpup'. Up until this point we were operating
575	 * with the 'pcpup' for the BSP pointing to a virtual address
576	 * in KSEG0 so there was no need for a TLB mapping.
577	 */
578	mips_pcpu_tlb_init(PCPU_ADDR(0));
579
580	if (bootverbose)
581		printf("pcpu is available at virtual address %p.\n", pcpup);
582#endif
583
584	if (need_local_mappings)
585		pmap_alloc_lmem_map();
586	pmap_create_kernel_pagetable();
587	pmap_max_asid = VMNUM_PIDS;
588	mips_wr_entryhi(0);
589	mips_wr_pagemask(0);
590
591 	/*
592	 * Initialize the global pv list lock.
593	 */
594	rw_init(&pvh_global_lock, "pmap pv global");
595}
596
597/*
598 * Initialize a vm_page's machine-dependent fields.
599 */
600void
601pmap_page_init(vm_page_t m)
602{
603
604	TAILQ_INIT(&m->md.pv_list);
605	m->md.pv_flags = 0;
606}
607
608/*
609 *	Initialize the pmap module.
610 *	Called by vm_init, to initialize any structures that the pmap
611 *	system needs to map virtual memory.
612 */
613void
614pmap_init(void)
615{
616}
617
618/***************************************************
619 * Low level helper routines.....
620 ***************************************************/
621
622#ifdef	SMP
623static __inline void
624pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
625{
626	int	cpuid, cpu, self;
627	cpuset_t active_cpus;
628
629	sched_pin();
630	if (is_kernel_pmap(pmap)) {
631		smp_rendezvous(NULL, fn, NULL, arg);
632		goto out;
633	}
634	/* Force ASID update on inactive CPUs */
635	CPU_FOREACH(cpu) {
636		if (!CPU_ISSET(cpu, &pmap->pm_active))
637			pmap->pm_asid[cpu].gen = 0;
638	}
639	cpuid = PCPU_GET(cpuid);
640	/*
641	 * XXX: barrier/locking for active?
642	 *
643	 * Take a snapshot of active here, any further changes are ignored.
644	 * tlb update/invalidate should be harmless on inactive CPUs
645	 */
646	active_cpus = pmap->pm_active;
647	self = CPU_ISSET(cpuid, &active_cpus);
648	CPU_CLR(cpuid, &active_cpus);
649	/* Optimize for the case where this cpu is the only active one */
650	if (CPU_EMPTY(&active_cpus)) {
651		if (self)
652			fn(arg);
653	} else {
654		if (self)
655			CPU_SET(cpuid, &active_cpus);
656		smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg);
657	}
658out:
659	sched_unpin();
660}
661#else /* !SMP */
662static __inline void
663pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
664{
665	int	cpuid;
666
667	if (is_kernel_pmap(pmap)) {
668		fn(arg);
669		return;
670	}
671	cpuid = PCPU_GET(cpuid);
672	if (!CPU_ISSET(cpuid, &pmap->pm_active))
673		pmap->pm_asid[cpuid].gen = 0;
674	else
675		fn(arg);
676}
677#endif /* SMP */
678
679static void
680pmap_invalidate_all(pmap_t pmap)
681{
682
683	pmap_call_on_active_cpus(pmap,
684	    (void (*)(void *))tlb_invalidate_all_user, pmap);
685}
686
687struct pmap_invalidate_page_arg {
688	pmap_t pmap;
689	vm_offset_t va;
690};
691
692static void
693pmap_invalidate_page_action(void *arg)
694{
695	struct pmap_invalidate_page_arg *p = arg;
696
697	tlb_invalidate_address(p->pmap, p->va);
698}
699
700static void
701pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
702{
703	struct pmap_invalidate_page_arg arg;
704
705	arg.pmap = pmap;
706	arg.va = va;
707	pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &arg);
708}
709
710struct pmap_invalidate_range_arg {
711	pmap_t pmap;
712	vm_offset_t sva;
713	vm_offset_t eva;
714};
715
716static void
717pmap_invalidate_range_action(void *arg)
718{
719	struct pmap_invalidate_range_arg *p = arg;
720
721	tlb_invalidate_range(p->pmap, p->sva, p->eva);
722}
723
724static void
725pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
726{
727	struct pmap_invalidate_range_arg arg;
728
729	arg.pmap = pmap;
730	arg.sva = sva;
731	arg.eva = eva;
732	pmap_call_on_active_cpus(pmap, pmap_invalidate_range_action, &arg);
733}
734
735struct pmap_update_page_arg {
736	pmap_t pmap;
737	vm_offset_t va;
738	pt_entry_t pte;
739};
740
741static void
742pmap_update_page_action(void *arg)
743{
744	struct pmap_update_page_arg *p = arg;
745
746	tlb_update(p->pmap, p->va, p->pte);
747}
748
749static void
750pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
751{
752	struct pmap_update_page_arg arg;
753
754	arg.pmap = pmap;
755	arg.va = va;
756	arg.pte = pte;
757	pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg);
758}
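/*
 * Each of the TLB operations above follows the same shape: bundle the
 * arguments into a small structure, provide an action routine that does
 * the local TLB work, and hand both to pmap_call_on_active_cpus(), which
 * runs the action on every CPU with the pmap active and marks inactive
 * CPUs for an ASID refresh.  A hypothetical new operation would be
 * another copy of the same boilerplate:
 *
 *	struct pmap_foo_arg {
 *		pmap_t pmap;
 *		vm_offset_t va;
 *	};
 *
 *	static void
 *	pmap_foo_action(void *arg)
 *	{
 *		struct pmap_foo_arg *p = arg;
 *
 *		tlb_foo(p->pmap, p->va);   (the local work; tlb_foo is hypothetical)
 *	}
 *
 *	static void
 *	pmap_foo(pmap_t pmap, vm_offset_t va)
 *	{
 *		struct pmap_foo_arg arg;
 *
 *		arg.pmap = pmap;
 *		arg.va = va;
 *		pmap_call_on_active_cpus(pmap, pmap_foo_action, &arg);
 *	}
 */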
759
760/*
761 *	Routine:	pmap_extract
762 *	Function:
763 *		Extract the physical page address associated
764 *		with the given map/virtual_address pair.
765 */
766vm_paddr_t
767pmap_extract(pmap_t pmap, vm_offset_t va)
768{
769	pt_entry_t *pte;
770	vm_offset_t retval = 0;
771
772	PMAP_LOCK(pmap);
773	pte = pmap_pte(pmap, va);
774	if (pte) {
775		retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK);
776	}
777	PMAP_UNLOCK(pmap);
778	return (retval);
779}
780
781/*
782 *	Routine:	pmap_extract_and_hold
783 *	Function:
784 *		Atomically extract and hold the physical page
785 *		with the given pmap and virtual address pair
786 *		if that mapping permits the given protection.
787 */
788vm_page_t
789pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
790{
791	pt_entry_t pte, *ptep;
792	vm_paddr_t pa, pte_pa;
793	vm_page_t m;
794
795	m = NULL;
796	pa = 0;
797	PMAP_LOCK(pmap);
798retry:
799	ptep = pmap_pte(pmap, va);
800	if (ptep != NULL) {
801		pte = *ptep;
802		if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) ||
803		    (prot & VM_PROT_WRITE) == 0)) {
804			pte_pa = TLBLO_PTE_TO_PA(pte);
805			if (vm_page_pa_tryrelock(pmap, pte_pa, &pa))
806				goto retry;
807			m = PHYS_TO_VM_PAGE(pte_pa);
808			vm_page_hold(m);
809		}
810	}
811	PA_UNLOCK_COND(pa);
812	PMAP_UNLOCK(pmap);
813	return (m);
814}
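/*
 * Caller-side sketch: the returned page is held rather than wired, so
 * the caller is expected to drop the hold (under the page lock) once it
 * is done with the page:
 *
 *	m = pmap_extract_and_hold(pmap, va, VM_PROT_READ);
 *	if (m != NULL) {
 *		... use the page ...
 *		vm_page_lock(m);
 *		vm_page_unhold(m);
 *		vm_page_unlock(m);
 *	}
 */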
815
816/***************************************************
817 * Low level mapping routines.....
818 ***************************************************/
819
820/*
821 * add a wired page to the kva
822 */
823void
824pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr)
825{
826	pt_entry_t *pte;
827	pt_entry_t opte, npte;
828
829#ifdef PMAP_DEBUG
830	printf("pmap_kenter:  va: %p -> pa: %p\n", (void *)va, (void *)pa);
831#endif
832
833	pte = pmap_pte(kernel_pmap, va);
834	opte = *pte;
835	npte = TLBLO_PA_TO_PFN(pa) | attr | PTE_D | PTE_V | PTE_G;
836	*pte = npte;
837	if (pte_test(&opte, PTE_V) && opte != npte)
838		pmap_update_page(kernel_pmap, va, npte);
839}
840
841void
842pmap_kenter(vm_offset_t va, vm_paddr_t pa)
843{
844
845	KASSERT(is_cacheable_mem(pa),
846		("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa));
847
848	pmap_kenter_attr(va, pa, PTE_C_CACHE);
849}
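/*
 * Minimal usage sketch, assuming 'va' is a page-aligned kernel virtual
 * address that has already been reserved from the kernel address space:
 *
 *	pmap_kenter(va, pa);		(wired, cacheable mapping)
 *	... access the page through va ...
 *	pmap_kremove(va);		(defined below)
 */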
850
851/*
852 * remove a page from the kernel pagetables
853 */
854 /* PMAP_INLINE */ void
855pmap_kremove(vm_offset_t va)
856{
857	pt_entry_t *pte;
858
859	/*
860	 * Write back all caches from the page being destroyed
861	 */
862	mips_dcache_wbinv_range_index(va, PAGE_SIZE);
863
864	pte = pmap_pte(kernel_pmap, va);
865	*pte = PTE_G;
866	pmap_invalidate_page(kernel_pmap, va);
867}
868
869/*
870 *	Used to map a range of physical addresses into kernel
871 *	virtual address space.
872 *
873 *	The value passed in '*virt' is a suggested virtual address for
874 *	the mapping. Architectures which can support a direct-mapped
875 *	physical to virtual region can return the appropriate address
876 *	within that region, leaving '*virt' unchanged. Other
877 *	architectures should map the pages starting at '*virt' and
878 *	update '*virt' with the first usable address after the mapped
879 *	region.
880 *
881 *	Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
882 */
883vm_offset_t
884pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
885{
886	vm_offset_t va, sva;
887
888	if (MIPS_DIRECT_MAPPABLE(end - 1))
889		return (MIPS_PHYS_TO_DIRECT(start));
890
891	va = sva = *virt;
892	while (start < end) {
893		pmap_kenter(va, start);
894		va += PAGE_SIZE;
895		start += PAGE_SIZE;
896	}
897	*virt = va;
898	return (sva);
899}
900
901/*
902 * Add a list of wired pages to the kva.
903 * This routine is only used for temporary
904 * kernel mappings that do not need to have
905 * page modification or references recorded.
906 * Note that old mappings are simply written
907 * over.  The page *must* be wired.
908 */
909void
910pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
911{
912	int i;
913	vm_offset_t origva = va;
914
915	for (i = 0; i < count; i++) {
916		pmap_flush_pvcache(m[i]);
917		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
918		va += PAGE_SIZE;
919	}
920
921	mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count);
922}
923
924/*
925 * this routine jerks page mappings from the
926 * kernel -- it is meant only for temporary mappings.
927 */
928void
929pmap_qremove(vm_offset_t va, int count)
930{
931	pt_entry_t *pte;
932	vm_offset_t origva;
933
934	if (count < 1)
935		return;
936	mips_dcache_wbinv_range_index(va, PAGE_SIZE * count);
937	origva = va;
938	do {
939		pte = pmap_pte(kernel_pmap, va);
940		*pte = PTE_G;
941		va += PAGE_SIZE;
942	} while (--count > 0);
943	pmap_invalidate_range(kernel_pmap, origva, va);
944}
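/*
 * Example (sketch): temporarily map 'count' wired pages into a
 * previously reserved KVA range, use them, and then remove the
 * mappings again:
 *
 *	pmap_qenter(kva, ma, count);
 *	bcopy((void *)kva, buf, count * PAGE_SIZE);
 *	pmap_qremove(kva, count);
 *
 * This is the pattern the buffer cache uses to map the pages backing a
 * struct buf; no pv entries are created, and the pages must stay wired
 * for the lifetime of the mapping.
 */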
945
946/***************************************************
947 * Page table page management routines.....
948 ***************************************************/
949
950/*
951 * Decrements a page table page's wire count, which is used to record the
952 * number of valid page table entries within the page.  If the wire count
953 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
954 * page table page was unmapped and FALSE otherwise.
955 */
956static PMAP_INLINE boolean_t
957pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
958{
959
960	--m->wire_count;
961	if (m->wire_count == 0) {
962		_pmap_unwire_ptp(pmap, va, m);
963		return (TRUE);
964	} else
965		return (FALSE);
966}
967
968static void
969_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
970{
971	pd_entry_t *pde;
972
973	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
974	/*
975	 * unmap the page table page
976	 */
977#ifdef __mips_n64
978	if (m->pindex < NUPDE)
979		pde = pmap_pde(pmap, va);
980	else
981		pde = pmap_segmap(pmap, va);
982#else
983	pde = pmap_pde(pmap, va);
984#endif
985	*pde = 0;
986	pmap->pm_stats.resident_count--;
987
988#ifdef __mips_n64
989	if (m->pindex < NUPDE) {
990		pd_entry_t *pdp;
991		vm_page_t pdpg;
992
993		/*
994		 * Recursively decrement next level pagetable refcount
995		 */
996		pdp = (pd_entry_t *)*pmap_segmap(pmap, va);
997		pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp));
998		pmap_unwire_ptp(pmap, va, pdpg);
999	}
1000#endif
1001
1002	/*
1003	 * If the page is finally unwired, simply free it.
1004	 */
1005	vm_page_free_zero(m);
1006	atomic_subtract_int(&cnt.v_wire_count, 1);
1007}
1008
1009/*
1010 * After removing a page table entry, this routine is used to
1011 * conditionally free the page, and manage the hold/wire counts.
1012 */
1013static int
1014pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
1015{
1016	vm_page_t mpte;
1017
1018	if (va >= VM_MAXUSER_ADDRESS)
1019		return (0);
1020	KASSERT(pde != 0, ("pmap_unuse_pt: pde != 0"));
1021	mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pde));
1022	return (pmap_unwire_ptp(pmap, va, mpte));
1023}
1024
1025void
1026pmap_pinit0(pmap_t pmap)
1027{
1028	int i;
1029
1030	PMAP_LOCK_INIT(pmap);
1031	pmap->pm_segtab = kernel_segmap;
1032	CPU_ZERO(&pmap->pm_active);
1033	for (i = 0; i < MAXCPU; i++) {
1034		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1035		pmap->pm_asid[i].gen = 0;
1036	}
1037	PCPU_SET(curpmap, pmap);
1038	TAILQ_INIT(&pmap->pm_pvchunk);
1039	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1040}
1041
1042void
1043pmap_grow_direct_page_cache()
1044{
1045
1046#ifdef __mips_n64
1047	vm_pageout_grow_cache(3, 0, MIPS_XKPHYS_LARGEST_PHYS);
1048#else
1049	vm_pageout_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS);
1050#endif
1051}
1052
1053vm_page_t
1054pmap_alloc_direct_page(unsigned int index, int req)
1055{
1056	vm_page_t m;
1057
1058	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED |
1059	    VM_ALLOC_ZERO);
1060	if (m == NULL)
1061		return (NULL);
1062
1063	if ((m->flags & PG_ZERO) == 0)
1064		pmap_zero_page(m);
1065
1066	m->pindex = index;
1067	return (m);
1068}
1069
1070/*
1071 * Initialize a preallocated and zeroed pmap structure,
1072 * such as one in a vmspace structure.
1073 */
1074int
1075pmap_pinit(pmap_t pmap)
1076{
1077	vm_offset_t ptdva;
1078	vm_page_t ptdpg;
1079	int i;
1080
1081	PMAP_LOCK_INIT(pmap);
1082
1083	/*
1084	 * allocate the page directory page
1085	 */
1086	while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL)
1087	       pmap_grow_direct_page_cache();
1088
1089	ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
1090	pmap->pm_segtab = (pd_entry_t *)ptdva;
1091	CPU_ZERO(&pmap->pm_active);
1092	for (i = 0; i < MAXCPU; i++) {
1093		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1094		pmap->pm_asid[i].gen = 0;
1095	}
1096	TAILQ_INIT(&pmap->pm_pvchunk);
1097	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1098
1099	return (1);
1100}
1101
1102/*
1103 * this routine is called if the page table page is not
1104 * mapped correctly.
1105 */
1106static vm_page_t
1107_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
1108{
1109	vm_offset_t pageva;
1110	vm_page_t m;
1111
1112	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1113	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1114	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1115
1116	/*
1117	 * Find or fabricate a new pagetable page
1118	 */
1119	if ((m = pmap_alloc_direct_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) {
1120		if (flags & M_WAITOK) {
1121			PMAP_UNLOCK(pmap);
1122			rw_wunlock(&pvh_global_lock);
1123			pmap_grow_direct_page_cache();
1124			rw_wlock(&pvh_global_lock);
1125			PMAP_LOCK(pmap);
1126		}
1127
1128		/*
1129		 * Indicate the need to retry.	While waiting, the page
1130		 * table page may have been allocated.
1131		 */
1132		return (NULL);
1133	}
1134
1135	/*
1136	 * Map the pagetable page into the process address space, if it
1137	 * isn't already there.
1138	 */
1139	pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
1140
1141#ifdef __mips_n64
1142	if (ptepindex >= NUPDE) {
1143		pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva;
1144	} else {
1145		pd_entry_t *pdep, *pde;
1146		int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT);
1147		int pdeindex = ptepindex & (NPDEPG - 1);
1148		vm_page_t pg;
1149
1150		pdep = &pmap->pm_segtab[segindex];
1151		if (*pdep == NULL) {
1152			/* recurse for allocating page dir */
1153			if (_pmap_allocpte(pmap, NUPDE + segindex,
1154			    flags) == NULL) {
1155				/* alloc failed, release current */
1156				--m->wire_count;
1157				atomic_subtract_int(&cnt.v_wire_count, 1);
1158				vm_page_free_zero(m);
1159				return (NULL);
1160			}
1161		} else {
1162			pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep));
1163			pg->wire_count++;
1164		}
1165		/* Next level entry */
1166		pde = (pd_entry_t *)*pdep;
1167		pde[pdeindex] = (pd_entry_t)pageva;
1168	}
1169#else
1170	pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva;
1171#endif
1172	pmap->pm_stats.resident_count++;
1173	return (m);
1174}
1175
1176static vm_page_t
1177pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
1178{
1179	unsigned ptepindex;
1180	pd_entry_t *pde;
1181	vm_page_t m;
1182
1183	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1184	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1185	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1186
1187	/*
1188	 * Calculate pagetable page index
1189	 */
1190	ptepindex = pmap_pde_pindex(va);
1191retry:
1192	/*
1193	 * Get the page directory entry
1194	 */
1195	pde = pmap_pde(pmap, va);
1196
1197	/*
1198	 * If the page table page is mapped, we just increment the hold
1199	 * count, and activate it.
1200	 */
1201	if (pde != NULL && *pde != NULL) {
1202		m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde));
1203		m->wire_count++;
1204	} else {
1205		/*
1206		 * Here if the pte page isn't mapped, or if it has been
1207		 * deallocated.
1208		 */
1209		m = _pmap_allocpte(pmap, ptepindex, flags);
1210		if (m == NULL && (flags & M_WAITOK))
1211			goto retry;
1212	}
1213	return (m);
1214}
1215
1216
1217/***************************************************
1218 * Pmap allocation/deallocation routines.
1219 ***************************************************/
1220
1221/*
1222 * Release any resources held by the given physical map.
1223 * Called when a pmap initialized by pmap_pinit is being released.
1224 * Should only be called if the map contains no valid mappings.
1225 */
1226void
1227pmap_release(pmap_t pmap)
1228{
1229	vm_offset_t ptdva;
1230	vm_page_t ptdpg;
1231
1232	KASSERT(pmap->pm_stats.resident_count == 0,
1233	    ("pmap_release: pmap resident count %ld != 0",
1234	    pmap->pm_stats.resident_count));
1235
1236	ptdva = (vm_offset_t)pmap->pm_segtab;
1237	ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva));
1238
1239	ptdpg->wire_count--;
1240	atomic_subtract_int(&cnt.v_wire_count, 1);
1241	vm_page_free_zero(ptdpg);
1242	PMAP_LOCK_DESTROY(pmap);
1243}
1244
1245/*
1246 * grow the number of kernel page table entries, if needed
1247 */
1248void
1249pmap_growkernel(vm_offset_t addr)
1250{
1251	vm_page_t nkpg;
1252	pd_entry_t *pde, *pdpe;
1253	pt_entry_t *pte;
1254	int i;
1255
1256	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1257	addr = roundup2(addr, NBSEG);
1258	if (addr - 1 >= kernel_map->max_offset)
1259		addr = kernel_map->max_offset;
1260	while (kernel_vm_end < addr) {
1261		pdpe = pmap_segmap(kernel_pmap, kernel_vm_end);
1262#ifdef __mips_n64
1263		if (*pdpe == 0) {
1264			/* new intermediate page table entry */
1265			nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
1266			if (nkpg == NULL)
1267				panic("pmap_growkernel: no memory to grow kernel");
1268			*pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1269			continue; /* try again */
1270		}
1271#endif
1272		pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
1273		if (*pde != 0) {
1274			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1275			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1276				kernel_vm_end = kernel_map->max_offset;
1277				break;
1278			}
1279			continue;
1280		}
1281
1282		/*
1283		 * This index is bogus, but out of the way
1284		 */
1285		nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
1286		if (!nkpg)
1287			panic("pmap_growkernel: no memory to grow kernel");
1288		nkpt++;
1289		*pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1290
1291		/*
1292		 * The R[4-7]?00 stores only one copy of the Global bit in
1293		 * the translation lookaside buffer for each pair of pages.
1294		 * Thus invalid entries must have the Global bit set so that
1295		 * when the EntryLo0 and EntryLo1 G bits are ANDed together
1296		 * they produce a global bit to store in the TLB.
1297		 */
1298		pte = (pt_entry_t *)*pde;
1299		for (i = 0; i < NPTEPG; i++)
1300			pte[i] = PTE_G;
1301
1302		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1303		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1304			kernel_vm_end = kernel_map->max_offset;
1305			break;
1306		}
1307	}
1308}
1309
1310/***************************************************
1311 * page management routines.
1312 ***************************************************/
1313
1314CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1315#ifdef __mips_n64
1316CTASSERT(_NPCM == 3);
1317CTASSERT(_NPCPV == 168);
1318#else
1319CTASSERT(_NPCM == 11);
1320CTASSERT(_NPCPV == 336);
1321#endif
1322
1323static __inline struct pv_chunk *
1324pv_to_chunk(pv_entry_t pv)
1325{
1326
1327	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1328}
1329
1330#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1331
1332#ifdef __mips_n64
1333#define	PC_FREE0_1	0xfffffffffffffffful
1334#define	PC_FREE2	0x000000fffffffffful
1335#else
1336#define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
1337#define	PC_FREE10	0x0000fffful	/* Free values for index 10 */
1338#endif
1339
1340static const u_long pc_freemask[_NPCM] = {
1341#ifdef __mips_n64
1342	PC_FREE0_1, PC_FREE0_1, PC_FREE2
1343#else
1344	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1345	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1346	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1347	PC_FREE0_9, PC_FREE10
1348#endif
1349};
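/*
 * A quick check of the arithmetic behind the constants above: each pv
 * chunk is one page and its free slots are tracked by _NPCM words of
 * sizeof(u_long) * NBBY bits each.
 *
 *	n64:	_NPCPV = 168 = 2 * 64 + 40, so the first two masks are
 *		all ones and the last uses only its low 40 bits
 *		(PC_FREE2 = 0x000000fffffffffful).
 *	o32:	_NPCPV = 336 = 10 * 32 + 16, so the first ten masks are
 *		all ones and the last uses only its low 16 bits
 *		(PC_FREE10 = 0x0000fffful).
 *
 * The CTASSERTs above catch any drift between these numbers and the
 * actual pv_chunk layout.
 */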
1350
1351static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
1352
1353SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1354    "Current number of pv entries");
1355
1356#ifdef PV_STATS
1357static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1358
1359SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1360    "Current number of pv entry chunks");
1361SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1362    "Current number of pv entry chunks allocated");
1363SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1364    "Current number of pv entry chunks frees");
1365SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1366    "Number of times tried to get a chunk page but failed.");
1367
1368static long pv_entry_frees, pv_entry_allocs;
1369static int pv_entry_spare;
1370
1371SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1372    "Current number of pv entry frees");
1373SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1374    "Current number of pv entry allocs");
1375SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1376    "Current number of spare pv entries");
1377#endif
1378
1379/*
1380 * We are in a serious low memory condition.  Resort to
1381 * drastic measures to free some pages so we can allocate
1382 * another pv entry chunk.
1383 */
1384static vm_page_t
1385pmap_pv_reclaim(pmap_t locked_pmap)
1386{
1387	struct pch newtail;
1388	struct pv_chunk *pc;
1389	pd_entry_t *pde;
1390	pmap_t pmap;
1391	pt_entry_t *pte, oldpte;
1392	pv_entry_t pv;
1393	vm_offset_t va;
1394	vm_page_t m, m_pc;
1395	u_long inuse;
1396	int bit, field, freed, idx;
1397
1398	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
1399	pmap = NULL;
1400	m_pc = NULL;
1401	TAILQ_INIT(&newtail);
1402	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
1403		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1404		if (pmap != pc->pc_pmap) {
1405			if (pmap != NULL) {
1406				pmap_invalidate_all(pmap);
1407				if (pmap != locked_pmap)
1408					PMAP_UNLOCK(pmap);
1409			}
1410			pmap = pc->pc_pmap;
1411			/* Avoid deadlock and lock recursion. */
1412			if (pmap > locked_pmap)
1413				PMAP_LOCK(pmap);
1414			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
1415				pmap = NULL;
1416				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1417				continue;
1418			}
1419		}
1420
1421		/*
1422		 * Destroy every non-wired, 4 KB page mapping in the chunk.
1423		 */
1424		freed = 0;
1425		for (field = 0; field < _NPCM; field++) {
1426			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
1427			    inuse != 0; inuse &= ~(1UL << bit)) {
1428				bit = ffsl(inuse) - 1;
1429				idx = field * sizeof(inuse) * NBBY + bit;
1430				pv = &pc->pc_pventry[idx];
1431				va = pv->pv_va;
1432				pde = pmap_pde(pmap, va);
1433				KASSERT(pde != NULL && *pde != 0,
1434				    ("pmap_pv_reclaim: pde"));
1435				pte = pmap_pde_to_pte(pde, va);
1436				oldpte = *pte;
1437				KASSERT(!pte_test(&oldpte, PTE_W),
1438				    ("wired pte for unwired page"));
1439				if (is_kernel_pmap(pmap))
1440					*pte = PTE_G;
1441				else
1442					*pte = 0;
1443				pmap_invalidate_page(pmap, va);
1444				m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(oldpte));
1445				if (pte_test(&oldpte, PTE_D))
1446					vm_page_dirty(m);
1447				if (m->md.pv_flags & PV_TABLE_REF)
1448					vm_page_aflag_set(m, PGA_REFERENCED);
1449				m->md.pv_flags &= ~PV_TABLE_REF;
1450				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1451				if (TAILQ_EMPTY(&m->md.pv_list))
1452					vm_page_aflag_clear(m, PGA_WRITEABLE);
1453				pc->pc_map[field] |= 1UL << bit;
1454				pmap_unuse_pt(pmap, va, *pde);
1455				freed++;
1456			}
1457		}
1458		if (freed == 0) {
1459			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1460			continue;
1461		}
1462		/* Every freed mapping is for a 4 KB page. */
1463		pmap->pm_stats.resident_count -= freed;
1464		PV_STAT(pv_entry_frees += freed);
1465		PV_STAT(pv_entry_spare += freed);
1466		pv_entry_count -= freed;
1467		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1468		for (field = 0; field < _NPCM; field++)
1469			if (pc->pc_map[field] != pc_freemask[field]) {
1470				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
1471				    pc_list);
1472				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1473
1474				/*
1475				 * One freed pv entry in locked_pmap is
1476				 * sufficient.
1477				 */
1478				if (pmap == locked_pmap)
1479					goto out;
1480				break;
1481			}
1482		if (field == _NPCM) {
1483			PV_STAT(pv_entry_spare -= _NPCPV);
1484			PV_STAT(pc_chunk_count--);
1485			PV_STAT(pc_chunk_frees++);
1486			/* Entire chunk is free; return it. */
1487			m_pc = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(
1488			    (vm_offset_t)pc));
1489			break;
1490		}
1491	}
1492out:
1493	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
1494	if (pmap != NULL) {
1495		pmap_invalidate_all(pmap);
1496		if (pmap != locked_pmap)
1497			PMAP_UNLOCK(pmap);
1498	}
1499	return (m_pc);
1500}
1501
1502/*
1503 * free the pv_entry back to the free list
1504 */
1505static void
1506free_pv_entry(pmap_t pmap, pv_entry_t pv)
1507{
1508	struct pv_chunk *pc;
1509	int bit, field, idx;
1510
1511	rw_assert(&pvh_global_lock, RA_WLOCKED);
1512	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1513	PV_STAT(pv_entry_frees++);
1514	PV_STAT(pv_entry_spare++);
1515	pv_entry_count--;
1516	pc = pv_to_chunk(pv);
1517	idx = pv - &pc->pc_pventry[0];
1518	field = idx / (sizeof(u_long) * NBBY);
1519	bit = idx % (sizeof(u_long) * NBBY);
1520	pc->pc_map[field] |= 1ul << bit;
1521	for (idx = 0; idx < _NPCM; idx++)
1522		if (pc->pc_map[idx] != pc_freemask[idx]) {
1523			/*
1524			 * 98% of the time, pc is already at the head of the
1525			 * list.  If it isn't already, move it to the head.
1526			 */
1527			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
1528			    pc)) {
1529				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1530				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
1531				    pc_list);
1532			}
1533			return;
1534		}
1535	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1536	free_pv_chunk(pc);
1537}
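/*
 * For example, with a 64-bit u_long (n64) the pv entry at index 100 in
 * a chunk maps to field = 100 / 64 = 1, bit = 100 % 64 = 36; with a
 * 32-bit u_long (o32) the same index maps to field = 3, bit = 4.
 * Setting that bit in pc_map marks the slot free again, mirroring the
 * bit clearing done in get_pv_entry() below.
 */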
1538
1539static void
1540free_pv_chunk(struct pv_chunk *pc)
1541{
1542	vm_page_t m;
1543
1544 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1545	PV_STAT(pv_entry_spare -= _NPCPV);
1546	PV_STAT(pc_chunk_count--);
1547	PV_STAT(pc_chunk_frees++);
1548	/* entire chunk is free, return it */
1549	m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc));
1550	vm_page_unwire(m, 0);
1551	vm_page_free(m);
1552}
1553
1554/*
1555 * get a new pv_entry, allocating a block from the system
1556 * when needed.
1557 */
1558static pv_entry_t
1559get_pv_entry(pmap_t pmap, boolean_t try)
1560{
1561	struct pv_chunk *pc;
1562	pv_entry_t pv;
1563	vm_page_t m;
1564	int bit, field, idx;
1565
1566	rw_assert(&pvh_global_lock, RA_WLOCKED);
1567	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1568	PV_STAT(pv_entry_allocs++);
1569	pv_entry_count++;
1570retry:
1571	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1572	if (pc != NULL) {
1573		for (field = 0; field < _NPCM; field++) {
1574			if (pc->pc_map[field]) {
1575				bit = ffsl(pc->pc_map[field]) - 1;
1576				break;
1577			}
1578		}
1579		if (field < _NPCM) {
1580			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
1581			pv = &pc->pc_pventry[idx];
1582			pc->pc_map[field] &= ~(1ul << bit);
1583			/* If this was the last item, move it to tail */
1584			for (field = 0; field < _NPCM; field++)
1585				if (pc->pc_map[field] != 0) {
1586					PV_STAT(pv_entry_spare--);
1587					return (pv);	/* not full, return */
1588				}
1589			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1590			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
1591			PV_STAT(pv_entry_spare--);
1592			return (pv);
1593		}
1594	}
1595	/* No free items, allocate another chunk */
1596	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL |
1597	    VM_ALLOC_WIRED);
1598	if (m == NULL) {
1599		if (try) {
1600			pv_entry_count--;
1601			PV_STAT(pc_chunk_tryfail++);
1602			return (NULL);
1603		}
1604		m = pmap_pv_reclaim(pmap);
1605		if (m == NULL)
1606			goto retry;
1607	}
1608	PV_STAT(pc_chunk_count++);
1609	PV_STAT(pc_chunk_allocs++);
1610	pc = (struct pv_chunk *)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
1611	pc->pc_pmap = pmap;
1612	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
1613	for (field = 1; field < _NPCM; field++)
1614		pc->pc_map[field] = pc_freemask[field];
1615	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1616	pv = &pc->pc_pventry[0];
1617	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1618	PV_STAT(pv_entry_spare += _NPCPV - 1);
1619	return (pv);
1620}
1621
1622static pv_entry_t
1623pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1624{
1625	pv_entry_t pv;
1626
1627	rw_assert(&pvh_global_lock, RA_WLOCKED);
1628	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
1629		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1630			TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
1631			break;
1632		}
1633	}
1634	return (pv);
1635}
1636
1637static void
1638pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1639{
1640	pv_entry_t pv;
1641
1642	pv = pmap_pvh_remove(pvh, pmap, va);
1643	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx",
1644	     (u_long)VM_PAGE_TO_PHYS(__containerof(pvh, struct vm_page, md)),
1645	     (u_long)va));
1646	free_pv_entry(pmap, pv);
1647}
1648
1649static void
1650pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
1651{
1652
1653	rw_assert(&pvh_global_lock, RA_WLOCKED);
1654	pmap_pvh_free(&m->md, pmap, va);
1655	if (TAILQ_EMPTY(&m->md.pv_list))
1656		vm_page_aflag_clear(m, PGA_WRITEABLE);
1657}
1658
1659/*
1660 * Conditionally create a pv entry.
1661 */
1662static boolean_t
1663pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
1664    vm_page_t m)
1665{
1666	pv_entry_t pv;
1667
1668	rw_assert(&pvh_global_lock, RA_WLOCKED);
1669	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1670	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1671		pv->pv_va = va;
1672		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1673		return (TRUE);
1674	} else
1675		return (FALSE);
1676}
1677
1678/*
1679 * pmap_remove_pte: do the things to unmap a page in a process
1680 */
1681static int
1682pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
1683    pd_entry_t pde)
1684{
1685	pt_entry_t oldpte;
1686	vm_page_t m;
1687	vm_paddr_t pa;
1688
1689	rw_assert(&pvh_global_lock, RA_WLOCKED);
1690	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1691
1692	/*
1693	 * Write back all cache lines from the page being unmapped.
1694	 */
1695	mips_dcache_wbinv_range_index(va, PAGE_SIZE);
1696
1697	oldpte = *ptq;
1698	if (is_kernel_pmap(pmap))
1699		*ptq = PTE_G;
1700	else
1701		*ptq = 0;
1702
1703	if (pte_test(&oldpte, PTE_W))
1704		pmap->pm_stats.wired_count -= 1;
1705
1706	pmap->pm_stats.resident_count -= 1;
1707
1708	if (pte_test(&oldpte, PTE_MANAGED)) {
1709		pa = TLBLO_PTE_TO_PA(oldpte);
1710		m = PHYS_TO_VM_PAGE(pa);
1711		if (pte_test(&oldpte, PTE_D)) {
1712			KASSERT(!pte_test(&oldpte, PTE_RO),
1713			    ("%s: modified page not writable: va: %p, pte: %#jx",
1714			    __func__, (void *)va, (uintmax_t)oldpte));
1715			vm_page_dirty(m);
1716		}
1717		if (m->md.pv_flags & PV_TABLE_REF)
1718			vm_page_aflag_set(m, PGA_REFERENCED);
1719		m->md.pv_flags &= ~PV_TABLE_REF;
1720
1721		pmap_remove_entry(pmap, m, va);
1722	}
1723	return (pmap_unuse_pt(pmap, va, pde));
1724}
1725
1726/*
1727 * Remove a single page from a process address space
1728 */
1729static void
1730pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1731{
1732	pd_entry_t *pde;
1733	pt_entry_t *ptq;
1734
1735	rw_assert(&pvh_global_lock, RA_WLOCKED);
1736	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1737	pde = pmap_pde(pmap, va);
1738	if (pde == NULL || *pde == 0)
1739		return;
1740	ptq = pmap_pde_to_pte(pde, va);
1741
1742	/*
1743	 * If there is no pte for this address, just skip it!
1744	 */
1745	if (!pte_test(ptq, PTE_V))
1746		return;
1747
1748	(void)pmap_remove_pte(pmap, ptq, va, *pde);
1749	pmap_invalidate_page(pmap, va);
1750}
1751
1752/*
1753 *	Remove the given range of addresses from the specified map.
1754 *
1755 *	It is assumed that the start and end are properly
1756 *	rounded to the page size.
1757 */
1758void
1759pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1760{
1761	pd_entry_t *pde, *pdpe;
1762	pt_entry_t *pte;
1763	vm_offset_t va, va_next;
1764
1765	/*
1766	 * Perform an unsynchronized read.  This is, however, safe.
1767	 */
1768	if (pmap->pm_stats.resident_count == 0)
1769		return;
1770
1771	rw_wlock(&pvh_global_lock);
1772	PMAP_LOCK(pmap);
1773
1774	/*
1775	 * Special handling for removing a single page: it is a very common
1776	 * operation for which we can short-circuit some code.
1777	 */
1778	if ((sva + PAGE_SIZE) == eva) {
1779		pmap_remove_page(pmap, sva);
1780		goto out;
1781	}
1782	for (; sva < eva; sva = va_next) {
1783		pdpe = pmap_segmap(pmap, sva);
1784#ifdef __mips_n64
1785		if (*pdpe == 0) {
1786			va_next = (sva + NBSEG) & ~SEGMASK;
1787			if (va_next < sva)
1788				va_next = eva;
1789			continue;
1790		}
1791#endif
1792		va_next = (sva + NBPDR) & ~PDRMASK;
1793		if (va_next < sva)
1794			va_next = eva;
1795
1796		pde = pmap_pdpe_to_pde(pdpe, sva);
1797		if (*pde == NULL)
1798			continue;
1799
1800		/*
1801		 * Limit our scan to either the end of the va represented
1802		 * by the current page table page, or to the end of the
1803		 * range being removed.
1804		 */
1805		if (va_next > eva)
1806			va_next = eva;
1807
1808		va = va_next;
1809		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1810		    sva += PAGE_SIZE) {
1811			if (!pte_test(pte, PTE_V)) {
1812				if (va != va_next) {
1813					pmap_invalidate_range(pmap, va, sva);
1814					va = va_next;
1815				}
1816				continue;
1817			}
1818			if (va == va_next)
1819				va = sva;
1820			if (pmap_remove_pte(pmap, pte, sva, *pde)) {
1821				sva += PAGE_SIZE;
1822				break;
1823			}
1824		}
1825		if (va != va_next)
1826			pmap_invalidate_range(pmap, va, sva);
1827	}
1828out:
1829	rw_wunlock(&pvh_global_lock);
1830	PMAP_UNLOCK(pmap);
1831}
1832
1833/*
1834 *	Routine:	pmap_remove_all
1835 *	Function:
1836 *		Removes this physical page from
1837 *		all physical maps in which it resides.
1838 *		Reflects back modify bits to the pager.
1839 *
1840 *	Notes:
1841 *		Original versions of this routine were very
1842 *		inefficient because they iteratively called
1843 *		pmap_remove (slow...)
1844 */
1845
1846void
1847pmap_remove_all(vm_page_t m)
1848{
1849	pv_entry_t pv;
1850	pmap_t pmap;
1851	pd_entry_t *pde;
1852	pt_entry_t *pte, tpte;
1853
1854	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1855	    ("pmap_remove_all: page %p is not managed", m));
1856	rw_wlock(&pvh_global_lock);
1857
1858	if (m->md.pv_flags & PV_TABLE_REF)
1859		vm_page_aflag_set(m, PGA_REFERENCED);
1860
1861	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1862		pmap = PV_PMAP(pv);
1863		PMAP_LOCK(pmap);
1864
1865		/*
1866		 * If it's the last mapping, write back all caches from
1867		 * the page being destroyed.
1868		 */
1869		if (TAILQ_NEXT(pv, pv_list) == NULL)
1870			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
1871
1872		pmap->pm_stats.resident_count--;
1873
1874		pde = pmap_pde(pmap, pv->pv_va);
1875		KASSERT(pde != NULL && *pde != 0, ("pmap_remove_all: pde"));
1876		pte = pmap_pde_to_pte(pde, pv->pv_va);
1877
1878		tpte = *pte;
1879		if (is_kernel_pmap(pmap))
1880			*pte = PTE_G;
1881		else
1882			*pte = 0;
1883
1884		if (pte_test(&tpte, PTE_W))
1885			pmap->pm_stats.wired_count--;
1886
1887		/*
1888		 * Update the vm_page_t clean and reference bits.
1889		 */
1890		if (pte_test(&tpte, PTE_D)) {
1891			KASSERT(!pte_test(&tpte, PTE_RO),
1892			    ("%s: modified page not writable: va: %p, pte: %#jx",
1893			    __func__, (void *)pv->pv_va, (uintmax_t)tpte));
1894			vm_page_dirty(m);
1895		}
1896		pmap_invalidate_page(pmap, pv->pv_va);
1897
1898		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1899		pmap_unuse_pt(pmap, pv->pv_va, *pde);
1900		free_pv_entry(pmap, pv);
1901		PMAP_UNLOCK(pmap);
1902	}
1903
1904	vm_page_aflag_clear(m, PGA_WRITEABLE);
1905	m->md.pv_flags &= ~PV_TABLE_REF;
1906	rw_wunlock(&pvh_global_lock);
1907}
1908
1909/*
1910 *	Set the physical protection on the
1911 *	specified range of this map as requested.
1912 */
1913void
1914pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1915{
1916	pt_entry_t *pte;
1917	pd_entry_t *pde, *pdpe;
1918	vm_offset_t va_next;
1919
1920	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1921		pmap_remove(pmap, sva, eva);
1922		return;
1923	}
1924	if (prot & VM_PROT_WRITE)
1925		return;
1926
1927	rw_wlock(&pvh_global_lock);
1928	PMAP_LOCK(pmap);
1929	for (; sva < eva; sva = va_next) {
1930		pt_entry_t pbits;
1931		vm_page_t m;
1932		vm_paddr_t pa;
1933
1934		pdpe = pmap_segmap(pmap, sva);
1935#ifdef __mips_n64
1936		if (*pdpe == 0) {
1937			va_next = (sva + NBSEG) & ~SEGMASK;
1938			if (va_next < sva)
1939				va_next = eva;
1940			continue;
1941		}
1942#endif
1943		va_next = (sva + NBPDR) & ~PDRMASK;
1944		if (va_next < sva)
1945			va_next = eva;
1946
1947		pde = pmap_pdpe_to_pde(pdpe, sva);
1948		if (*pde == NULL)
1949			continue;
1950		if (va_next > eva)
1951			va_next = eva;
1952
1953		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1954		     sva += PAGE_SIZE) {
1955
1956			/* Skip invalid PTEs */
1957			if (!pte_test(pte, PTE_V))
1958				continue;
1959			pbits = *pte;
1960			if (pte_test(&pbits, PTE_MANAGED | PTE_D)) {
1961				pa = TLBLO_PTE_TO_PA(pbits);
1962				m = PHYS_TO_VM_PAGE(pa);
1963				vm_page_dirty(m);
1964			}
1965			pte_clear(&pbits, PTE_D);
1966			pte_set(&pbits, PTE_RO);
1967
1968			if (pbits != *pte) {
1969				*pte = pbits;
1970				pmap_update_page(pmap, sva, pbits);
1971			}
1972		}
1973	}
1974	rw_wunlock(&pvh_global_lock);
1975	PMAP_UNLOCK(pmap);
1976}
1977
1978/*
1979 *	Insert the given physical page (p) at
1980 *	the specified virtual address (v) in the
1981 *	target physical map with the protection requested.
1982 *
1983 *	If specified, the page will be wired down, meaning
1984 *	that the related pte can not be reclaimed.
1985 *
1986 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1987 *	or lose information.  That is, this routine must actually
1988 *	insert this page into the given map NOW.
1989 */
1990void
1991pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1992    vm_prot_t prot, boolean_t wired)
1993{
1994	vm_paddr_t pa, opa;
1995	pt_entry_t *pte;
1996	pt_entry_t origpte, newpte;
1997	pv_entry_t pv;
1998	vm_page_t mpte, om;
1999
2000	va &= ~PAGE_MASK;
2001 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
2002	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
2003	    va >= kmi.clean_eva,
2004	    ("pmap_enter: managed mapping within the clean submap"));
2005	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
2006	    ("pmap_enter: page %p is not busy", m));
2007	pa = VM_PAGE_TO_PHYS(m);
2008	newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, access, prot);
2009	if (wired)
2010		newpte |= PTE_W;
2011	if (is_kernel_pmap(pmap))
2012		newpte |= PTE_G;
2013	if (is_cacheable_mem(pa))
2014		newpte |= PTE_C_CACHE;
2015	else
2016		newpte |= PTE_C_UNCACHED;
2017
2018	mpte = NULL;
2019
2020	rw_wlock(&pvh_global_lock);
2021	PMAP_LOCK(pmap);
2022
2023	/*
2024	 * In the case that a page table page is not resident, we are
2025	 * creating it here.
2026	 */
2027	if (va < VM_MAXUSER_ADDRESS) {
2028		mpte = pmap_allocpte(pmap, va, M_WAITOK);
2029	}
2030	pte = pmap_pte(pmap, va);
2031
2032	/*
2033	 * Page Directory table entry not valid, we need a new PT page
2034	 */
2035	if (pte == NULL) {
2036		panic("pmap_enter: invalid page directory, pdir=%p, va=%p",
2037		    (void *)pmap->pm_segtab, (void *)va);
2038	}
2039	om = NULL;
2040	origpte = *pte;
2041	opa = TLBLO_PTE_TO_PA(origpte);
2042
2043	/*
2044	 * Mapping has not changed, must be protection or wiring change.
2045	 */
2046	if (pte_test(&origpte, PTE_V) && opa == pa) {
2047		/*
2048		 * Wiring change, just update stats. We don't worry about
2049		 * wiring PT pages as they remain resident as long as there
2050		 * are valid mappings in them. Hence, if a user page is
2051		 * wired, the PT page will be also.
2052		 */
2053		if (wired && !pte_test(&origpte, PTE_W))
2054			pmap->pm_stats.wired_count++;
2055		else if (!wired && pte_test(&origpte, PTE_W))
2056			pmap->pm_stats.wired_count--;
2057
2058		KASSERT(!pte_test(&origpte, PTE_D | PTE_RO),
2059		    ("%s: modified page not writable: va: %p, pte: %#jx",
2060		    __func__, (void *)va, (uintmax_t)origpte));
2061
2062		/*
2063		 * Remove extra pte reference
2064		 */
2065		if (mpte)
2066			mpte->wire_count--;
2067
2068		if (pte_test(&origpte, PTE_MANAGED)) {
2069			m->md.pv_flags |= PV_TABLE_REF;
2070			om = m;
2071			newpte |= PTE_MANAGED;
2072			if (!pte_test(&newpte, PTE_RO))
2073				vm_page_aflag_set(m, PGA_WRITEABLE);
2074		}
2075		goto validate;
2076	}
2077
2078	pv = NULL;
2079
2080	/*
2081	 * Mapping has changed, invalidate old range and fall through to
2082	 * handle validating new mapping.
2083	 */
2084	if (opa) {
2085		if (pte_test(&origpte, PTE_W))
2086			pmap->pm_stats.wired_count--;
2087
2088		if (pte_test(&origpte, PTE_MANAGED)) {
2089			om = PHYS_TO_VM_PAGE(opa);
2090			pv = pmap_pvh_remove(&om->md, pmap, va);
2091		}
2092		if (mpte != NULL) {
2093			mpte->wire_count--;
2094			KASSERT(mpte->wire_count > 0,
2095			    ("pmap_enter: missing reference to page table page,"
2096			    " va: %p", (void *)va));
2097		}
2098	} else
2099		pmap->pm_stats.resident_count++;
2100
2101	/*
2102	 * Enter on the PV list if part of our managed memory.
2103	 */
2104	if ((m->oflags & VPO_UNMANAGED) == 0) {
2105		m->md.pv_flags |= PV_TABLE_REF;
2106		if (pv == NULL)
2107			pv = get_pv_entry(pmap, FALSE);
2108		pv->pv_va = va;
2109		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
2110		newpte |= PTE_MANAGED;
2111		if (!pte_test(&newpte, PTE_RO))
2112			vm_page_aflag_set(m, PGA_WRITEABLE);
2113	} else if (pv != NULL)
2114		free_pv_entry(pmap, pv);
2115
2116	/*
2117	 * Increment counters
2118	 */
2119	if (wired)
2120		pmap->pm_stats.wired_count++;
2121
2122validate:
2123
2124#ifdef PMAP_DEBUG
2125	printf("pmap_enter:  va: %p -> pa: %p\n", (void *)va, (void *)pa);
2126#endif
2127
2128	/*
2129	 * If the mapping or permission bits are different, we need to
2130	 * update the pte.
2131	 */
2132	if (origpte != newpte) {
2133		*pte = newpte;
2134		if (pte_test(&origpte, PTE_V)) {
2135			if (pte_test(&origpte, PTE_MANAGED) && opa != pa) {
2136				if (om->md.pv_flags & PV_TABLE_REF)
2137					vm_page_aflag_set(om, PGA_REFERENCED);
2138				om->md.pv_flags &= ~PV_TABLE_REF;
2139			}
2140			if (pte_test(&origpte, PTE_D)) {
2141				KASSERT(!pte_test(&origpte, PTE_RO),
2142				    ("pmap_enter: modified page not writable:"
2143				    " va: %p, pte: %#jx", (void *)va, (uintmax_t)origpte));
2144				if (pte_test(&origpte, PTE_MANAGED))
2145					vm_page_dirty(om);
2146			}
2147			if (pte_test(&origpte, PTE_MANAGED) &&
2148			    TAILQ_EMPTY(&om->md.pv_list))
2149				vm_page_aflag_clear(om, PGA_WRITEABLE);
2150			pmap_update_page(pmap, va, newpte);
2151		}
2152	}
2153
2154	/*
2155	 * Sync I & D caches for executable pages.  Do this only if the
2156	 * target pmap belongs to the current process.  Otherwise, an
2157	 * unresolvable TLB miss may occur.
2158	 */
2159	if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
2160	    (prot & VM_PROT_EXECUTE)) {
2161		mips_icache_sync_range(va, PAGE_SIZE);
2162		mips_dcache_wbinv_range(va, PAGE_SIZE);
2163	}
2164	rw_wunlock(&pvh_global_lock);
2165	PMAP_UNLOCK(pmap);
2166}
2167
2168/*
2169 * This code makes some *MAJOR* assumptions:
2170 * 1. Current pmap & pmap exists.
2171 * 2. Not wired.
2172 * 3. Read access.
2173 * 4. No page table pages.
2174 * but it is *MUCH* faster than pmap_enter...
2175 */
2176
2177void
2178pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2179{
2180
2181	rw_wlock(&pvh_global_lock);
2182	PMAP_LOCK(pmap);
2183	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
2184	rw_wunlock(&pvh_global_lock);
2185	PMAP_UNLOCK(pmap);
2186}
2187
2188static vm_page_t
2189pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2190    vm_prot_t prot, vm_page_t mpte)
2191{
2192	pt_entry_t *pte;
2193	vm_paddr_t pa;
2194
2195	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2196	    (m->oflags & VPO_UNMANAGED) != 0,
2197	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2198	rw_assert(&pvh_global_lock, RA_WLOCKED);
2199	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2200
2201	/*
2202	 * In the case that a page table page is not resident, we are
2203	 * creating it here.
2204	 */
2205	if (va < VM_MAXUSER_ADDRESS) {
2206		pd_entry_t *pde;
2207		unsigned ptepindex;
2208
2209		/*
2210		 * Calculate pagetable page index
2211		 */
2212		ptepindex = pmap_pde_pindex(va);
2213		if (mpte && (mpte->pindex == ptepindex)) {
2214			mpte->wire_count++;
2215		} else {
2216			/*
2217			 * Get the page directory entry
2218			 */
2219			pde = pmap_pde(pmap, va);
2220
2221			/*
2222			 * If the page table page is mapped, we just
2223			 * increment the hold count, and activate it.
2224			 */
2225			if (pde && *pde != 0) {
2226				mpte = PHYS_TO_VM_PAGE(
2227				    MIPS_DIRECT_TO_PHYS(*pde));
2228				mpte->wire_count++;
2229			} else {
2230				mpte = _pmap_allocpte(pmap, ptepindex,
2231				    M_NOWAIT);
2232				if (mpte == NULL)
2233					return (mpte);
2234			}
2235		}
2236	} else {
2237		mpte = NULL;
2238	}
2239
2240	pte = pmap_pte(pmap, va);
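	/*
	 * If a valid mapping already exists, leave it alone and release the
	 * extra page table page reference taken above.
	 */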
2241	if (pte_test(pte, PTE_V)) {
2242		if (mpte != NULL) {
2243			mpte->wire_count--;
2244			mpte = NULL;
2245		}
2246		return (mpte);
2247	}
2248
2249	/*
2250	 * Enter on the PV list if part of our managed memory.
2251	 */
2252	if ((m->oflags & VPO_UNMANAGED) == 0 &&
2253	    !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2254		if (mpte != NULL) {
2255			pmap_unwire_ptp(pmap, va, mpte);
2256			mpte = NULL;
2257		}
2258		return (mpte);
2259	}
2260
2261	/*
2262	 * Increment counters
2263	 */
2264	pmap->pm_stats.resident_count++;
2265
2266	pa = VM_PAGE_TO_PHYS(m);
2267
2268	/*
2269	 * Now validate mapping with RO protection
2270	 */
2271	*pte = PTE_RO | TLBLO_PA_TO_PFN(pa) | PTE_V;
2272	if ((m->oflags & VPO_UNMANAGED) == 0)
2273		*pte |= PTE_MANAGED;
2274
2275	if (is_cacheable_mem(pa))
2276		*pte |= PTE_C_CACHE;
2277	else
2278		*pte |= PTE_C_UNCACHED;
2279
2280	if (is_kernel_pmap(pmap))
2281		*pte |= PTE_G;
2282	else {
2283		/*
2284		 * Sync I & D caches.  Do this only if the target pmap
2285		 * belongs to the current process.  Otherwise, an
2286		 * unresolvable TLB miss may occur. */
2287		if (pmap == &curproc->p_vmspace->vm_pmap) {
2288			va &= ~PAGE_MASK;
2289			mips_icache_sync_range(va, PAGE_SIZE);
2290			mips_dcache_wbinv_range(va, PAGE_SIZE);
2291		}
2292	}
2293	return (mpte);
2294}
2295
2296/*
2297 * Make a temporary mapping for a physical address.  This is only intended
2298 * to be used for panic dumps.
2299 *
2300 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2301 */
2302void *
2303pmap_kenter_temporary(vm_paddr_t pa, int i)
2304{
2305	vm_offset_t va;
2306
2307	if (i != 0)
2308		printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
2309		    __func__);
2310
2311	if (MIPS_DIRECT_MAPPABLE(pa)) {
2312		va = MIPS_PHYS_TO_DIRECT(pa);
2313	} else {
2314#ifndef __mips_n64    /* XXX : to be converted to new style */
2315		int cpu;
2316		register_t intr;
2317		struct local_sysmaps *sysm;
2318		pt_entry_t *pte, npte;
2319
2320		/* If this is used for anything other than dumps, we may need
2321		 * to leave interrupts disabled on return.  If crash dumps don't
2322		 * work when we get to this point, consider leaving things
2323		 * disabled as a starting point ;-)
2324		 */
2325		intr = intr_disable();
2326		cpu = PCPU_GET(cpuid);
2327		sysm = &sysmap_lmem[cpu];
2328		/* Since this is for the debugger, no locks or any other fun */
2329		npte = TLBLO_PA_TO_PFN(pa) | PTE_C_CACHE | PTE_D | PTE_V |
2330		    PTE_G;
2331		pte = pmap_pte(kernel_pmap, sysm->base);
2332		*pte = npte;
2333		sysm->valid1 = 1;
2334		pmap_update_page(kernel_pmap, sysm->base, npte);
2335		va = sysm->base;
2336		intr_restore(intr);
2337#endif
2338	}
2339	return ((void *)va);
2340}
2341
2342void
2343pmap_kenter_temporary_free(vm_paddr_t pa)
2344{
2345#ifndef __mips_n64    /* XXX : to be converted to new style */
2346	int cpu;
2347	register_t intr;
2348	struct local_sysmaps *sysm;
2349#endif
2350
2351	if (MIPS_DIRECT_MAPPABLE(pa)) {
2352		/* nothing to do for this case */
2353		return;
2354	}
2355#ifndef __mips_n64    /* XXX : to be converted to new style */
2356	cpu = PCPU_GET(cpuid);
2357	sysm = &sysmap_lmem[cpu];
2358	if (sysm->valid1) {
2359		pt_entry_t *pte;
2360
2361		intr = intr_disable();
2362		pte = pmap_pte(kernel_pmap, sysm->base);
2363		*pte = PTE_G;
2364		pmap_invalidate_page(kernel_pmap, sysm->base);
2365		intr_restore(intr);
2366		sysm->valid1 = 0;
2367	}
2368#endif
2369}
2370
2371/*
2372 * Maps a sequence of resident pages belonging to the same object.
2373 * The sequence begins with the given page m_start.  This page is
2374 * mapped at the given virtual address start.  Each subsequent page is
2375 * mapped at a virtual address that is offset from start by the same
2376 * amount as the page is offset from m_start within the object.  The
2377 * last page in the sequence is the page with the largest offset from
2378 * m_start that can be mapped at a virtual address less than the given
2379 * virtual address end.  Not every virtual page between start and end
2380 * is mapped; only those for which a resident page exists with the
2381 * corresponding offset from m_start are mapped.
2382 */
2383void
2384pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2385    vm_page_t m_start, vm_prot_t prot)
2386{
2387	vm_page_t m, mpte;
2388	vm_pindex_t diff, psize;
2389
2390	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
2391	psize = atop(end - start);
2392	mpte = NULL;
2393	m = m_start;
2394	rw_wlock(&pvh_global_lock);
2395	PMAP_LOCK(pmap);
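	/*
	 * Carry the page table page from one iteration to the next so that
	 * pmap_enter_quick_locked() can reuse it when consecutive pages
	 * share the same page table page.
	 */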
2396	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2397		mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
2398		    prot, mpte);
2399		m = TAILQ_NEXT(m, listq);
2400	}
2401	rw_wunlock(&pvh_global_lock);
2402 	PMAP_UNLOCK(pmap);
2403}
2404
2405/*
2406 * pmap_object_init_pt preloads the ptes for a given object into the
2407 * specified pmap, eliminating the blast of soft faults on process
2408 * startup and immediately after an mmap.  On MIPS this is a no-op.
2409 */
2410void
2411pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2412    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2413{
2414	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2415	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2416	    ("pmap_object_init_pt: non-device object"));
2417}
2418
2419/*
2420 *	Routine:	pmap_change_wiring
2421 *	Function:	Change the wiring attribute for a map/virtual-address
2422 *			pair.
2423 *	In/out conditions:
2424 *			The mapping must already exist in the pmap.
2425 */
2426void
2427pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
2428{
2429	pt_entry_t *pte;
2430
2431	PMAP_LOCK(pmap);
2432	pte = pmap_pte(pmap, va);
2433
2434	if (wired && !pte_test(pte, PTE_W))
2435		pmap->pm_stats.wired_count++;
2436	else if (!wired && pte_test(pte, PTE_W))
2437		pmap->pm_stats.wired_count--;
2438
2439	/*
2440	 * Wiring is not a hardware characteristic so there is no need to
2441	 * invalidate TLB.
2442	 */
2443	if (wired)
2444		pte_set(pte, PTE_W);
2445	else
2446		pte_clear(pte, PTE_W);
2447	PMAP_UNLOCK(pmap);
2448}
2449
2450/*
2451 *	Copy the range specified by src_addr/len
2452 *	from the source map to the range dst_addr/len
2453 *	in the destination map.
2454 *
2455 *	This routine is only advisory and need not do anything.
2456 */
2457
2458void
2459pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2460    vm_size_t len, vm_offset_t src_addr)
2461{
2462}
2463
2464/*
2465 *	pmap_zero_page zeros the specified hardware page by mapping
2466 *	the page into KVM and using bzero to clear its contents.
2467 *
2468 * 	Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2469 */
2470void
2471pmap_zero_page(vm_page_t m)
2472{
2473	vm_offset_t va;
2474	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2475
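	/*
	 * Zero the page through a direct mapping when possible, otherwise
	 * through a temporary mapping, and write the zeroed lines back out
	 * of the data cache so memory is up to date.
	 */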
2476	if (MIPS_DIRECT_MAPPABLE(phys)) {
2477		va = MIPS_PHYS_TO_DIRECT(phys);
2478		bzero((caddr_t)va, PAGE_SIZE);
2479		mips_dcache_wbinv_range(va, PAGE_SIZE);
2480	} else {
2481		va = pmap_lmem_map1(phys);
2482		bzero((caddr_t)va, PAGE_SIZE);
2483		mips_dcache_wbinv_range(va, PAGE_SIZE);
2484		pmap_lmem_unmap();
2485	}
2486}
2487
2488/*
2489 *	pmap_zero_page_area zeros the specified hardware page by mapping
2490 *	the page into KVM and using bzero to clear its contents.
2491 *
2492 *	off and size may not cover an area beyond a single hardware page.
2493 */
2494void
2495pmap_zero_page_area(vm_page_t m, int off, int size)
2496{
2497	vm_offset_t va;
2498	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2499
2500	if (MIPS_DIRECT_MAPPABLE(phys)) {
2501		va = MIPS_PHYS_TO_DIRECT(phys);
2502		bzero((char *)(caddr_t)va + off, size);
2503		mips_dcache_wbinv_range(va + off, size);
2504	} else {
2505		va = pmap_lmem_map1(phys);
2506		bzero((char *)va + off, size);
2507		mips_dcache_wbinv_range(va + off, size);
2508		pmap_lmem_unmap();
2509	}
2510}
2511
2512void
2513pmap_zero_page_idle(vm_page_t m)
2514{
2515	vm_offset_t va;
2516	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2517
2518	if (MIPS_DIRECT_MAPPABLE(phys)) {
2519		va = MIPS_PHYS_TO_DIRECT(phys);
2520		bzero((caddr_t)va, PAGE_SIZE);
2521		mips_dcache_wbinv_range(va, PAGE_SIZE);
2522	} else {
2523		va = pmap_lmem_map1(phys);
2524		bzero((caddr_t)va, PAGE_SIZE);
2525		mips_dcache_wbinv_range(va, PAGE_SIZE);
2526		pmap_lmem_unmap();
2527	}
2528}
2529
2530/*
2531 *	pmap_copy_page copies the specified (machine independent)
2532 *	page by mapping the page into virtual memory and using
2533 *	bcopy to copy the page, one machine dependent page at a
2534 *	time.
2535 *
2536 * 	Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2537 */
2538void
2539pmap_copy_page(vm_page_t src, vm_page_t dst)
2540{
2541	vm_offset_t va_src, va_dst;
2542	vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src);
2543	vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst);
2544
2545	if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) {
2546		/* easy case, all can be accessed via KSEG0 */
2547		/*
2548		 * Flush all cache lines for VAs mapping the source page to
2549		 * make sure the data in SDRAM is up to date.
2550		 */
2551		pmap_flush_pvcache(src);
2552		mips_dcache_wbinv_range_index(
2553		    MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE);
2554		va_src = MIPS_PHYS_TO_DIRECT(phys_src);
2555		va_dst = MIPS_PHYS_TO_DIRECT(phys_dst);
2556		bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2557		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2558	} else {
2559		va_src = pmap_lmem_map2(phys_src, phys_dst);
2560		va_dst = va_src + PAGE_SIZE;
2561		bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2562		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2563		pmap_lmem_unmap();
2564	}
2565}
2566
2567/*
2568 * Returns true if the pmap's pv is one of the first
2569 * 16 pvs linked to from this page.  This count may
2570 * be changed upwards or downwards in the future; it
2571 * is only necessary that true be returned for a small
2572 * subset of pmaps for proper page aging.
2573 */
2574boolean_t
2575pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2576{
2577	pv_entry_t pv;
2578	int loops = 0;
2579	boolean_t rv;
2580
2581	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2582	    ("pmap_page_exists_quick: page %p is not managed", m));
2583	rv = FALSE;
2584	rw_wlock(&pvh_global_lock);
2585	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2586		if (PV_PMAP(pv) == pmap) {
2587			rv = TRUE;
2588			break;
2589		}
2590		loops++;
2591		if (loops >= 16)
2592			break;
2593	}
2594	rw_wunlock(&pvh_global_lock);
2595	return (rv);
2596}
2597
2598/*
2599 * Remove all pages from the specified address space; this aids
2600 * process exit speeds.  Also, this code is special-cased for the
2601 * current process only, but can have the more generic (and
2602 * slightly slower) mode enabled.  This is much faster than
2603 * pmap_remove in the case of running down an entire address
2604 * space.
2605 */
2606void
2607pmap_remove_pages(pmap_t pmap)
2608{
2609	pd_entry_t *pde;
2610	pt_entry_t *pte, tpte;
2611	pv_entry_t pv;
2612	vm_page_t m;
2613	struct pv_chunk *pc, *npc;
2614	u_long inuse, bitmask;
2615	int allfree, bit, field, idx;
2616
2617	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2618		printf("warning: pmap_remove_pages called with non-current pmap\n");
2619		return;
2620	}
2621	rw_wlock(&pvh_global_lock);
2622	PMAP_LOCK(pmap);
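	/*
	 * Walk this pmap's pv chunks; a clear bit in pc_map marks an
	 * allocated pv entry, so the inverted map yields the entries in use.
	 */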
2623	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2624		allfree = 1;
2625		for (field = 0; field < _NPCM; field++) {
2626			inuse = ~pc->pc_map[field] & pc_freemask[field];
2627			while (inuse != 0) {
2628				bit = ffsl(inuse) - 1;
2629				bitmask = 1UL << bit;
2630				idx = field * sizeof(inuse) * NBBY + bit;
2631				pv = &pc->pc_pventry[idx];
2632				inuse &= ~bitmask;
2633
2634				pde = pmap_pde(pmap, pv->pv_va);
2635				KASSERT(pde != NULL && *pde != 0,
2636				    ("pmap_remove_pages: pde"));
2637				pte = pmap_pde_to_pte(pde, pv->pv_va);
2638				if (!pte_test(pte, PTE_V))
2639					panic("pmap_remove_pages: bad pte");
2640				tpte = *pte;
2641
2642/*
2643 * We cannot remove wired pages from a process' mapping at this time
2644 */
2645				if (pte_test(&tpte, PTE_W)) {
2646					allfree = 0;
2647					continue;
2648				}
2649				*pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2650
2651				m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte));
2652				KASSERT(m != NULL,
2653				    ("pmap_remove_pages: bad tpte %#jx",
2654				    (uintmax_t)tpte));
2655
2656				/*
2657				 * Update the vm_page_t clean and reference bits.
2658				 */
2659				if (pte_test(&tpte, PTE_D))
2660					vm_page_dirty(m);
2661
2662				/* Mark free */
2663				PV_STAT(pv_entry_frees++);
2664				PV_STAT(pv_entry_spare++);
2665				pv_entry_count--;
2666				pc->pc_map[field] |= bitmask;
2667				pmap->pm_stats.resident_count--;
2668				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2669				if (TAILQ_EMPTY(&m->md.pv_list))
2670					vm_page_aflag_clear(m, PGA_WRITEABLE);
2671				pmap_unuse_pt(pmap, pv->pv_va, *pde);
2672			}
2673		}
2674		if (allfree) {
2675			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2676			free_pv_chunk(pc);
2677		}
2678	}
2679	pmap_invalidate_all(pmap);
2680	PMAP_UNLOCK(pmap);
2681	rw_wunlock(&pvh_global_lock);
2682}
2683
2684/*
2685 * pmap_testbit tests bits in pte's
2686 */
2687static boolean_t
2688pmap_testbit(vm_page_t m, int bit)
2689{
2690	pv_entry_t pv;
2691	pmap_t pmap;
2692	pt_entry_t *pte;
2693	boolean_t rv = FALSE;
2694
2695	if (m->oflags & VPO_UNMANAGED)
2696		return (rv);
2697
2698	rw_assert(&pvh_global_lock, RA_WLOCKED);
2699	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2700		pmap = PV_PMAP(pv);
2701		PMAP_LOCK(pmap);
2702		pte = pmap_pte(pmap, pv->pv_va);
2703		rv = pte_test(pte, bit);
2704		PMAP_UNLOCK(pmap);
2705		if (rv)
2706			break;
2707	}
2708	return (rv);
2709}
2710
2711/*
2712 *	pmap_page_wired_mappings:
2713 *
2714 *	Return the number of managed mappings to the given physical page
2715 *	that are wired.
2716 */
2717int
2718pmap_page_wired_mappings(vm_page_t m)
2719{
2720	pv_entry_t pv;
2721	pmap_t pmap;
2722	pt_entry_t *pte;
2723	int count;
2724
2725	count = 0;
2726	if ((m->oflags & VPO_UNMANAGED) != 0)
2727		return (count);
2728	rw_wlock(&pvh_global_lock);
2729	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2730		pmap = PV_PMAP(pv);
2731		PMAP_LOCK(pmap);
2732		pte = pmap_pte(pmap, pv->pv_va);
2733		if (pte_test(pte, PTE_W))
2734			count++;
2735		PMAP_UNLOCK(pmap);
2736	}
2737	rw_wunlock(&pvh_global_lock);
2738	return (count);
2739}
2740
2741/*
2742 * Clear the write and modified bits in each of the given page's mappings.
2743 */
2744void
2745pmap_remove_write(vm_page_t m)
2746{
2747	pmap_t pmap;
2748	pt_entry_t pbits, *pte;
2749	pv_entry_t pv;
2750
2751	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2752	    ("pmap_remove_write: page %p is not managed", m));
2753
2754	/*
2755	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
2756	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
2757	 * is clear, no page table entries need updating.
2758	 */
2759	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2760	if ((m->oflags & VPO_BUSY) == 0 &&
2761	    (m->aflags & PGA_WRITEABLE) == 0)
2762		return;
2763	rw_wlock(&pvh_global_lock);
2764	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2765		pmap = PV_PMAP(pv);
2766		PMAP_LOCK(pmap);
2767		pte = pmap_pte(pmap, pv->pv_va);
2768		KASSERT(pte != NULL && pte_test(pte, PTE_V),
2769		    ("page on pv_list has no pte"));
2770		pbits = *pte;
2771		if (pte_test(&pbits, PTE_D)) {
2772			pte_clear(&pbits, PTE_D);
2773			vm_page_dirty(m);
2774		}
2775		pte_set(&pbits, PTE_RO);
2776		if (pbits != *pte) {
2777			*pte = pbits;
2778			pmap_update_page(pmap, pv->pv_va, pbits);
2779		}
2780		PMAP_UNLOCK(pmap);
2781	}
2782	vm_page_aflag_clear(m, PGA_WRITEABLE);
2783	rw_wunlock(&pvh_global_lock);
2784}
2785
2786/*
2787 *	pmap_ts_referenced:
2788 *
2789 *	Return the count of reference bits for a page, clearing all of them.
2790 */
2791int
2792pmap_ts_referenced(vm_page_t m)
2793{
2794
2795	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2796	    ("pmap_ts_referenced: page %p is not managed", m));
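	/*
	 * Only a single software reference bit (PV_TABLE_REF) is kept per
	 * page, so at most one reference can be reported and cleared.
	 */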
2797	if (m->md.pv_flags & PV_TABLE_REF) {
2798		rw_wlock(&pvh_global_lock);
2799		m->md.pv_flags &= ~PV_TABLE_REF;
2800		rw_wunlock(&pvh_global_lock);
2801		return (1);
2802	}
2803	return (0);
2804}
2805
2806/*
2807 *	pmap_is_modified:
2808 *
2809 *	Return whether or not the specified physical page was modified
2810 *	in any physical maps.
2811 */
2812boolean_t
2813pmap_is_modified(vm_page_t m)
2814{
2815	boolean_t rv;
2816
2817	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2818	    ("pmap_is_modified: page %p is not managed", m));
2819
2820	/*
2821	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
2822	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2823	 * is clear, no PTEs can have PTE_D set.
2824	 */
2825	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2826	if ((m->oflags & VPO_BUSY) == 0 &&
2827	    (m->aflags & PGA_WRITEABLE) == 0)
2828		return (FALSE);
2829	rw_wlock(&pvh_global_lock);
2830	rv = pmap_testbit(m, PTE_D);
2831	rw_wunlock(&pvh_global_lock);
2832	return (rv);
2833}
2834
2835/* N/C */
2836
2837/*
2838 *	pmap_is_prefaultable:
2839 *
2840 *	Return whether or not the specified virtual address is eligible
2841 *	for prefault.
2842 */
2843boolean_t
2844pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2845{
2846	pd_entry_t *pde;
2847	pt_entry_t *pte;
2848	boolean_t rv;
2849
2850	rv = FALSE;
2851	PMAP_LOCK(pmap);
2852	pde = pmap_pde(pmap, addr);
2853	if (pde != NULL && *pde != 0) {
2854		pte = pmap_pde_to_pte(pde, addr);
2855		rv = (*pte == 0);
2856	}
2857	PMAP_UNLOCK(pmap);
2858	return (rv);
2859}
2860
2861/*
2862 *	Clear the modify bits on the specified physical page.
2863 */
2864void
2865pmap_clear_modify(vm_page_t m)
2866{
2867	pmap_t pmap;
2868	pt_entry_t *pte;
2869	pv_entry_t pv;
2870
2871	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2872	    ("pmap_clear_modify: page %p is not managed", m));
2873	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2874	KASSERT((m->oflags & VPO_BUSY) == 0,
2875	    ("pmap_clear_modify: page %p is busy", m));
2876
2877	/*
2878	 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set.
2879	 * If the object containing the page is locked and the page is not
2880	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
2881	 */
2882	if ((m->aflags & PGA_WRITEABLE) == 0)
2883		return;
2884	rw_wlock(&pvh_global_lock);
2885	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2886		pmap = PV_PMAP(pv);
2887		PMAP_LOCK(pmap);
2888		pte = pmap_pte(pmap, pv->pv_va);
2889		if (pte_test(pte, PTE_D)) {
2890			pte_clear(pte, PTE_D);
2891			pmap_update_page(pmap, pv->pv_va, *pte);
2892		}
2893		PMAP_UNLOCK(pmap);
2894	}
2895	rw_wunlock(&pvh_global_lock);
2896}
2897
2898/*
2899 *	pmap_is_referenced:
2900 *
2901 *	Return whether or not the specified physical page was referenced
2902 *	in any physical maps.
2903 */
2904boolean_t
2905pmap_is_referenced(vm_page_t m)
2906{
2907
2908	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2909	    ("pmap_is_referenced: page %p is not managed", m));
2910	return ((m->md.pv_flags & PV_TABLE_REF) != 0);
2911}
2912
2913/*
2914 *	pmap_clear_reference:
2915 *
2916 *	Clear the reference bit on the specified physical page.
2917 */
2918void
2919pmap_clear_reference(vm_page_t m)
2920{
2921
2922	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2923	    ("pmap_clear_reference: page %p is not managed", m));
2924	rw_wlock(&pvh_global_lock);
2925	if (m->md.pv_flags & PV_TABLE_REF) {
2926		m->md.pv_flags &= ~PV_TABLE_REF;
2927	}
2928	rw_wunlock(&pvh_global_lock);
2929}
2930
2931/*
2932 * Miscellaneous support routines follow
2933 */
2934
2935/*
2936 * Map a set of physical memory pages into the kernel virtual
2937 * address space. Return a pointer to where it is mapped. This
2938 * routine is intended to be used for mapping device memory,
2939 * NOT real memory.
2940 *
2941 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit.
2942 */
2943void *
2944pmap_mapdev(vm_paddr_t pa, vm_size_t size)
2945{
2946	vm_offset_t va, tmpva, offset;
2947
2948	/*
2949	 * KSEG1 maps only the first 512M of the physical address space.  For
2950	 * pa > 0x20000000 we should create a proper mapping using pmap_kenter.
2951	 */
2952	if (MIPS_DIRECT_MAPPABLE(pa + size - 1))
2953		return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa));
2954	else {
2955		offset = pa & PAGE_MASK;
2956		size = roundup(size + offset, PAGE_SIZE);
2957
2958		va = kmem_alloc_nofault(kernel_map, size);
2959		if (!va)
2960			panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2961		pa = trunc_page(pa);
2962		for (tmpva = va; size > 0;) {
2963			pmap_kenter_attr(tmpva, pa, PTE_C_UNCACHED);
2964			size -= PAGE_SIZE;
2965			tmpva += PAGE_SIZE;
2966			pa += PAGE_SIZE;
2967		}
2968	}
2969
2970	return ((void *)(va + offset));
2971}
2972
2973void
2974pmap_unmapdev(vm_offset_t va, vm_size_t size)
2975{
2976#ifndef __mips_n64
2977	vm_offset_t base, offset;
2978
2979	/* If the address is within KSEG1 then there is nothing to do */
2980	if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END)
2981		return;
2982
2983	base = trunc_page(va);
2984	offset = va & PAGE_MASK;
2985	size = roundup(size + offset, PAGE_SIZE);
2986	kmem_free(kernel_map, base, size);
2987#endif
2988}
2989
2990/*
2991 * perform the pmap work for mincore
2992 */
2993int
2994pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2995{
2996	pt_entry_t *ptep, pte;
2997	vm_paddr_t pa;
2998	vm_page_t m;
2999	int val;
3000
3001	PMAP_LOCK(pmap);
3002retry:
3003	ptep = pmap_pte(pmap, addr);
3004	pte = (ptep != NULL) ? *ptep : 0;
3005	if (!pte_test(&pte, PTE_V)) {
3006		val = 0;
3007		goto out;
3008	}
3009	val = MINCORE_INCORE;
3010	if (pte_test(&pte, PTE_D))
3011		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3012	pa = TLBLO_PTE_TO_PA(pte);
3013	if (pte_test(&pte, PTE_MANAGED)) {
3014		/*
3015		 * This may falsely report the given address as
3016		 * MINCORE_REFERENCED.  Unfortunately, due to the lack of
3017		 * per-PTE reference information, it is impossible to
3018		 * determine if the address is MINCORE_REFERENCED.
3019		 */
3020		m = PHYS_TO_VM_PAGE(pa);
3021		if ((m->aflags & PGA_REFERENCED) != 0)
3022			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3023	}
3024	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
3025	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
3026	    pte_test(&pte, PTE_MANAGED)) {
3027		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
3028		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
3029			goto retry;
3030	} else
3031out:
3032		PA_UNLOCK_COND(*locked_pa);
3033	PMAP_UNLOCK(pmap);
3034	return (val);
3035}
3036
3037void
3038pmap_activate(struct thread *td)
3039{
3040	pmap_t pmap, oldpmap;
3041	struct proc *p = td->td_proc;
3042	u_int cpuid;
3043
3044	critical_enter();
3045
3046	pmap = vmspace_pmap(p->p_vmspace);
3047	oldpmap = PCPU_GET(curpmap);
3048	cpuid = PCPU_GET(cpuid);
3049
3050	if (oldpmap)
3051		CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
3052	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
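	/* Make sure the pmap has a valid ASID for this CPU's generation. */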
3053	pmap_asid_alloc(pmap);
3054	if (td == curthread) {
3055		PCPU_SET(segbase, pmap->pm_segtab);
3056		mips_wr_entryhi(pmap->pm_asid[cpuid].asid);
3057	}
3058
3059	PCPU_SET(curpmap, pmap);
3060	critical_exit();
3061}
3062
3063void
3064pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
3065{
3066}
3067
3068/*
3069 *	Increase the starting virtual address of the given mapping if a
3070 *	different alignment might result in more superpage mappings.
3071 */
3072void
3073pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3074    vm_offset_t *addr, vm_size_t size)
3075{
3076	vm_offset_t superpage_offset;
3077
3078	if (size < NBSEG)
3079		return;
3080	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
3081		offset += ptoa(object->pg_color);
3082	superpage_offset = offset & SEGMASK;
3083	if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG ||
3084	    (*addr & SEGMASK) == superpage_offset)
3085		return;
3086	if ((*addr & SEGMASK) < superpage_offset)
3087		*addr = (*addr & ~SEGMASK) + superpage_offset;
3088	else
3089		*addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset;
3090}
3091
3092/*
3093 * 	Increase the starting virtual address of the given mapping so
3094 * 	that it is aligned to not be the second page in a TLB entry.
3095 * 	This routine assumes that the length is appropriately-sized so
3096 * 	that the allocation does not share a TLB entry at all if required.
3097 */
3098void
3099pmap_align_tlb(vm_offset_t *addr)
3100{
3101	if ((*addr & PAGE_SIZE) == 0)
3102		return;
3103	*addr += PAGE_SIZE;
3104	return;
3105}
3106
3107#ifdef DDB
3108DB_SHOW_COMMAND(ptable, ddb_pid_dump)
3109{
3110	pmap_t pmap;
3111	struct thread *td = NULL;
3112	struct proc *p;
3113	int i, j, k;
3114	vm_paddr_t pa;
3115	vm_offset_t va;
3116
3117	if (have_addr) {
3118		td = db_lookup_thread(addr, TRUE);
3119		if (td == NULL) {
3120			db_printf("Invalid pid or tid");
3121			return;
3122		}
3123		p = td->td_proc;
3124		if (p->p_vmspace == NULL) {
3125			db_printf("No vmspace for process");
3126			return;
3127		}
3128		pmap = vmspace_pmap(p->p_vmspace);
3129	} else
3130		pmap = kernel_pmap;
3131
3132	db_printf("pmap:%p segtab:%p asid:%x generation:%x\n",
3133	    pmap, pmap->pm_segtab, pmap->pm_asid[0].asid,
3134	    pmap->pm_asid[0].gen);
3135	for (i = 0; i < NPDEPG; i++) {
3136		pd_entry_t *pdpe;
3137		pt_entry_t *pde;
3138		pt_entry_t pte;
3139
3140		pdpe = (pd_entry_t *)pmap->pm_segtab[i];
3141		if (pdpe == NULL)
3142			continue;
3143		db_printf("[%4d] %p\n", i, pdpe);
3144#ifdef __mips_n64
3145		for (j = 0; j < NPDEPG; j++) {
3146			pde = (pt_entry_t *)pdpe[j];
3147			if (pde == NULL)
3148				continue;
3149			db_printf("\t[%4d] %p\n", j, pde);
3150#else
3151		{
3152			j = 0;
3153			pde =  (pt_entry_t *)pdpe;
3154#endif
3155			for (k = 0; k < NPTEPG; k++) {
3156				pte = pde[k];
3157				if (pte == 0 || !pte_test(&pte, PTE_V))
3158					continue;
3159				pa = TLBLO_PTE_TO_PA(pte);
3160				va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT);
3161				db_printf("\t\t[%04d] va: %p pte: %8jx pa:%jx\n",
3162				       k, (void *)va, (uintmax_t)pte, (uintmax_t)pa);
3163			}
3164		}
3165	}
3166}
3167#endif
3168
3169#if defined(DEBUG)
3170
3171static void pads(pmap_t pm);
3172void pmap_pvdump(vm_offset_t pa);
3173
3174/* print address space of pmap*/
3175static void
3176pads(pmap_t pm)
3177{
3178	unsigned va, i, j;
3179	pt_entry_t *ptep;
3180
3181	if (pm == kernel_pmap)
3182		return;
3183	for (i = 0; i < NPTEPG; i++)
3184		if (pm->pm_segtab[i])
3185			for (j = 0; j < NPTEPG; j++) {
3186				va = (i << SEGSHIFT) + (j << PAGE_SHIFT);
3187				if (pm == kernel_pmap && va < KERNBASE)
3188					continue;
3189				if (pm != kernel_pmap &&
3190				    va >= VM_MAXUSER_ADDRESS)
3191					continue;
3192				ptep = pmap_pte(pm, va);
3193				if (pte_test(ptep, PTE_V))
3194					printf("%x:%x ", va, *(int *)ptep);
3195			}
3196
3197}
3198
3199void
3200pmap_pvdump(vm_offset_t pa)
3201{
3202	register pv_entry_t pv;
3203	vm_page_t m;
3204
3205	printf("pa %x", pa);
3206	m = PHYS_TO_VM_PAGE(pa);
3207	for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3208	    pv = TAILQ_NEXT(pv, pv_list)) {
3209		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3210		pads(pv->pv_pmap);
3211	}
3212	printf(" ");
3213}
3214
3215/* N/C */
3216#endif
3217
3218
3219/*
3220 * Allocate TLB address space tag (called ASID or TLBPID) and return it.
3221 * It takes almost as much or more time to search the TLB for a
3222 * specific ASID and flush those entries as it does to flush the entire TLB.
3223 * Therefore, when we allocate a new ASID, we just take the next number. When
3224 * we run out of numbers, we flush the TLB, increment the generation count
3225 * and start over. ASID zero is reserved for kernel use.
3226 */
3227static void
3228pmap_asid_alloc(pmap_t pmap)
3229{
3230	if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED ||
3231	    pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
3234		if (PCPU_GET(next_asid) == pmap_max_asid) {
3235			tlb_invalidate_all_user(NULL);
3236			PCPU_SET(asid_generation,
3237			    (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
3238			if (PCPU_GET(asid_generation) == 0) {
3239				PCPU_SET(asid_generation, 1);
3240			}
3241			PCPU_SET(next_asid, 1);	/* 0 means invalid */
3242		}
3243		pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3244		pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3245		PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3246	}
3247}
3248
3249static pt_entry_t
3250init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot)
3251{
3252	pt_entry_t rw;
3253
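	/*
	 * For managed pages, set PTE_D only when the faulting access was a
	 * write; otherwise the first write traps and pmap_emulate_modified()
	 * records the modification.
	 */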
3254	if (!(prot & VM_PROT_WRITE))
3255		rw = PTE_V | PTE_RO;
3256	else if ((m->oflags & VPO_UNMANAGED) == 0) {
3257		if ((access & VM_PROT_WRITE) != 0)
3258			rw = PTE_V | PTE_D;
3259		else
3260			rw = PTE_V;
3261	} else
3262		/* Needn't emulate a modified bit for unmanaged pages. */
3263		rw = PTE_V | PTE_D;
3264	return (rw);
3265}
3266
3267/*
3268 * pmap_emulate_modified : do dirty bit emulation
3269 *
3270 * On SMP, update just the local TLB, other CPUs will update their
3271 * TLBs from PTE lazily, if they get the exception.
3272 * Returns 0 in case of success, or 1 if the page is read-only and we
3273 * need to fault.
3274 */
3275int
3276pmap_emulate_modified(pmap_t pmap, vm_offset_t va)
3277{
3278	pt_entry_t *pte;
3279
3280	PMAP_LOCK(pmap);
3281	pte = pmap_pte(pmap, va);
3282	if (pte == NULL)
3283		panic("pmap_emulate_modified: can't find PTE");
3284#ifdef SMP
3285	/* It is possible that some other CPU changed m-bit */
3286	if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) {
3287		tlb_update(pmap, va, *pte);
3288		PMAP_UNLOCK(pmap);
3289		return (0);
3290	}
3291#else
3292	if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D))
3293		panic("pmap_emulate_modified: invalid pte");
3294#endif
3295	if (pte_test(pte, PTE_RO)) {
3296		PMAP_UNLOCK(pmap);
3297		return (1);
3298	}
3299	pte_set(pte, PTE_D);
3300	tlb_update(pmap, va, *pte);
3301	if (!pte_test(pte, PTE_MANAGED))
3302		panic("pmap_emulate_modified: unmanaged page");
3303	PMAP_UNLOCK(pmap);
3304	return (0);
3305}
3306
3307/*
3308 *	Routine:	pmap_kextract
3309 *	Function:
3310 *		Extract the physical page address associated
3311 *		with the given virtual address.
3312 */
3313vm_paddr_t
3314pmap_kextract(vm_offset_t va)
3315{
3316	int mapped;
3317
3318	/*
3319	 * First, the direct-mapped regions.
3320	 */
3321#if defined(__mips_n64)
3322	if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END)
3323		return (MIPS_XKPHYS_TO_PHYS(va));
3324#endif
3325	if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END)
3326		return (MIPS_KSEG0_TO_PHYS(va));
3327
3328	if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END)
3329		return (MIPS_KSEG1_TO_PHYS(va));
3330
3331	/*
3332	 * User virtual addresses.
3333	 */
3334	if (va < VM_MAXUSER_ADDRESS) {
3335		pt_entry_t *ptep;
3336
3337		if (curproc && curproc->p_vmspace) {
3338			ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3339			if (ptep) {
3340				return (TLBLO_PTE_TO_PA(*ptep) |
3341				    (va & PAGE_MASK));
3342			}
3343			return (0);
3344		}
3345	}
3346
3347	/*
3348	 * Should be kernel virtual here, otherwise fail
3349	 */
3350	mapped = (va >= MIPS_KSEG2_START && va < MIPS_KSEG2_END);
3351#if defined(__mips_n64)
3352	mapped = mapped || (va >= MIPS_XKSEG_START && va < MIPS_XKSEG_END);
3353#endif
3354	/*
3355	 * Kernel virtual.
3356	 */
3357
3358	if (mapped) {
3359		pt_entry_t *ptep;
3360
3361		/* Is the kernel pmap initialized? */
3362		if (!CPU_EMPTY(&kernel_pmap->pm_active)) {
3363			/* It's inside the virtual address range */
3364			ptep = pmap_pte(kernel_pmap, va);
3365			if (ptep) {
3366				return (TLBLO_PTE_TO_PA(*ptep) |
3367				    (va & PAGE_MASK));
3368			}
3369		}
3370		return (0);
3371	}
3372
3373	panic("%s for unknown address space %p.", __func__, (void *)va);
3374}
3375
3376
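/*
 * Write back and invalidate the data cache for every virtual address
 * that currently maps the given page.
 */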
3377void
3378pmap_flush_pvcache(vm_page_t m)
3379{
3380	pv_entry_t pv;
3381
3382	if (m != NULL) {
3383		for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3384		    pv = TAILQ_NEXT(pv, pv_list)) {
3385			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
3386		}
3387	}
3388}
3389