pmap.c revision 242534
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
38 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
39 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
40 */
41
42/*
43 *	Manages physical address maps.
44 *
45 *	Since the information managed by this module is
46 *	also stored by the logical address mapping module,
47 *	this module may throw away valid virtual-to-physical
48 *	mappings at almost any time.  However, invalidations
49 *	of virtual-to-physical mappings must be done as
50 *	requested.
51 *
52 *	In order to cope with hardware architectures which
53 *	make virtual-to-physical map invalidates expensive,
54 *	this module may delay invalidate or reduced protection
55 *	operations until such time as they are actually
56 *	necessary.  This module is given full information as
57 *	to which processors are currently using which maps,
58 *	and to when physical maps must be made correct.
59 */
60
61#include <sys/cdefs.h>
62__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 242534 2012-11-03 23:03:14Z attilio $");
63
64#include "opt_ddb.h"
65#include "opt_pmap.h"
66
67#include <sys/param.h>
68#include <sys/systm.h>
69#include <sys/lock.h>
70#include <sys/mman.h>
71#include <sys/msgbuf.h>
72#include <sys/mutex.h>
73#include <sys/pcpu.h>
74#include <sys/proc.h>
75#include <sys/rwlock.h>
76#include <sys/sched.h>
77#ifdef SMP
78#include <sys/smp.h>
79#else
80#include <sys/cpuset.h>
81#endif
82#include <sys/sysctl.h>
83#include <sys/vmmeter.h>
84
85#ifdef DDB
86#include <ddb/ddb.h>
87#endif
88
89#include <vm/vm.h>
90#include <vm/vm_param.h>
91#include <vm/vm_kern.h>
92#include <vm/vm_page.h>
93#include <vm/vm_map.h>
94#include <vm/vm_object.h>
95#include <vm/vm_extern.h>
96#include <vm/vm_pageout.h>
97#include <vm/vm_pager.h>
98#include <vm/uma.h>
99
100#include <machine/cache.h>
101#include <machine/md_var.h>
102#include <machine/tlb.h>
103
104#undef PMAP_DEBUG
105
106#if !defined(DIAGNOSTIC)
107#define	PMAP_INLINE __inline
108#else
109#define	PMAP_INLINE
110#endif
111
112#ifdef PV_STATS
113#define PV_STAT(x)	do { x ; } while (0)
114#else
115#define PV_STAT(x)	do { } while (0)
116#endif
117
118/*
119 * Get PDEs and PTEs for user/kernel address space
120 */
121#define	pmap_seg_index(v)	(((v) >> SEGSHIFT) & (NPDEPG - 1))
122#define	pmap_pde_index(v)	(((v) >> PDRSHIFT) & (NPDEPG - 1))
123#define	pmap_pte_index(v)	(((v) >> PAGE_SHIFT) & (NPTEPG - 1))
124#define	pmap_pde_pindex(v)	((v) >> PDRSHIFT)
125
126#ifdef __mips_n64
127#define	NUPDE			(NPDEPG * NPDEPG)
128#define	NUSERPGTBLS		(NUPDE + NPDEPG)
129#else
130#define	NUPDE			(NPDEPG)
131#define	NUSERPGTBLS		(NUPDE)
132#endif
133
134#define	is_kernel_pmap(x)	((x) == kernel_pmap)
135
136struct pmap kernel_pmap_store;
137pd_entry_t *kernel_segmap;
138
139vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
140vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
141
142static int nkpt;
143unsigned pmap_max_asid;		/* max ASID supported by the system */
144
145#define	PMAP_ASID_RESERVED	0
146
147vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
148
149static void pmap_asid_alloc(pmap_t pmap);
150
151static struct rwlock_padalign pvh_global_lock;
152
153/*
154 * Data for the pv entry allocation mechanism
155 */
156static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
157static int pv_entry_count;
158
159static void free_pv_chunk(struct pv_chunk *pc);
160static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
161static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
162static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
163static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
164static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
165    vm_offset_t va);
166static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
167    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
168static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
169    pd_entry_t pde);
170static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
171static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
172static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
173    vm_offset_t va, vm_page_t m);
174static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte);
175static void pmap_invalidate_all(pmap_t pmap);
176static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
177static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m);
178
179static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
180static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
181static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t);
182static pt_entry_t init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot);
183
184static void pmap_invalidate_page_action(void *arg);
185static void pmap_invalidate_range_action(void *arg);
186static void pmap_update_page_action(void *arg);
187
188#ifndef __mips_n64
189/*
190 * This structure is for high memory (memory above 512Meg in 32 bit) support.
191 * The highmem area does not have a KSEG0 mapping, and we need a mechanism to
192 * do temporary per-CPU mappings for pmap_zero_page, pmap_copy_page etc.
193 *
194 * At bootup, we reserve 2 virtual pages per CPU for mapping highmem pages. To
195 * access a highmem physical address on a CPU, we map the physical address to
196 * the reserved virtual address for the CPU in the kernel pagetable.  This is
197 * done with interrupts disabled(although a spinlock and sched_pin would be
198 * sufficient).
199 */
200struct local_sysmaps {
201	vm_offset_t	base;
202	uint32_t	saved_intr;
203	uint16_t	valid1, valid2;
204};
205static struct local_sysmaps sysmap_lmem[MAXCPU];
206
207static __inline void
208pmap_alloc_lmem_map(void)
209{
210	int i;
211
212	for (i = 0; i < MAXCPU; i++) {
213		sysmap_lmem[i].base = virtual_avail;
214		virtual_avail += PAGE_SIZE * 2;
215		sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
216	}
217}
218
219static __inline vm_offset_t
220pmap_lmem_map1(vm_paddr_t phys)
221{
222	struct local_sysmaps *sysm;
223	pt_entry_t *pte, npte;
224	vm_offset_t va;
225	uint32_t intr;
226	int cpu;
227
228	intr = intr_disable();
229	cpu = PCPU_GET(cpuid);
230	sysm = &sysmap_lmem[cpu];
231	sysm->saved_intr = intr;
232	va = sysm->base;
233	npte = TLBLO_PA_TO_PFN(phys) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
234	pte = pmap_pte(kernel_pmap, va);
235	*pte = npte;
236	sysm->valid1 = 1;
237	return (va);
238}
239
240static __inline vm_offset_t
241pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
242{
243	struct local_sysmaps *sysm;
244	pt_entry_t *pte, npte;
245	vm_offset_t va1, va2;
246	uint32_t intr;
247	int cpu;
248
249	intr = intr_disable();
250	cpu = PCPU_GET(cpuid);
251	sysm = &sysmap_lmem[cpu];
252	sysm->saved_intr = intr;
253	va1 = sysm->base;
254	va2 = sysm->base + PAGE_SIZE;
255	npte = TLBLO_PA_TO_PFN(phys1) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
256	pte = pmap_pte(kernel_pmap, va1);
257	*pte = npte;
258	npte = TLBLO_PA_TO_PFN(phys2) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
259	pte = pmap_pte(kernel_pmap, va2);
260	*pte = npte;
261	sysm->valid1 = 1;
262	sysm->valid2 = 1;
263	return (va1);
264}
265
266static __inline void
267pmap_lmem_unmap(void)
268{
269	struct local_sysmaps *sysm;
270	pt_entry_t *pte;
271	int cpu;
272
273	cpu = PCPU_GET(cpuid);
274	sysm = &sysmap_lmem[cpu];
275	pte = pmap_pte(kernel_pmap, sysm->base);
276	*pte = PTE_G;
277	tlb_invalidate_address(kernel_pmap, sysm->base);
278	sysm->valid1 = 0;
279	if (sysm->valid2) {
280		pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);
281		*pte = PTE_G;
282		tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE);
283		sysm->valid2 = 0;
284	}
285	intr_restore(sysm->saved_intr);
286}
287#else  /* __mips_n64 */
288
289static __inline void
290pmap_alloc_lmem_map(void)
291{
292}
293
294static __inline vm_offset_t
295pmap_lmem_map1(vm_paddr_t phys)
296{
297
298	return (0);
299}
300
301static __inline vm_offset_t
302pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
303{
304
305	return (0);
306}
307
308static __inline vm_offset_t
309pmap_lmem_unmap(void)
310{
311
312	return (0);
313}
314#endif /* !__mips_n64 */
315
316/*
317 * Page table entry lookup routines.
318 */
319static __inline pd_entry_t *
320pmap_segmap(pmap_t pmap, vm_offset_t va)
321{
322
323	return (&pmap->pm_segtab[pmap_seg_index(va)]);
324}
325
326#ifdef __mips_n64
327static __inline pd_entry_t *
328pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
329{
330	pd_entry_t *pde;
331
332	pde = (pd_entry_t *)*pdpe;
333	return (&pde[pmap_pde_index(va)]);
334}
335
336static __inline pd_entry_t *
337pmap_pde(pmap_t pmap, vm_offset_t va)
338{
339	pd_entry_t *pdpe;
340
341	pdpe = pmap_segmap(pmap, va);
342	if (*pdpe == NULL)
343		return (NULL);
344
345	return (pmap_pdpe_to_pde(pdpe, va));
346}
347#else
348static __inline pd_entry_t *
349pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
350{
351
352	return (pdpe);
353}
354
355static __inline
356pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va)
357{
358
359	return (pmap_segmap(pmap, va));
360}
361#endif
362
363static __inline pt_entry_t *
364pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
365{
366	pt_entry_t *pte;
367
368	pte = (pt_entry_t *)*pde;
369	return (&pte[pmap_pte_index(va)]);
370}
371
372pt_entry_t *
373pmap_pte(pmap_t pmap, vm_offset_t va)
374{
375	pd_entry_t *pde;
376
377	pde = pmap_pde(pmap, va);
378	if (pde == NULL || *pde == NULL)
379		return (NULL);
380
381	return (pmap_pde_to_pte(pde, va));
382}
383
384vm_offset_t
385pmap_steal_memory(vm_size_t size)
386{
387	vm_paddr_t bank_size, pa;
388	vm_offset_t va;
389
390	size = round_page(size);
391	bank_size = phys_avail[1] - phys_avail[0];
392	while (size > bank_size) {
393		int i;
394
395		for (i = 0; phys_avail[i + 2]; i += 2) {
396			phys_avail[i] = phys_avail[i + 2];
397			phys_avail[i + 1] = phys_avail[i + 3];
398		}
399		phys_avail[i] = 0;
400		phys_avail[i + 1] = 0;
401		if (!phys_avail[0])
402			panic("pmap_steal_memory: out of memory");
403		bank_size = phys_avail[1] - phys_avail[0];
404	}
405
406	pa = phys_avail[0];
407	phys_avail[0] += size;
408	if (MIPS_DIRECT_MAPPABLE(pa) == 0)
409		panic("Out of memory below 512Meg?");
410	va = MIPS_PHYS_TO_DIRECT(pa);
411	bzero((caddr_t)va, size);
412	return (va);
413}
414
415/*
416 * Bootstrap the system enough to run with virtual memory.  This
417 * assumes that the phys_avail array has been initialized.
418 */
419static void
420pmap_create_kernel_pagetable(void)
421{
422	int i, j;
423	vm_offset_t ptaddr;
424	pt_entry_t *pte;
425#ifdef __mips_n64
426	pd_entry_t *pde;
427	vm_offset_t pdaddr;
428	int npt, npde;
429#endif
430
431	/*
432	 * Allocate segment table for the kernel
433	 */
434	kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);
435
436	/*
437	 * Allocate second level page tables for the kernel
438	 */
439#ifdef __mips_n64
440	npde = howmany(NKPT, NPDEPG);
441	pdaddr = pmap_steal_memory(PAGE_SIZE * npde);
442#endif
443	nkpt = NKPT;
444	ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt);
445
446	/*
447	 * The R[4-7]?00 stores only one copy of the Global bit in the
448	 * translation lookaside buffer for each 2 page entry. Thus invalid
449	 * entrys must have the Global bit set so when Entry LO and Entry HI
450	 * G bits are anded together they will produce a global bit to store
451	 * in the tlb.
452	 */
453	for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++)
454		*pte = PTE_G;
455
456#ifdef __mips_n64
457	for (i = 0,  npt = nkpt; npt > 0; i++) {
458		kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE);
459		pde = (pd_entry_t *)kernel_segmap[i];
460
461		for (j = 0; j < NPDEPG && npt > 0; j++, npt--)
462			pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE);
463	}
464#else
465	for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++)
466		kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE));
467#endif
468
469	PMAP_LOCK_INIT(kernel_pmap);
470	kernel_pmap->pm_segtab = kernel_segmap;
471	CPU_FILL(&kernel_pmap->pm_active);
472	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
473	kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
474	kernel_pmap->pm_asid[0].gen = 0;
475	kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE;
476}
477
478void
479pmap_bootstrap(void)
480{
481	int i;
482	int need_local_mappings = 0;
483
484	/* Sort. */
485again:
486	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
487		/*
488		 * Keep the memory aligned on page boundary.
489		 */
490		phys_avail[i] = round_page(phys_avail[i]);
491		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
492
493		if (i < 2)
494			continue;
495		if (phys_avail[i - 2] > phys_avail[i]) {
496			vm_paddr_t ptemp[2];
497
498			ptemp[0] = phys_avail[i + 0];
499			ptemp[1] = phys_avail[i + 1];
500
501			phys_avail[i + 0] = phys_avail[i - 2];
502			phys_avail[i + 1] = phys_avail[i - 1];
503
504			phys_avail[i - 2] = ptemp[0];
505			phys_avail[i - 1] = ptemp[1];
506			goto again;
507		}
508	}
509
510       	/*
511	 * In 32 bit, we may have memory which cannot be mapped directly.
512	 * This memory will need temporary mapping before it can be
513	 * accessed.
514	 */
515	if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1] - 1))
516		need_local_mappings = 1;
517
518	/*
519	 * Copy the phys_avail[] array before we start stealing memory from it.
520	 */
521	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
522		physmem_desc[i] = phys_avail[i];
523		physmem_desc[i + 1] = phys_avail[i + 1];
524	}
525
526	Maxmem = atop(phys_avail[i - 1]);
527
528	if (bootverbose) {
529		printf("Physical memory chunk(s):\n");
530		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
531			vm_paddr_t size;
532
533			size = phys_avail[i + 1] - phys_avail[i];
534			printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
535			    (uintmax_t) phys_avail[i],
536			    (uintmax_t) phys_avail[i + 1] - 1,
537			    (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
538		}
539		printf("Maxmem is 0x%0jx\n", ptoa((uintmax_t)Maxmem));
540	}
541	/*
542	 * Steal the message buffer from the beginning of memory.
543	 */
544	msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize);
545	msgbufinit(msgbufp, msgbufsize);
546
547	/*
548	 * Steal thread0 kstack.
549	 */
550	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
551
552	virtual_avail = VM_MIN_KERNEL_ADDRESS;
553	virtual_end = VM_MAX_KERNEL_ADDRESS;
554
555#ifdef SMP
556	/*
557	 * Steal some virtual address space to map the pcpu area.
558	 */
559	virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2);
560	pcpup = (struct pcpu *)virtual_avail;
561	virtual_avail += PAGE_SIZE * 2;
562
563	/*
564	 * Initialize the wired TLB entry mapping the pcpu region for
565	 * the BSP at 'pcpup'. Up until this point we were operating
566	 * with the 'pcpup' for the BSP pointing to a virtual address
567	 * in KSEG0 so there was no need for a TLB mapping.
568	 */
569	mips_pcpu_tlb_init(PCPU_ADDR(0));
570
571	if (bootverbose)
572		printf("pcpu is available at virtual address %p.\n", pcpup);
573#endif
574
575	if (need_local_mappings)
576		pmap_alloc_lmem_map();
577	pmap_create_kernel_pagetable();
578	pmap_max_asid = VMNUM_PIDS;
579	mips_wr_entryhi(0);
580	mips_wr_pagemask(0);
581
582 	/*
583	 * Initialize the global pv list lock.
584	 */
585	rw_init(&pvh_global_lock, "pmap pv global");
586}
587
588/*
589 * Initialize a vm_page's machine-dependent fields.
590 */
591void
592pmap_page_init(vm_page_t m)
593{
594
595	TAILQ_INIT(&m->md.pv_list);
596	m->md.pv_flags = 0;
597}
598
599/*
600 *	Initialize the pmap module.
601 *	Called by vm_init, to initialize any structures that the pmap
602 *	system needs to map virtual memory.
603 */
604void
605pmap_init(void)
606{
607}
608
609/***************************************************
610 * Low level helper routines.....
611 ***************************************************/
612
613#ifdef	SMP
614static __inline void
615pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
616{
617	int	cpuid, cpu, self;
618	cpuset_t active_cpus;
619
620	sched_pin();
621	if (is_kernel_pmap(pmap)) {
622		smp_rendezvous(NULL, fn, NULL, arg);
623		goto out;
624	}
625	/* Force ASID update on inactive CPUs */
626	CPU_FOREACH(cpu) {
627		if (!CPU_ISSET(cpu, &pmap->pm_active))
628			pmap->pm_asid[cpu].gen = 0;
629	}
630	cpuid = PCPU_GET(cpuid);
631	/*
632	 * XXX: barrier/locking for active?
633	 *
634	 * Take a snapshot of active here, any further changes are ignored.
635	 * tlb update/invalidate should be harmless on inactive CPUs
636	 */
637	active_cpus = pmap->pm_active;
638	self = CPU_ISSET(cpuid, &active_cpus);
639	CPU_CLR(cpuid, &active_cpus);
640	/* Optimize for the case where this cpu is the only active one */
641	if (CPU_EMPTY(&active_cpus)) {
642		if (self)
643			fn(arg);
644	} else {
645		if (self)
646			CPU_SET(cpuid, &active_cpus);
647		smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg);
648	}
649out:
650	sched_unpin();
651}
652#else /* !SMP */
653static __inline void
654pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
655{
656	int	cpuid;
657
658	if (is_kernel_pmap(pmap)) {
659		fn(arg);
660		return;
661	}
662	cpuid = PCPU_GET(cpuid);
663	if (!CPU_ISSET(cpuid, &pmap->pm_active))
664		pmap->pm_asid[cpuid].gen = 0;
665	else
666		fn(arg);
667}
668#endif /* SMP */
669
670static void
671pmap_invalidate_all(pmap_t pmap)
672{
673
674	pmap_call_on_active_cpus(pmap,
675	    (void (*)(void *))tlb_invalidate_all_user, pmap);
676}
677
678struct pmap_invalidate_page_arg {
679	pmap_t pmap;
680	vm_offset_t va;
681};
682
683static void
684pmap_invalidate_page_action(void *arg)
685{
686	struct pmap_invalidate_page_arg *p = arg;
687
688	tlb_invalidate_address(p->pmap, p->va);
689}
690
691static void
692pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
693{
694	struct pmap_invalidate_page_arg arg;
695
696	arg.pmap = pmap;
697	arg.va = va;
698	pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &arg);
699}
700
701struct pmap_invalidate_range_arg {
702	pmap_t pmap;
703	vm_offset_t sva;
704	vm_offset_t eva;
705};
706
707static void
708pmap_invalidate_range_action(void *arg)
709{
710	struct pmap_invalidate_range_arg *p = arg;
711
712	tlb_invalidate_range(p->pmap, p->sva, p->eva);
713}
714
715static void
716pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
717{
718	struct pmap_invalidate_range_arg arg;
719
720	arg.pmap = pmap;
721	arg.sva = sva;
722	arg.eva = eva;
723	pmap_call_on_active_cpus(pmap, pmap_invalidate_range_action, &arg);
724}
725
726struct pmap_update_page_arg {
727	pmap_t pmap;
728	vm_offset_t va;
729	pt_entry_t pte;
730};
731
732static void
733pmap_update_page_action(void *arg)
734{
735	struct pmap_update_page_arg *p = arg;
736
737	tlb_update(p->pmap, p->va, p->pte);
738}
739
740static void
741pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
742{
743	struct pmap_update_page_arg arg;
744
745	arg.pmap = pmap;
746	arg.va = va;
747	arg.pte = pte;
748	pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg);
749}
750
751/*
752 *	Routine:	pmap_extract
753 *	Function:
754 *		Extract the physical page address associated
755 *		with the given map/virtual_address pair.
756 */
757vm_paddr_t
758pmap_extract(pmap_t pmap, vm_offset_t va)
759{
760	pt_entry_t *pte;
761	vm_offset_t retval = 0;
762
763	PMAP_LOCK(pmap);
764	pte = pmap_pte(pmap, va);
765	if (pte) {
766		retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK);
767	}
768	PMAP_UNLOCK(pmap);
769	return (retval);
770}
771
772/*
773 *	Routine:	pmap_extract_and_hold
774 *	Function:
775 *		Atomically extract and hold the physical page
776 *		with the given pmap and virtual address pair
777 *		if that mapping permits the given protection.
778 */
779vm_page_t
780pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
781{
782	pt_entry_t pte, *ptep;
783	vm_paddr_t pa, pte_pa;
784	vm_page_t m;
785
786	m = NULL;
787	pa = 0;
788	PMAP_LOCK(pmap);
789retry:
790	ptep = pmap_pte(pmap, va);
791	if (ptep != NULL) {
792		pte = *ptep;
793		if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) ||
794		    (prot & VM_PROT_WRITE) == 0)) {
795			pte_pa = TLBLO_PTE_TO_PA(pte);
796			if (vm_page_pa_tryrelock(pmap, pte_pa, &pa))
797				goto retry;
798			m = PHYS_TO_VM_PAGE(pte_pa);
799			vm_page_hold(m);
800		}
801	}
802	PA_UNLOCK_COND(pa);
803	PMAP_UNLOCK(pmap);
804	return (m);
805}
806
807/***************************************************
808 * Low level mapping routines.....
809 ***************************************************/
810
811/*
812 * add a wired page to the kva
813 */
814void
815pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr)
816{
817	pt_entry_t *pte;
818	pt_entry_t opte, npte;
819
820#ifdef PMAP_DEBUG
821	printf("pmap_kenter:  va: %p -> pa: %p\n", (void *)va, (void *)pa);
822#endif
823
824	pte = pmap_pte(kernel_pmap, va);
825	opte = *pte;
826	npte = TLBLO_PA_TO_PFN(pa) | attr | PTE_D | PTE_V | PTE_G;
827	*pte = npte;
828	if (pte_test(&opte, PTE_V) && opte != npte)
829		pmap_update_page(kernel_pmap, va, npte);
830}
831
832void
833pmap_kenter(vm_offset_t va, vm_paddr_t pa)
834{
835
836	KASSERT(is_cacheable_mem(pa),
837		("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa));
838
839	pmap_kenter_attr(va, pa, PTE_C_CACHE);
840}
841
842/*
843 * remove a page from the kernel pagetables
844 */
845 /* PMAP_INLINE */ void
846pmap_kremove(vm_offset_t va)
847{
848	pt_entry_t *pte;
849
850	/*
851	 * Write back all caches from the page being destroyed
852	 */
853	mips_dcache_wbinv_range_index(va, PAGE_SIZE);
854
855	pte = pmap_pte(kernel_pmap, va);
856	*pte = PTE_G;
857	pmap_invalidate_page(kernel_pmap, va);
858}
859
860/*
861 *	Used to map a range of physical addresses into kernel
862 *	virtual address space.
863 *
864 *	The value passed in '*virt' is a suggested virtual address for
865 *	the mapping. Architectures which can support a direct-mapped
866 *	physical to virtual region can return the appropriate address
867 *	within that region, leaving '*virt' unchanged. Other
868 *	architectures should map the pages starting at '*virt' and
869 *	update '*virt' with the first usable address after the mapped
870 *	region.
871 *
872 *	Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
873 */
874vm_offset_t
875pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
876{
877	vm_offset_t va, sva;
878
879	if (MIPS_DIRECT_MAPPABLE(end - 1))
880		return (MIPS_PHYS_TO_DIRECT(start));
881
882	va = sva = *virt;
883	while (start < end) {
884		pmap_kenter(va, start);
885		va += PAGE_SIZE;
886		start += PAGE_SIZE;
887	}
888	*virt = va;
889	return (sva);
890}
891
892/*
893 * Add a list of wired pages to the kva
894 * this routine is only used for temporary
895 * kernel mappings that do not need to have
896 * page modification or references recorded.
897 * Note that old mappings are simply written
898 * over.  The page *must* be wired.
899 */
900void
901pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
902{
903	int i;
904	vm_offset_t origva = va;
905
906	for (i = 0; i < count; i++) {
907		pmap_flush_pvcache(m[i]);
908		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
909		va += PAGE_SIZE;
910	}
911
912	mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count);
913}
914
915/*
916 * this routine jerks page mappings from the
917 * kernel -- it is meant only for temporary mappings.
918 */
919void
920pmap_qremove(vm_offset_t va, int count)
921{
922	pt_entry_t *pte;
923	vm_offset_t origva;
924
925	if (count < 1)
926		return;
927	mips_dcache_wbinv_range_index(va, PAGE_SIZE * count);
928	origva = va;
929	do {
930		pte = pmap_pte(kernel_pmap, va);
931		*pte = PTE_G;
932		va += PAGE_SIZE;
933	} while (--count > 0);
934	pmap_invalidate_range(kernel_pmap, origva, va);
935}
936
937/***************************************************
938 * Page table page management routines.....
939 ***************************************************/
940
941/*
942 * Decrements a page table page's wire count, which is used to record the
943 * number of valid page table entries within the page.  If the wire count
944 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
945 * page table page was unmapped and FALSE otherwise.
946 */
947static PMAP_INLINE boolean_t
948pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
949{
950
951	--m->wire_count;
952	if (m->wire_count == 0) {
953		_pmap_unwire_ptp(pmap, va, m);
954		return (TRUE);
955	} else
956		return (FALSE);
957}
958
959static void
960_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
961{
962	pd_entry_t *pde;
963
964	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
965	/*
966	 * unmap the page table page
967	 */
968#ifdef __mips_n64
969	if (m->pindex < NUPDE)
970		pde = pmap_pde(pmap, va);
971	else
972		pde = pmap_segmap(pmap, va);
973#else
974	pde = pmap_pde(pmap, va);
975#endif
976	*pde = 0;
977	pmap->pm_stats.resident_count--;
978
979#ifdef __mips_n64
980	if (m->pindex < NUPDE) {
981		pd_entry_t *pdp;
982		vm_page_t pdpg;
983
984		/*
985		 * Recursively decrement next level pagetable refcount
986		 */
987		pdp = (pd_entry_t *)*pmap_segmap(pmap, va);
988		pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp));
989		pmap_unwire_ptp(pmap, va, pdpg);
990	}
991#endif
992
993	/*
994	 * If the page is finally unwired, simply free it.
995	 */
996	vm_page_free_zero(m);
997	atomic_subtract_int(&cnt.v_wire_count, 1);
998}
999
1000/*
1001 * After removing a page table entry, this routine is used to
1002 * conditionally free the page, and manage the hold/wire counts.
1003 */
1004static int
1005pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
1006{
1007	vm_page_t mpte;
1008
1009	if (va >= VM_MAXUSER_ADDRESS)
1010		return (0);
1011	KASSERT(pde != 0, ("pmap_unuse_pt: pde != 0"));
1012	mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pde));
1013	return (pmap_unwire_ptp(pmap, va, mpte));
1014}
1015
1016void
1017pmap_pinit0(pmap_t pmap)
1018{
1019	int i;
1020
1021	PMAP_LOCK_INIT(pmap);
1022	pmap->pm_segtab = kernel_segmap;
1023	CPU_ZERO(&pmap->pm_active);
1024	for (i = 0; i < MAXCPU; i++) {
1025		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1026		pmap->pm_asid[i].gen = 0;
1027	}
1028	PCPU_SET(curpmap, pmap);
1029	TAILQ_INIT(&pmap->pm_pvchunk);
1030	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1031}
1032
1033void
1034pmap_grow_direct_page_cache()
1035{
1036
1037#ifdef __mips_n64
1038	vm_pageout_grow_cache(3, 0, MIPS_XKPHYS_LARGEST_PHYS);
1039#else
1040	vm_pageout_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS);
1041#endif
1042}
1043
1044vm_page_t
1045pmap_alloc_direct_page(unsigned int index, int req)
1046{
1047	vm_page_t m;
1048
1049	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED |
1050	    VM_ALLOC_ZERO);
1051	if (m == NULL)
1052		return (NULL);
1053
1054	if ((m->flags & PG_ZERO) == 0)
1055		pmap_zero_page(m);
1056
1057	m->pindex = index;
1058	return (m);
1059}
1060
1061/*
1062 * Initialize a preallocated and zeroed pmap structure,
1063 * such as one in a vmspace structure.
1064 */
1065int
1066pmap_pinit(pmap_t pmap)
1067{
1068	vm_offset_t ptdva;
1069	vm_page_t ptdpg;
1070	int i;
1071
1072	PMAP_LOCK_INIT(pmap);
1073
1074	/*
1075	 * allocate the page directory page
1076	 */
1077	while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL)
1078	       pmap_grow_direct_page_cache();
1079
1080	ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
1081	pmap->pm_segtab = (pd_entry_t *)ptdva;
1082	CPU_ZERO(&pmap->pm_active);
1083	for (i = 0; i < MAXCPU; i++) {
1084		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1085		pmap->pm_asid[i].gen = 0;
1086	}
1087	TAILQ_INIT(&pmap->pm_pvchunk);
1088	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1089
1090	return (1);
1091}
1092
1093/*
1094 * this routine is called if the page table page is not
1095 * mapped correctly.
1096 */
1097static vm_page_t
1098_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
1099{
1100	vm_offset_t pageva;
1101	vm_page_t m;
1102
1103	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1104	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1105	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1106
1107	/*
1108	 * Find or fabricate a new pagetable page
1109	 */
1110	if ((m = pmap_alloc_direct_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) {
1111		if (flags & M_WAITOK) {
1112			PMAP_UNLOCK(pmap);
1113			rw_wunlock(&pvh_global_lock);
1114			pmap_grow_direct_page_cache();
1115			rw_wlock(&pvh_global_lock);
1116			PMAP_LOCK(pmap);
1117		}
1118
1119		/*
1120		 * Indicate the need to retry.	While waiting, the page
1121		 * table page may have been allocated.
1122		 */
1123		return (NULL);
1124	}
1125
1126	/*
1127	 * Map the pagetable page into the process address space, if it
1128	 * isn't already there.
1129	 */
1130	pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
1131
1132#ifdef __mips_n64
1133	if (ptepindex >= NUPDE) {
1134		pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva;
1135	} else {
1136		pd_entry_t *pdep, *pde;
1137		int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT);
1138		int pdeindex = ptepindex & (NPDEPG - 1);
1139		vm_page_t pg;
1140
1141		pdep = &pmap->pm_segtab[segindex];
1142		if (*pdep == NULL) {
1143			/* recurse for allocating page dir */
1144			if (_pmap_allocpte(pmap, NUPDE + segindex,
1145			    flags) == NULL) {
1146				/* alloc failed, release current */
1147				--m->wire_count;
1148				atomic_subtract_int(&cnt.v_wire_count, 1);
1149				vm_page_free_zero(m);
1150				return (NULL);
1151			}
1152		} else {
1153			pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep));
1154			pg->wire_count++;
1155		}
1156		/* Next level entry */
1157		pde = (pd_entry_t *)*pdep;
1158		pde[pdeindex] = (pd_entry_t)pageva;
1159	}
1160#else
1161	pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva;
1162#endif
1163	pmap->pm_stats.resident_count++;
1164	return (m);
1165}
1166
1167static vm_page_t
1168pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
1169{
1170	unsigned ptepindex;
1171	pd_entry_t *pde;
1172	vm_page_t m;
1173
1174	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1175	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1176	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1177
1178	/*
1179	 * Calculate pagetable page index
1180	 */
1181	ptepindex = pmap_pde_pindex(va);
1182retry:
1183	/*
1184	 * Get the page directory entry
1185	 */
1186	pde = pmap_pde(pmap, va);
1187
1188	/*
1189	 * If the page table page is mapped, we just increment the hold
1190	 * count, and activate it.
1191	 */
1192	if (pde != NULL && *pde != NULL) {
1193		m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde));
1194		m->wire_count++;
1195	} else {
1196		/*
1197		 * Here if the pte page isn't mapped, or if it has been
1198		 * deallocated.
1199		 */
1200		m = _pmap_allocpte(pmap, ptepindex, flags);
1201		if (m == NULL && (flags & M_WAITOK))
1202			goto retry;
1203	}
1204	return (m);
1205}
1206
1207
1208/***************************************************
1209 * Pmap allocation/deallocation routines.
1210 ***************************************************/
1211
1212/*
1213 * Release any resources held by the given physical map.
1214 * Called when a pmap initialized by pmap_pinit is being released.
1215 * Should only be called if the map contains no valid mappings.
1216 */
1217void
1218pmap_release(pmap_t pmap)
1219{
1220	vm_offset_t ptdva;
1221	vm_page_t ptdpg;
1222
1223	KASSERT(pmap->pm_stats.resident_count == 0,
1224	    ("pmap_release: pmap resident count %ld != 0",
1225	    pmap->pm_stats.resident_count));
1226
1227	ptdva = (vm_offset_t)pmap->pm_segtab;
1228	ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva));
1229
1230	ptdpg->wire_count--;
1231	atomic_subtract_int(&cnt.v_wire_count, 1);
1232	vm_page_free_zero(ptdpg);
1233	PMAP_LOCK_DESTROY(pmap);
1234}
1235
1236/*
1237 * grow the number of kernel page table entries, if needed
1238 */
1239void
1240pmap_growkernel(vm_offset_t addr)
1241{
1242	vm_page_t nkpg;
1243	pd_entry_t *pde, *pdpe;
1244	pt_entry_t *pte;
1245	int i;
1246
1247	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1248	addr = roundup2(addr, NBSEG);
1249	if (addr - 1 >= kernel_map->max_offset)
1250		addr = kernel_map->max_offset;
1251	while (kernel_vm_end < addr) {
1252		pdpe = pmap_segmap(kernel_pmap, kernel_vm_end);
1253#ifdef __mips_n64
1254		if (*pdpe == 0) {
1255			/* new intermediate page table entry */
1256			nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
1257			if (nkpg == NULL)
1258				panic("pmap_growkernel: no memory to grow kernel");
1259			*pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1260			continue; /* try again */
1261		}
1262#endif
1263		pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
1264		if (*pde != 0) {
1265			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1266			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1267				kernel_vm_end = kernel_map->max_offset;
1268				break;
1269			}
1270			continue;
1271		}
1272
1273		/*
1274		 * This index is bogus, but out of the way
1275		 */
1276		nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
1277		if (!nkpg)
1278			panic("pmap_growkernel: no memory to grow kernel");
1279		nkpt++;
1280		*pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1281
1282		/*
1283		 * The R[4-7]?00 stores only one copy of the Global bit in
1284		 * the translation lookaside buffer for each 2 page entry.
1285		 * Thus invalid entrys must have the Global bit set so when
1286		 * Entry LO and Entry HI G bits are anded together they will
1287		 * produce a global bit to store in the tlb.
1288		 */
1289		pte = (pt_entry_t *)*pde;
1290		for (i = 0; i < NPTEPG; i++)
1291			pte[i] = PTE_G;
1292
1293		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1294		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1295			kernel_vm_end = kernel_map->max_offset;
1296			break;
1297		}
1298	}
1299}
1300
1301/***************************************************
1302 * page management routines.
1303 ***************************************************/
1304
1305CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1306#ifdef __mips_n64
1307CTASSERT(_NPCM == 3);
1308CTASSERT(_NPCPV == 168);
1309#else
1310CTASSERT(_NPCM == 11);
1311CTASSERT(_NPCPV == 336);
1312#endif
1313
1314static __inline struct pv_chunk *
1315pv_to_chunk(pv_entry_t pv)
1316{
1317
1318	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1319}
1320
1321#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1322
1323#ifdef __mips_n64
1324#define	PC_FREE0_1	0xfffffffffffffffful
1325#define	PC_FREE2	0x000000fffffffffful
1326#else
1327#define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
1328#define	PC_FREE10	0x0000fffful	/* Free values for index 10 */
1329#endif
1330
1331static const u_long pc_freemask[_NPCM] = {
1332#ifdef __mips_n64
1333	PC_FREE0_1, PC_FREE0_1, PC_FREE2
1334#else
1335	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1336	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1337	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1338	PC_FREE0_9, PC_FREE10
1339#endif
1340};
1341
1342static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
1343
1344SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1345    "Current number of pv entries");
1346
1347#ifdef PV_STATS
1348static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1349
1350SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1351    "Current number of pv entry chunks");
1352SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1353    "Current number of pv entry chunks allocated");
1354SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1355    "Current number of pv entry chunks frees");
1356SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1357    "Number of times tried to get a chunk page but failed.");
1358
1359static long pv_entry_frees, pv_entry_allocs;
1360static int pv_entry_spare;
1361
1362SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1363    "Current number of pv entry frees");
1364SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1365    "Current number of pv entry allocs");
1366SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1367    "Current number of spare pv entries");
1368#endif
1369
1370/*
1371 * We are in a serious low memory condition.  Resort to
1372 * drastic measures to free some pages so we can allocate
1373 * another pv entry chunk.
1374 */
1375static vm_page_t
1376pmap_pv_reclaim(pmap_t locked_pmap)
1377{
1378	struct pch newtail;
1379	struct pv_chunk *pc;
1380	pd_entry_t *pde;
1381	pmap_t pmap;
1382	pt_entry_t *pte, oldpte;
1383	pv_entry_t pv;
1384	vm_offset_t va;
1385	vm_page_t m, m_pc;
1386	u_long inuse;
1387	int bit, field, freed, idx;
1388
1389	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
1390	pmap = NULL;
1391	m_pc = NULL;
1392	TAILQ_INIT(&newtail);
1393	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
1394		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1395		if (pmap != pc->pc_pmap) {
1396			if (pmap != NULL) {
1397				pmap_invalidate_all(pmap);
1398				if (pmap != locked_pmap)
1399					PMAP_UNLOCK(pmap);
1400			}
1401			pmap = pc->pc_pmap;
1402			/* Avoid deadlock and lock recursion. */
1403			if (pmap > locked_pmap)
1404				PMAP_LOCK(pmap);
1405			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
1406				pmap = NULL;
1407				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1408				continue;
1409			}
1410		}
1411
1412		/*
1413		 * Destroy every non-wired, 4 KB page mapping in the chunk.
1414		 */
1415		freed = 0;
1416		for (field = 0; field < _NPCM; field++) {
1417			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
1418			    inuse != 0; inuse &= ~(1UL << bit)) {
1419				bit = ffsl(inuse) - 1;
1420				idx = field * sizeof(inuse) * NBBY + bit;
1421				pv = &pc->pc_pventry[idx];
1422				va = pv->pv_va;
1423				pde = pmap_pde(pmap, va);
1424				KASSERT(pde != NULL && *pde != 0,
1425				    ("pmap_pv_reclaim: pde"));
1426				pte = pmap_pde_to_pte(pde, va);
1427				oldpte = *pte;
1428				if (pte_test(&oldpte, PTE_W))
1429					continue;
1430				if (is_kernel_pmap(pmap))
1431					*pte = PTE_G;
1432				else
1433					*pte = 0;
1434				m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(oldpte));
1435				if (pte_test(&oldpte, PTE_D))
1436					vm_page_dirty(m);
1437				if (m->md.pv_flags & PV_TABLE_REF)
1438					vm_page_aflag_set(m, PGA_REFERENCED);
1439				m->md.pv_flags &= ~PV_TABLE_REF;
1440				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1441				if (TAILQ_EMPTY(&m->md.pv_list))
1442					vm_page_aflag_clear(m, PGA_WRITEABLE);
1443				pc->pc_map[field] |= 1UL << bit;
1444				pmap_unuse_pt(pmap, va, *pde);
1445				freed++;
1446			}
1447		}
1448		if (freed == 0) {
1449			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1450			continue;
1451		}
1452		/* Every freed mapping is for a 4 KB page. */
1453		pmap->pm_stats.resident_count -= freed;
1454		PV_STAT(pv_entry_frees += freed);
1455		PV_STAT(pv_entry_spare += freed);
1456		pv_entry_count -= freed;
1457		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1458		for (field = 0; field < _NPCM; field++)
1459			if (pc->pc_map[field] != pc_freemask[field]) {
1460				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
1461				    pc_list);
1462				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1463
1464				/*
1465				 * One freed pv entry in locked_pmap is
1466				 * sufficient.
1467				 */
1468				if (pmap == locked_pmap)
1469					goto out;
1470				break;
1471			}
1472		if (field == _NPCM) {
1473			PV_STAT(pv_entry_spare -= _NPCPV);
1474			PV_STAT(pc_chunk_count--);
1475			PV_STAT(pc_chunk_frees++);
1476			/* Entire chunk is free; return it. */
1477			m_pc = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(
1478			    (vm_offset_t)pc));
1479			break;
1480		}
1481	}
1482out:
1483	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
1484	if (pmap != NULL) {
1485		pmap_invalidate_all(pmap);
1486		if (pmap != locked_pmap)
1487			PMAP_UNLOCK(pmap);
1488	}
1489	return (m_pc);
1490}
1491
1492/*
1493 * free the pv_entry back to the free list
1494 */
1495static void
1496free_pv_entry(pmap_t pmap, pv_entry_t pv)
1497{
1498	struct pv_chunk *pc;
1499	int bit, field, idx;
1500
1501	rw_assert(&pvh_global_lock, RA_WLOCKED);
1502	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1503	PV_STAT(pv_entry_frees++);
1504	PV_STAT(pv_entry_spare++);
1505	pv_entry_count--;
1506	pc = pv_to_chunk(pv);
1507	idx = pv - &pc->pc_pventry[0];
1508	field = idx / (sizeof(u_long) * NBBY);
1509	bit = idx % (sizeof(u_long) * NBBY);
1510	pc->pc_map[field] |= 1ul << bit;
1511	for (idx = 0; idx < _NPCM; idx++)
1512		if (pc->pc_map[idx] != pc_freemask[idx]) {
1513			/*
1514			 * 98% of the time, pc is already at the head of the
1515			 * list.  If it isn't already, move it to the head.
1516			 */
1517			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
1518			    pc)) {
1519				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1520				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
1521				    pc_list);
1522			}
1523			return;
1524		}
1525	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1526	free_pv_chunk(pc);
1527}
1528
1529static void
1530free_pv_chunk(struct pv_chunk *pc)
1531{
1532	vm_page_t m;
1533
1534 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1535	PV_STAT(pv_entry_spare -= _NPCPV);
1536	PV_STAT(pc_chunk_count--);
1537	PV_STAT(pc_chunk_frees++);
1538	/* entire chunk is free, return it */
1539	m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc));
1540	vm_page_unwire(m, 0);
1541	vm_page_free(m);
1542}
1543
1544/*
1545 * get a new pv_entry, allocating a block from the system
1546 * when needed.
1547 */
1548static pv_entry_t
1549get_pv_entry(pmap_t pmap, boolean_t try)
1550{
1551	struct pv_chunk *pc;
1552	pv_entry_t pv;
1553	vm_page_t m;
1554	int bit, field, idx;
1555
1556	rw_assert(&pvh_global_lock, RA_WLOCKED);
1557	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1558	PV_STAT(pv_entry_allocs++);
1559	pv_entry_count++;
1560retry:
1561	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1562	if (pc != NULL) {
1563		for (field = 0; field < _NPCM; field++) {
1564			if (pc->pc_map[field]) {
1565				bit = ffsl(pc->pc_map[field]) - 1;
1566				break;
1567			}
1568		}
1569		if (field < _NPCM) {
1570			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
1571			pv = &pc->pc_pventry[idx];
1572			pc->pc_map[field] &= ~(1ul << bit);
1573			/* If this was the last item, move it to tail */
1574			for (field = 0; field < _NPCM; field++)
1575				if (pc->pc_map[field] != 0) {
1576					PV_STAT(pv_entry_spare--);
1577					return (pv);	/* not full, return */
1578				}
1579			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1580			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
1581			PV_STAT(pv_entry_spare--);
1582			return (pv);
1583		}
1584	}
1585	/* No free items, allocate another chunk */
1586	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL |
1587	    VM_ALLOC_WIRED);
1588	if (m == NULL) {
1589		if (try) {
1590			pv_entry_count--;
1591			PV_STAT(pc_chunk_tryfail++);
1592			return (NULL);
1593		}
1594		m = pmap_pv_reclaim(pmap);
1595		if (m == NULL)
1596			goto retry;
1597	}
1598	PV_STAT(pc_chunk_count++);
1599	PV_STAT(pc_chunk_allocs++);
1600	pc = (struct pv_chunk *)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
1601	pc->pc_pmap = pmap;
1602	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
1603	for (field = 1; field < _NPCM; field++)
1604		pc->pc_map[field] = pc_freemask[field];
1605	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1606	pv = &pc->pc_pventry[0];
1607	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1608	PV_STAT(pv_entry_spare += _NPCPV - 1);
1609	return (pv);
1610}
1611
1612static pv_entry_t
1613pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1614{
1615	pv_entry_t pv;
1616
1617	rw_assert(&pvh_global_lock, RA_WLOCKED);
1618	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
1619		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1620			TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
1621			break;
1622		}
1623	}
1624	return (pv);
1625}
1626
1627static void
1628pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1629{
1630	pv_entry_t pv;
1631
1632	pv = pmap_pvh_remove(pvh, pmap, va);
1633	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx",
1634	     (u_long)VM_PAGE_TO_PHYS(__containerof(pvh, struct vm_page, md)),
1635	     (u_long)va));
1636	free_pv_entry(pmap, pv);
1637}
1638
1639static void
1640pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
1641{
1642
1643	rw_assert(&pvh_global_lock, RA_WLOCKED);
1644	pmap_pvh_free(&m->md, pmap, va);
1645	if (TAILQ_EMPTY(&m->md.pv_list))
1646		vm_page_aflag_clear(m, PGA_WRITEABLE);
1647}
1648
1649/*
1650 * Conditionally create a pv entry.
1651 */
1652static boolean_t
1653pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
1654    vm_page_t m)
1655{
1656	pv_entry_t pv;
1657
1658	rw_assert(&pvh_global_lock, RA_WLOCKED);
1659	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1660	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1661		pv->pv_va = va;
1662		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1663		return (TRUE);
1664	} else
1665		return (FALSE);
1666}
1667
1668/*
1669 * pmap_remove_pte: do the things to unmap a page in a process
1670 */
1671static int
1672pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
1673    pd_entry_t pde)
1674{
1675	pt_entry_t oldpte;
1676	vm_page_t m;
1677	vm_paddr_t pa;
1678
1679	rw_assert(&pvh_global_lock, RA_WLOCKED);
1680	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1681
1682	/*
1683	 * Write back all cache lines from the page being unmapped.
1684	 */
1685	mips_dcache_wbinv_range_index(va, PAGE_SIZE);
1686
1687	oldpte = *ptq;
1688	if (is_kernel_pmap(pmap))
1689		*ptq = PTE_G;
1690	else
1691		*ptq = 0;
1692
1693	if (pte_test(&oldpte, PTE_W))
1694		pmap->pm_stats.wired_count -= 1;
1695
1696	pmap->pm_stats.resident_count -= 1;
1697
1698	if (pte_test(&oldpte, PTE_MANAGED)) {
1699		pa = TLBLO_PTE_TO_PA(oldpte);
1700		m = PHYS_TO_VM_PAGE(pa);
1701		if (pte_test(&oldpte, PTE_D)) {
1702			KASSERT(!pte_test(&oldpte, PTE_RO),
1703			    ("%s: modified page not writable: va: %p, pte: %#jx",
1704			    __func__, (void *)va, (uintmax_t)oldpte));
1705			vm_page_dirty(m);
1706		}
1707		if (m->md.pv_flags & PV_TABLE_REF)
1708			vm_page_aflag_set(m, PGA_REFERENCED);
1709		m->md.pv_flags &= ~PV_TABLE_REF;
1710
1711		pmap_remove_entry(pmap, m, va);
1712	}
1713	return (pmap_unuse_pt(pmap, va, pde));
1714}
1715
1716/*
1717 * Remove a single page from a process address space
1718 */
1719static void
1720pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1721{
1722	pd_entry_t *pde;
1723	pt_entry_t *ptq;
1724
1725	rw_assert(&pvh_global_lock, RA_WLOCKED);
1726	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1727	pde = pmap_pde(pmap, va);
1728	if (pde == NULL || *pde == 0)
1729		return;
1730	ptq = pmap_pde_to_pte(pde, va);
1731
1732	/*
1733	 * If there is no pte for this address, just skip it!
1734	 */
1735	if (!pte_test(ptq, PTE_V))
1736		return;
1737
1738	(void)pmap_remove_pte(pmap, ptq, va, *pde);
1739	pmap_invalidate_page(pmap, va);
1740}
1741
1742/*
1743 *	Remove the given range of addresses from the specified map.
1744 *
1745 *	It is assumed that the start and end are properly
1746 *	rounded to the page size.
1747 */
1748void
1749pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1750{
1751	pd_entry_t *pde, *pdpe;
1752	pt_entry_t *pte;
1753	vm_offset_t va, va_next;
1754
1755	/*
1756	 * Perform an unsynchronized read.  This is, however, safe.
1757	 */
1758	if (pmap->pm_stats.resident_count == 0)
1759		return;
1760
1761	rw_wlock(&pvh_global_lock);
1762	PMAP_LOCK(pmap);
1763
1764	/*
1765	 * special handling of removing one page.  a very common operation
1766	 * and easy to short circuit some code.
1767	 */
1768	if ((sva + PAGE_SIZE) == eva) {
1769		pmap_remove_page(pmap, sva);
1770		goto out;
1771	}
1772	for (; sva < eva; sva = va_next) {
1773		pdpe = pmap_segmap(pmap, sva);
1774#ifdef __mips_n64
1775		if (*pdpe == 0) {
1776			va_next = (sva + NBSEG) & ~SEGMASK;
1777			if (va_next < sva)
1778				va_next = eva;
1779			continue;
1780		}
1781#endif
1782		va_next = (sva + NBPDR) & ~PDRMASK;
1783		if (va_next < sva)
1784			va_next = eva;
1785
1786		pde = pmap_pdpe_to_pde(pdpe, sva);
1787		if (*pde == NULL)
1788			continue;
1789
1790		/*
1791		 * Limit our scan to either the end of the va represented
1792		 * by the current page table page, or to the end of the
1793		 * range being removed.
1794		 */
1795		if (va_next > eva)
1796			va_next = eva;
1797
1798		va = va_next;
1799		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1800		    sva += PAGE_SIZE) {
1801			if (!pte_test(pte, PTE_V)) {
1802				if (va != va_next) {
1803					pmap_invalidate_range(pmap, va, sva);
1804					va = va_next;
1805				}
1806				continue;
1807			}
1808			if (va == va_next)
1809				va = sva;
1810			if (pmap_remove_pte(pmap, pte, sva, *pde)) {
1811				sva += PAGE_SIZE;
1812				break;
1813			}
1814		}
1815		if (va != va_next)
1816			pmap_invalidate_range(pmap, va, sva);
1817	}
1818out:
1819	rw_wunlock(&pvh_global_lock);
1820	PMAP_UNLOCK(pmap);
1821}
1822
1823/*
1824 *	Routine:	pmap_remove_all
1825 *	Function:
1826 *		Removes this physical page from
1827 *		all physical maps in which it resides.
1828 *		Reflects back modify bits to the pager.
1829 *
1830 *	Notes:
1831 *		Original versions of this routine were very
1832 *		inefficient because they iteratively called
1833 *		pmap_remove (slow...)
1834 */
1835
1836void
1837pmap_remove_all(vm_page_t m)
1838{
1839	pv_entry_t pv;
1840	pmap_t pmap;
1841	pd_entry_t *pde;
1842	pt_entry_t *pte, tpte;
1843
1844	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1845	    ("pmap_remove_all: page %p is not managed", m));
1846	rw_wlock(&pvh_global_lock);
1847
1848	if (m->md.pv_flags & PV_TABLE_REF)
1849		vm_page_aflag_set(m, PGA_REFERENCED);
1850
1851	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1852		pmap = PV_PMAP(pv);
1853		PMAP_LOCK(pmap);
1854
1855		/*
1856		 * If it's last mapping writeback all caches from
1857		 * the page being destroyed
1858	 	 */
1859		if (TAILQ_NEXT(pv, pv_list) == NULL)
1860			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
1861
1862		pmap->pm_stats.resident_count--;
1863
1864		pde = pmap_pde(pmap, pv->pv_va);
1865		KASSERT(pde != NULL && *pde != 0, ("pmap_remove_all: pde"));
1866		pte = pmap_pde_to_pte(pde, pv->pv_va);
1867
1868		tpte = *pte;
1869		if (is_kernel_pmap(pmap))
1870			*pte = PTE_G;
1871		else
1872			*pte = 0;
1873
1874		if (pte_test(&tpte, PTE_W))
1875			pmap->pm_stats.wired_count--;
1876
1877		/*
1878		 * Update the vm_page_t clean and reference bits.
1879		 */
1880		if (pte_test(&tpte, PTE_D)) {
1881			KASSERT(!pte_test(&tpte, PTE_RO),
1882			    ("%s: modified page not writable: va: %p, pte: %#jx",
1883			    __func__, (void *)pv->pv_va, (uintmax_t)tpte));
1884			vm_page_dirty(m);
1885		}
1886		pmap_invalidate_page(pmap, pv->pv_va);
1887
1888		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1889		pmap_unuse_pt(pmap, pv->pv_va, *pde);
1890		free_pv_entry(pmap, pv);
1891		PMAP_UNLOCK(pmap);
1892	}
1893
1894	vm_page_aflag_clear(m, PGA_WRITEABLE);
1895	m->md.pv_flags &= ~PV_TABLE_REF;
1896	rw_wunlock(&pvh_global_lock);
1897}
1898
1899/*
1900 *	Set the physical protection on the
1901 *	specified range of this map as requested.
1902 */
1903void
1904pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1905{
1906	pt_entry_t pbits, *pte;
1907	pd_entry_t *pde, *pdpe;
1908	vm_offset_t va, va_next;
1909	vm_paddr_t pa;
1910	vm_page_t m;
1911
1912	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1913		pmap_remove(pmap, sva, eva);
1914		return;
1915	}
1916	if (prot & VM_PROT_WRITE)
1917		return;
1918
1919	rw_wlock(&pvh_global_lock);
1920	PMAP_LOCK(pmap);
1921	for (; sva < eva; sva = va_next) {
1922		pdpe = pmap_segmap(pmap, sva);
1923#ifdef __mips_n64
1924		if (*pdpe == 0) {
1925			va_next = (sva + NBSEG) & ~SEGMASK;
1926			if (va_next < sva)
1927				va_next = eva;
1928			continue;
1929		}
1930#endif
1931		va_next = (sva + NBPDR) & ~PDRMASK;
1932		if (va_next < sva)
1933			va_next = eva;
1934
1935		pde = pmap_pdpe_to_pde(pdpe, sva);
1936		if (*pde == NULL)
1937			continue;
1938
1939		/*
1940		 * Limit our scan to either the end of the va represented
1941		 * by the current page table page, or to the end of the
1942		 * range being write protected.
1943		 */
1944		if (va_next > eva)
1945			va_next = eva;
1946
1947		va = va_next;
1948		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1949		    sva += PAGE_SIZE) {
1950			pbits = *pte;
1951			if (!pte_test(&pbits, PTE_V) || pte_test(&pbits,
1952			    PTE_RO)) {
1953				if (va != va_next) {
1954					pmap_invalidate_range(pmap, va, sva);
1955					va = va_next;
1956				}
1957				continue;
1958			}
1959			pte_set(&pbits, PTE_RO);
1960			if (pte_test(&pbits, PTE_D)) {
1961				pte_clear(&pbits, PTE_D);
1962				if (pte_test(&pbits, PTE_MANAGED)) {
1963					pa = TLBLO_PTE_TO_PA(pbits);
1964					m = PHYS_TO_VM_PAGE(pa);
1965					vm_page_dirty(m);
1966				}
1967				if (va == va_next)
1968					va = sva;
1969			} else {
1970				/*
1971				 * Unless PTE_D is set, any TLB entries
1972				 * mapping "sva" don't allow write access, so
1973				 * they needn't be invalidated.
1974				 */
1975				if (va != va_next) {
1976					pmap_invalidate_range(pmap, va, sva);
1977					va = va_next;
1978				}
1979			}
1980			*pte = pbits;
1981		}
1982		if (va != va_next)
1983			pmap_invalidate_range(pmap, va, sva);
1984	}
1985	rw_wunlock(&pvh_global_lock);
1986	PMAP_UNLOCK(pmap);
1987}
1988
1989/*
1990 *	Insert the given physical page (p) at
1991 *	the specified virtual address (v) in the
1992 *	target physical map with the protection requested.
1993 *
1994 *	If specified, the page will be wired down, meaning
1995 *	that the related pte can not be reclaimed.
1996 *
1997 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1998 *	or lose information.  That is, this routine must actually
1999 *	insert this page into the given map NOW.
2000 */
2001void
2002pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
2003    vm_prot_t prot, boolean_t wired)
2004{
2005	vm_paddr_t pa, opa;
2006	pt_entry_t *pte;
2007	pt_entry_t origpte, newpte;
2008	pv_entry_t pv;
2009	vm_page_t mpte, om;
2010
2011	va &= ~PAGE_MASK;
2012 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
2013	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
2014	    va >= kmi.clean_eva,
2015	    ("pmap_enter: managed mapping within the clean submap"));
2016	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
2017	    ("pmap_enter: page %p is not busy", m));
2018	pa = VM_PAGE_TO_PHYS(m);
2019	newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, access, prot);
2020	if (wired)
2021		newpte |= PTE_W;
2022	if (is_kernel_pmap(pmap))
2023		newpte |= PTE_G;
2024	if (is_cacheable_mem(pa))
2025		newpte |= PTE_C_CACHE;
2026	else
2027		newpte |= PTE_C_UNCACHED;
2028
2029	mpte = NULL;
2030
2031	rw_wlock(&pvh_global_lock);
2032	PMAP_LOCK(pmap);
2033
2034	/*
2035	 * In the case that a page table page is not resident, we are
2036	 * creating it here.
2037	 */
2038	if (va < VM_MAXUSER_ADDRESS) {
2039		mpte = pmap_allocpte(pmap, va, M_WAITOK);
2040	}
2041	pte = pmap_pte(pmap, va);
2042
2043	/*
2044	 * Page Directory table entry not valid, we need a new PT page
2045	 */
2046	if (pte == NULL) {
2047		panic("pmap_enter: invalid page directory, pdir=%p, va=%p",
2048		    (void *)pmap->pm_segtab, (void *)va);
2049	}
2050	om = NULL;
2051	origpte = *pte;
2052	opa = TLBLO_PTE_TO_PA(origpte);
2053
2054	/*
2055	 * Mapping has not changed, must be protection or wiring change.
2056	 */
2057	if (pte_test(&origpte, PTE_V) && opa == pa) {
2058		/*
2059		 * Wiring change, just update stats. We don't worry about
2060		 * wiring PT pages as they remain resident as long as there
2061		 * are valid mappings in them. Hence, if a user page is
2062		 * wired, the PT page will be also.
2063		 */
2064		if (wired && !pte_test(&origpte, PTE_W))
2065			pmap->pm_stats.wired_count++;
2066		else if (!wired && pte_test(&origpte, PTE_W))
2067			pmap->pm_stats.wired_count--;
2068
2069		KASSERT(!pte_test(&origpte, PTE_D | PTE_RO),
2070		    ("%s: modified page not writable: va: %p, pte: %#jx",
2071		    __func__, (void *)va, (uintmax_t)origpte));
2072
2073		/*
2074		 * Remove extra pte reference
2075		 */
2076		if (mpte)
2077			mpte->wire_count--;
2078
2079		if (pte_test(&origpte, PTE_MANAGED)) {
2080			m->md.pv_flags |= PV_TABLE_REF;
2081			om = m;
2082			newpte |= PTE_MANAGED;
2083			if (!pte_test(&newpte, PTE_RO))
2084				vm_page_aflag_set(m, PGA_WRITEABLE);
2085		}
2086		goto validate;
2087	}
2088
2089	pv = NULL;
2090
2091	/*
2092	 * Mapping has changed, invalidate old range and fall through to
2093	 * handle validating new mapping.
2094	 */
2095	if (opa) {
2096		if (pte_test(&origpte, PTE_W))
2097			pmap->pm_stats.wired_count--;
2098
2099		if (pte_test(&origpte, PTE_MANAGED)) {
2100			om = PHYS_TO_VM_PAGE(opa);
2101			pv = pmap_pvh_remove(&om->md, pmap, va);
2102		}
2103		if (mpte != NULL) {
2104			mpte->wire_count--;
2105			KASSERT(mpte->wire_count > 0,
2106			    ("pmap_enter: missing reference to page table page,"
2107			    " va: %p", (void *)va));
2108		}
2109	} else
2110		pmap->pm_stats.resident_count++;
2111
2112	/*
2113	 * Enter on the PV list if part of our managed memory.
2114	 */
2115	if ((m->oflags & VPO_UNMANAGED) == 0) {
2116		m->md.pv_flags |= PV_TABLE_REF;
2117		if (pv == NULL)
2118			pv = get_pv_entry(pmap, FALSE);
2119		pv->pv_va = va;
2120		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
2121		newpte |= PTE_MANAGED;
2122		if (!pte_test(&newpte, PTE_RO))
2123			vm_page_aflag_set(m, PGA_WRITEABLE);
2124	} else if (pv != NULL)
2125		free_pv_entry(pmap, pv);
2126
2127	/*
2128	 * Increment counters
2129	 */
2130	if (wired)
2131		pmap->pm_stats.wired_count++;
2132
2133validate:
2134
2135#ifdef PMAP_DEBUG
2136	printf("pmap_enter:  va: %p -> pa: %p\n", (void *)va, (void *)pa);
2137#endif
2138
2139	/*
2140	 * if the mapping or permission bits are different, we need to
2141	 * update the pte.
2142	 */
2143	if (origpte != newpte) {
2144		*pte = newpte;
2145		if (pte_test(&origpte, PTE_V)) {
2146			if (pte_test(&origpte, PTE_MANAGED) && opa != pa) {
2147				if (om->md.pv_flags & PV_TABLE_REF)
2148					vm_page_aflag_set(om, PGA_REFERENCED);
2149				om->md.pv_flags &= ~PV_TABLE_REF;
2150			}
2151			if (pte_test(&origpte, PTE_D)) {
2152				KASSERT(!pte_test(&origpte, PTE_RO),
2153				    ("pmap_enter: modified page not writable:"
2154				    " va: %p, pte: %#jx", (void *)va, (uintmax_t)origpte));
2155				if (pte_test(&origpte, PTE_MANAGED))
2156					vm_page_dirty(om);
2157			}
2158			if (pte_test(&origpte, PTE_MANAGED) &&
2159			    TAILQ_EMPTY(&om->md.pv_list))
2160				vm_page_aflag_clear(om, PGA_WRITEABLE);
2161			pmap_update_page(pmap, va, newpte);
2162		}
2163	}
2164
2165	/*
2166	 * Sync I & D caches for executable pages.  Do this only if the
2167	 * target pmap belongs to the current process.  Otherwise, an
2168	 * unresolvable TLB miss may occur.
2169	 */
2170	if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
2171	    (prot & VM_PROT_EXECUTE)) {
2172		mips_icache_sync_range(va, PAGE_SIZE);
2173		mips_dcache_wbinv_range(va, PAGE_SIZE);
2174	}
2175	rw_wunlock(&pvh_global_lock);
2176	PMAP_UNLOCK(pmap);
2177}
2178
2179/*
2180 * this code makes some *MAJOR* assumptions:
2181 * 1. Current pmap & pmap exists.
2182 * 2. Not wired.
2183 * 3. Read access.
2184 * 4. No page table pages.
2185 * but is *MUCH* faster than pmap_enter...
2186 */
2187
2188void
2189pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2190{
2191
2192	rw_wlock(&pvh_global_lock);
2193	PMAP_LOCK(pmap);
2194	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
2195	rw_wunlock(&pvh_global_lock);
2196	PMAP_UNLOCK(pmap);
2197}
2198
2199static vm_page_t
2200pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2201    vm_prot_t prot, vm_page_t mpte)
2202{
2203	pt_entry_t *pte;
2204	vm_paddr_t pa;
2205
2206	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2207	    (m->oflags & VPO_UNMANAGED) != 0,
2208	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2209	rw_assert(&pvh_global_lock, RA_WLOCKED);
2210	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2211
2212	/*
2213	 * In the case that a page table page is not resident, we are
2214	 * creating it here.
2215	 */
2216	if (va < VM_MAXUSER_ADDRESS) {
2217		pd_entry_t *pde;
2218		unsigned ptepindex;
2219
2220		/*
2221		 * Calculate pagetable page index
2222		 */
2223		ptepindex = pmap_pde_pindex(va);
2224		if (mpte && (mpte->pindex == ptepindex)) {
2225			mpte->wire_count++;
2226		} else {
2227			/*
2228			 * Get the page directory entry
2229			 */
2230			pde = pmap_pde(pmap, va);
2231
2232			/*
2233			 * If the page table page is mapped, we just
2234			 * increment the hold count, and activate it.
2235			 */
2236			if (pde && *pde != 0) {
2237				mpte = PHYS_TO_VM_PAGE(
2238				    MIPS_DIRECT_TO_PHYS(*pde));
2239				mpte->wire_count++;
2240			} else {
2241				mpte = _pmap_allocpte(pmap, ptepindex,
2242				    M_NOWAIT);
2243				if (mpte == NULL)
2244					return (mpte);
2245			}
2246		}
2247	} else {
2248		mpte = NULL;
2249	}
2250
2251	pte = pmap_pte(pmap, va);
2252	if (pte_test(pte, PTE_V)) {
2253		if (mpte != NULL) {
2254			mpte->wire_count--;
2255			mpte = NULL;
2256		}
2257		return (mpte);
2258	}
2259
2260	/*
2261	 * Enter on the PV list if part of our managed memory.
2262	 */
2263	if ((m->oflags & VPO_UNMANAGED) == 0 &&
2264	    !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2265		if (mpte != NULL) {
2266			pmap_unwire_ptp(pmap, va, mpte);
2267			mpte = NULL;
2268		}
2269		return (mpte);
2270	}
2271
2272	/*
2273	 * Increment counters
2274	 */
2275	pmap->pm_stats.resident_count++;
2276
2277	pa = VM_PAGE_TO_PHYS(m);
2278
2279	/*
2280	 * Now validate mapping with RO protection
2281	 */
2282	*pte = PTE_RO | TLBLO_PA_TO_PFN(pa) | PTE_V;
2283	if ((m->oflags & VPO_UNMANAGED) == 0)
2284		*pte |= PTE_MANAGED;
2285
2286	if (is_cacheable_mem(pa))
2287		*pte |= PTE_C_CACHE;
2288	else
2289		*pte |= PTE_C_UNCACHED;
2290
2291	if (is_kernel_pmap(pmap))
2292		*pte |= PTE_G;
2293	else {
2294		/*
2295		 * Sync I & D caches.  Do this only if the target pmap
2296		 * belongs to the current process.  Otherwise, an
2297		 * unresolvable TLB miss may occur. */
2298		if (pmap == &curproc->p_vmspace->vm_pmap) {
2299			va &= ~PAGE_MASK;
2300			mips_icache_sync_range(va, PAGE_SIZE);
2301			mips_dcache_wbinv_range(va, PAGE_SIZE);
2302		}
2303	}
2304	return (mpte);
2305}
2306
2307/*
2308 * Make a temporary mapping for a physical address.  This is only intended
2309 * to be used for panic dumps.
2310 *
2311 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2312 */
2313void *
2314pmap_kenter_temporary(vm_paddr_t pa, int i)
2315{
2316	vm_offset_t va;
2317
2318	if (i != 0)
2319		printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
2320		    __func__);
2321
2322	if (MIPS_DIRECT_MAPPABLE(pa)) {
2323		va = MIPS_PHYS_TO_DIRECT(pa);
2324	} else {
2325#ifndef __mips_n64    /* XXX : to be converted to new style */
2326		int cpu;
2327		register_t intr;
2328		struct local_sysmaps *sysm;
2329		pt_entry_t *pte, npte;
2330
2331		/* If this is used other than for dumps, we may need to leave
2332		 * interrupts disasbled on return. If crash dumps don't work when
2333		 * we get to this point, we might want to consider this (leaving things
2334		 * disabled as a starting point ;-)
2335	 	 */
2336		intr = intr_disable();
2337		cpu = PCPU_GET(cpuid);
2338		sysm = &sysmap_lmem[cpu];
2339		/* Since this is for the debugger, no locks or any other fun */
2340		npte = TLBLO_PA_TO_PFN(pa) | PTE_C_CACHE | PTE_D | PTE_V |
2341		    PTE_G;
2342		pte = pmap_pte(kernel_pmap, sysm->base);
2343		*pte = npte;
2344		sysm->valid1 = 1;
2345		pmap_update_page(kernel_pmap, sysm->base, npte);
2346		va = sysm->base;
2347		intr_restore(intr);
2348#endif
2349	}
2350	return ((void *)va);
2351}
2352
2353void
2354pmap_kenter_temporary_free(vm_paddr_t pa)
2355{
2356#ifndef __mips_n64    /* XXX : to be converted to new style */
2357	int cpu;
2358	register_t intr;
2359	struct local_sysmaps *sysm;
2360#endif
2361
2362	if (MIPS_DIRECT_MAPPABLE(pa)) {
2363		/* nothing to do for this case */
2364		return;
2365	}
2366#ifndef __mips_n64    /* XXX : to be converted to new style */
2367	cpu = PCPU_GET(cpuid);
2368	sysm = &sysmap_lmem[cpu];
2369	if (sysm->valid1) {
2370		pt_entry_t *pte;
2371
2372		intr = intr_disable();
2373		pte = pmap_pte(kernel_pmap, sysm->base);
2374		*pte = PTE_G;
2375		pmap_invalidate_page(kernel_pmap, sysm->base);
2376		intr_restore(intr);
2377		sysm->valid1 = 0;
2378	}
2379#endif
2380}
2381
2382/*
2383 * Maps a sequence of resident pages belonging to the same object.
2384 * The sequence begins with the given page m_start.  This page is
2385 * mapped at the given virtual address start.  Each subsequent page is
2386 * mapped at a virtual address that is offset from start by the same
2387 * amount as the page is offset from m_start within the object.  The
2388 * last page in the sequence is the page with the largest offset from
2389 * m_start that can be mapped at a virtual address less than the given
2390 * virtual address end.  Not every virtual page between start and end
2391 * is mapped; only those for which a resident page exists with the
2392 * corresponding offset from m_start are mapped.
2393 */
2394void
2395pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2396    vm_page_t m_start, vm_prot_t prot)
2397{
2398	vm_page_t m, mpte;
2399	vm_pindex_t diff, psize;
2400
2401	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
2402	psize = atop(end - start);
2403	mpte = NULL;
2404	m = m_start;
2405	rw_wlock(&pvh_global_lock);
2406	PMAP_LOCK(pmap);
2407	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2408		mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
2409		    prot, mpte);
2410		m = TAILQ_NEXT(m, listq);
2411	}
2412	rw_wunlock(&pvh_global_lock);
2413 	PMAP_UNLOCK(pmap);
2414}
2415
2416/*
2417 * pmap_object_init_pt preloads the ptes for a given object
2418 * into the specified pmap.  This eliminates the blast of soft
2419 * faults on process startup and immediately after an mmap.
2420 */
2421void
2422pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2423    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2424{
2425	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2426	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2427	    ("pmap_object_init_pt: non-device object"));
2428}
2429
2430/*
2431 *	Routine:	pmap_change_wiring
2432 *	Function:	Change the wiring attribute for a map/virtual-address
2433 *			pair.
2434 *	In/out conditions:
2435 *			The mapping must already exist in the pmap.
2436 */
2437void
2438pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
2439{
2440	pt_entry_t *pte;
2441
2442	PMAP_LOCK(pmap);
2443	pte = pmap_pte(pmap, va);
2444
2445	if (wired && !pte_test(pte, PTE_W))
2446		pmap->pm_stats.wired_count++;
2447	else if (!wired && pte_test(pte, PTE_W))
2448		pmap->pm_stats.wired_count--;
2449
2450	/*
2451	 * Wiring is not a hardware characteristic so there is no need to
2452	 * invalidate TLB.
2453	 */
2454	if (wired)
2455		pte_set(pte, PTE_W);
2456	else
2457		pte_clear(pte, PTE_W);
2458	PMAP_UNLOCK(pmap);
2459}
2460
2461/*
2462 *	Copy the range specified by src_addr/len
2463 *	from the source map to the range dst_addr/len
2464 *	in the destination map.
2465 *
2466 *	This routine is only advisory and need not do anything.
2467 */
2468
2469void
2470pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2471    vm_size_t len, vm_offset_t src_addr)
2472{
2473}
2474
2475/*
2476 *	pmap_zero_page zeros the specified hardware page by mapping
2477 *	the page into KVM and using bzero to clear its contents.
2478 *
2479 * 	Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2480 */
2481void
2482pmap_zero_page(vm_page_t m)
2483{
2484	vm_offset_t va;
2485	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2486
2487	if (MIPS_DIRECT_MAPPABLE(phys)) {
2488		va = MIPS_PHYS_TO_DIRECT(phys);
2489		bzero((caddr_t)va, PAGE_SIZE);
2490		mips_dcache_wbinv_range(va, PAGE_SIZE);
2491	} else {
2492		va = pmap_lmem_map1(phys);
2493		bzero((caddr_t)va, PAGE_SIZE);
2494		mips_dcache_wbinv_range(va, PAGE_SIZE);
2495		pmap_lmem_unmap();
2496	}
2497}
2498
2499/*
2500 *	pmap_zero_page_area zeros the specified hardware page by mapping
2501 *	the page into KVM and using bzero to clear its contents.
2502 *
2503 *	off and size may not cover an area beyond a single hardware page.
2504 */
2505void
2506pmap_zero_page_area(vm_page_t m, int off, int size)
2507{
2508	vm_offset_t va;
2509	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2510
2511	if (MIPS_DIRECT_MAPPABLE(phys)) {
2512		va = MIPS_PHYS_TO_DIRECT(phys);
2513		bzero((char *)(caddr_t)va + off, size);
2514		mips_dcache_wbinv_range(va + off, size);
2515	} else {
2516		va = pmap_lmem_map1(phys);
2517		bzero((char *)va + off, size);
2518		mips_dcache_wbinv_range(va + off, size);
2519		pmap_lmem_unmap();
2520	}
2521}
2522
2523void
2524pmap_zero_page_idle(vm_page_t m)
2525{
2526	vm_offset_t va;
2527	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2528
2529	if (MIPS_DIRECT_MAPPABLE(phys)) {
2530		va = MIPS_PHYS_TO_DIRECT(phys);
2531		bzero((caddr_t)va, PAGE_SIZE);
2532		mips_dcache_wbinv_range(va, PAGE_SIZE);
2533	} else {
2534		va = pmap_lmem_map1(phys);
2535		bzero((caddr_t)va, PAGE_SIZE);
2536		mips_dcache_wbinv_range(va, PAGE_SIZE);
2537		pmap_lmem_unmap();
2538	}
2539}
2540
2541/*
2542 *	pmap_copy_page copies the specified (machine independent)
2543 *	page by mapping the page into virtual memory and using
2544 *	bcopy to copy the page, one machine dependent page at a
2545 *	time.
2546 *
2547 * 	Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2548 */
2549void
2550pmap_copy_page(vm_page_t src, vm_page_t dst)
2551{
2552	vm_offset_t va_src, va_dst;
2553	vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src);
2554	vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst);
2555
2556	if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) {
2557		/* easy case, all can be accessed via KSEG0 */
2558		/*
2559		 * Flush all caches for VA that are mapped to this page
2560		 * to make sure that data in SDRAM is up to date
2561		 */
2562		pmap_flush_pvcache(src);
2563		mips_dcache_wbinv_range_index(
2564		    MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE);
2565		va_src = MIPS_PHYS_TO_DIRECT(phys_src);
2566		va_dst = MIPS_PHYS_TO_DIRECT(phys_dst);
2567		bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2568		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2569	} else {
2570		va_src = pmap_lmem_map2(phys_src, phys_dst);
2571		va_dst = va_src + PAGE_SIZE;
2572		bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2573		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2574		pmap_lmem_unmap();
2575	}
2576}
2577
2578/*
2579 * Returns true if the pmap's pv is one of the first
2580 * 16 pvs linked to from this page.  This count may
2581 * be changed upwards or downwards in the future; it
2582 * is only necessary that true be returned for a small
2583 * subset of pmaps for proper page aging.
2584 */
2585boolean_t
2586pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2587{
2588	pv_entry_t pv;
2589	int loops = 0;
2590	boolean_t rv;
2591
2592	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2593	    ("pmap_page_exists_quick: page %p is not managed", m));
2594	rv = FALSE;
2595	rw_wlock(&pvh_global_lock);
2596	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2597		if (PV_PMAP(pv) == pmap) {
2598			rv = TRUE;
2599			break;
2600		}
2601		loops++;
2602		if (loops >= 16)
2603			break;
2604	}
2605	rw_wunlock(&pvh_global_lock);
2606	return (rv);
2607}
2608
2609/*
2610 * Remove all pages from specified address space
2611 * this aids process exit speeds.  Also, this code
2612 * is special cased for current process only, but
2613 * can have the more generic (and slightly slower)
2614 * mode enabled.  This is much faster than pmap_remove
2615 * in the case of running down an entire address space.
2616 */
2617void
2618pmap_remove_pages(pmap_t pmap)
2619{
2620	pd_entry_t *pde;
2621	pt_entry_t *pte, tpte;
2622	pv_entry_t pv;
2623	vm_page_t m;
2624	struct pv_chunk *pc, *npc;
2625	u_long inuse, bitmask;
2626	int allfree, bit, field, idx;
2627
2628	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2629		printf("warning: pmap_remove_pages called with non-current pmap\n");
2630		return;
2631	}
2632	rw_wlock(&pvh_global_lock);
2633	PMAP_LOCK(pmap);
2634	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2635		allfree = 1;
2636		for (field = 0; field < _NPCM; field++) {
2637			inuse = ~pc->pc_map[field] & pc_freemask[field];
2638			while (inuse != 0) {
2639				bit = ffsl(inuse) - 1;
2640				bitmask = 1UL << bit;
2641				idx = field * sizeof(inuse) * NBBY + bit;
2642				pv = &pc->pc_pventry[idx];
2643				inuse &= ~bitmask;
2644
2645				pde = pmap_pde(pmap, pv->pv_va);
2646				KASSERT(pde != NULL && *pde != 0,
2647				    ("pmap_remove_pages: pde"));
2648				pte = pmap_pde_to_pte(pde, pv->pv_va);
2649				if (!pte_test(pte, PTE_V))
2650					panic("pmap_remove_pages: bad pte");
2651				tpte = *pte;
2652
2653/*
2654 * We cannot remove wired pages from a process' mapping at this time
2655 */
2656				if (pte_test(&tpte, PTE_W)) {
2657					allfree = 0;
2658					continue;
2659				}
2660				*pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2661
2662				m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte));
2663				KASSERT(m != NULL,
2664				    ("pmap_remove_pages: bad tpte %#jx",
2665				    (uintmax_t)tpte));
2666
2667				/*
2668				 * Update the vm_page_t clean and reference bits.
2669				 */
2670				if (pte_test(&tpte, PTE_D))
2671					vm_page_dirty(m);
2672
2673				/* Mark free */
2674				PV_STAT(pv_entry_frees++);
2675				PV_STAT(pv_entry_spare++);
2676				pv_entry_count--;
2677				pc->pc_map[field] |= bitmask;
2678				pmap->pm_stats.resident_count--;
2679				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2680				if (TAILQ_EMPTY(&m->md.pv_list))
2681					vm_page_aflag_clear(m, PGA_WRITEABLE);
2682				pmap_unuse_pt(pmap, pv->pv_va, *pde);
2683			}
2684		}
2685		if (allfree) {
2686			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2687			free_pv_chunk(pc);
2688		}
2689	}
2690	pmap_invalidate_all(pmap);
2691	PMAP_UNLOCK(pmap);
2692	rw_wunlock(&pvh_global_lock);
2693}
2694
2695/*
2696 * pmap_testbit tests bits in pte's
2697 */
2698static boolean_t
2699pmap_testbit(vm_page_t m, int bit)
2700{
2701	pv_entry_t pv;
2702	pmap_t pmap;
2703	pt_entry_t *pte;
2704	boolean_t rv = FALSE;
2705
2706	if (m->oflags & VPO_UNMANAGED)
2707		return (rv);
2708
2709	rw_assert(&pvh_global_lock, RA_WLOCKED);
2710	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2711		pmap = PV_PMAP(pv);
2712		PMAP_LOCK(pmap);
2713		pte = pmap_pte(pmap, pv->pv_va);
2714		rv = pte_test(pte, bit);
2715		PMAP_UNLOCK(pmap);
2716		if (rv)
2717			break;
2718	}
2719	return (rv);
2720}
2721
2722/*
2723 *	pmap_page_wired_mappings:
2724 *
2725 *	Return the number of managed mappings to the given physical page
2726 *	that are wired.
2727 */
2728int
2729pmap_page_wired_mappings(vm_page_t m)
2730{
2731	pv_entry_t pv;
2732	pmap_t pmap;
2733	pt_entry_t *pte;
2734	int count;
2735
2736	count = 0;
2737	if ((m->oflags & VPO_UNMANAGED) != 0)
2738		return (count);
2739	rw_wlock(&pvh_global_lock);
2740	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2741		pmap = PV_PMAP(pv);
2742		PMAP_LOCK(pmap);
2743		pte = pmap_pte(pmap, pv->pv_va);
2744		if (pte_test(pte, PTE_W))
2745			count++;
2746		PMAP_UNLOCK(pmap);
2747	}
2748	rw_wunlock(&pvh_global_lock);
2749	return (count);
2750}
2751
2752/*
2753 * Clear the write and modified bits in each of the given page's mappings.
2754 */
2755void
2756pmap_remove_write(vm_page_t m)
2757{
2758	pmap_t pmap;
2759	pt_entry_t pbits, *pte;
2760	pv_entry_t pv;
2761
2762	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2763	    ("pmap_remove_write: page %p is not managed", m));
2764
2765	/*
2766	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
2767	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
2768	 * is clear, no page table entries need updating.
2769	 */
2770	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2771	if ((m->oflags & VPO_BUSY) == 0 &&
2772	    (m->aflags & PGA_WRITEABLE) == 0)
2773		return;
2774	rw_wlock(&pvh_global_lock);
2775	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2776		pmap = PV_PMAP(pv);
2777		PMAP_LOCK(pmap);
2778		pte = pmap_pte(pmap, pv->pv_va);
2779		KASSERT(pte != NULL && pte_test(pte, PTE_V),
2780		    ("page on pv_list has no pte"));
2781		pbits = *pte;
2782		if (pte_test(&pbits, PTE_D)) {
2783			pte_clear(&pbits, PTE_D);
2784			vm_page_dirty(m);
2785		}
2786		pte_set(&pbits, PTE_RO);
2787		if (pbits != *pte) {
2788			*pte = pbits;
2789			pmap_update_page(pmap, pv->pv_va, pbits);
2790		}
2791		PMAP_UNLOCK(pmap);
2792	}
2793	vm_page_aflag_clear(m, PGA_WRITEABLE);
2794	rw_wunlock(&pvh_global_lock);
2795}
2796
2797/*
2798 *	pmap_ts_referenced:
2799 *
2800 *	Return the count of reference bits for a page, clearing all of them.
2801 */
2802int
2803pmap_ts_referenced(vm_page_t m)
2804{
2805
2806	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2807	    ("pmap_ts_referenced: page %p is not managed", m));
2808	if (m->md.pv_flags & PV_TABLE_REF) {
2809		rw_wlock(&pvh_global_lock);
2810		m->md.pv_flags &= ~PV_TABLE_REF;
2811		rw_wunlock(&pvh_global_lock);
2812		return (1);
2813	}
2814	return (0);
2815}
2816
2817/*
2818 *	pmap_is_modified:
2819 *
2820 *	Return whether or not the specified physical page was modified
2821 *	in any physical maps.
2822 */
2823boolean_t
2824pmap_is_modified(vm_page_t m)
2825{
2826	boolean_t rv;
2827
2828	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2829	    ("pmap_is_modified: page %p is not managed", m));
2830
2831	/*
2832	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
2833	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2834	 * is clear, no PTEs can have PTE_D set.
2835	 */
2836	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2837	if ((m->oflags & VPO_BUSY) == 0 &&
2838	    (m->aflags & PGA_WRITEABLE) == 0)
2839		return (FALSE);
2840	rw_wlock(&pvh_global_lock);
2841	rv = pmap_testbit(m, PTE_D);
2842	rw_wunlock(&pvh_global_lock);
2843	return (rv);
2844}
2845
2846/* N/C */
2847
2848/*
2849 *	pmap_is_prefaultable:
2850 *
2851 *	Return whether or not the specified virtual address is elgible
2852 *	for prefault.
2853 */
2854boolean_t
2855pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2856{
2857	pd_entry_t *pde;
2858	pt_entry_t *pte;
2859	boolean_t rv;
2860
2861	rv = FALSE;
2862	PMAP_LOCK(pmap);
2863	pde = pmap_pde(pmap, addr);
2864	if (pde != NULL && *pde != 0) {
2865		pte = pmap_pde_to_pte(pde, addr);
2866		rv = (*pte == 0);
2867	}
2868	PMAP_UNLOCK(pmap);
2869	return (rv);
2870}
2871
2872/*
2873 *	Clear the modify bits on the specified physical page.
2874 */
2875void
2876pmap_clear_modify(vm_page_t m)
2877{
2878	pmap_t pmap;
2879	pt_entry_t *pte;
2880	pv_entry_t pv;
2881
2882	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2883	    ("pmap_clear_modify: page %p is not managed", m));
2884	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2885	KASSERT((m->oflags & VPO_BUSY) == 0,
2886	    ("pmap_clear_modify: page %p is busy", m));
2887
2888	/*
2889	 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set.
2890	 * If the object containing the page is locked and the page is not
2891	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
2892	 */
2893	if ((m->aflags & PGA_WRITEABLE) == 0)
2894		return;
2895	rw_wlock(&pvh_global_lock);
2896	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2897		pmap = PV_PMAP(pv);
2898		PMAP_LOCK(pmap);
2899		pte = pmap_pte(pmap, pv->pv_va);
2900		if (pte_test(pte, PTE_D)) {
2901			pte_clear(pte, PTE_D);
2902			pmap_update_page(pmap, pv->pv_va, *pte);
2903		}
2904		PMAP_UNLOCK(pmap);
2905	}
2906	rw_wunlock(&pvh_global_lock);
2907}
2908
2909/*
2910 *	pmap_is_referenced:
2911 *
2912 *	Return whether or not the specified physical page was referenced
2913 *	in any physical maps.
2914 */
2915boolean_t
2916pmap_is_referenced(vm_page_t m)
2917{
2918
2919	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2920	    ("pmap_is_referenced: page %p is not managed", m));
2921	return ((m->md.pv_flags & PV_TABLE_REF) != 0);
2922}
2923
2924/*
2925 *	pmap_clear_reference:
2926 *
2927 *	Clear the reference bit on the specified physical page.
2928 */
2929void
2930pmap_clear_reference(vm_page_t m)
2931{
2932
2933	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2934	    ("pmap_clear_reference: page %p is not managed", m));
2935	rw_wlock(&pvh_global_lock);
2936	if (m->md.pv_flags & PV_TABLE_REF) {
2937		m->md.pv_flags &= ~PV_TABLE_REF;
2938	}
2939	rw_wunlock(&pvh_global_lock);
2940}
2941
2942/*
2943 * Miscellaneous support routines follow
2944 */
2945
2946/*
2947 * Map a set of physical memory pages into the kernel virtual
2948 * address space. Return a pointer to where it is mapped. This
2949 * routine is intended to be used for mapping device memory,
2950 * NOT real memory.
2951 *
2952 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit.
2953 */
2954void *
2955pmap_mapdev(vm_paddr_t pa, vm_size_t size)
2956{
2957        vm_offset_t va, tmpva, offset;
2958
2959	/*
2960	 * KSEG1 maps only first 512M of phys address space. For
2961	 * pa > 0x20000000 we should make proper mapping * using pmap_kenter.
2962	 */
2963	if (MIPS_DIRECT_MAPPABLE(pa + size - 1))
2964		return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa));
2965	else {
2966		offset = pa & PAGE_MASK;
2967		size = roundup(size + offset, PAGE_SIZE);
2968
2969		va = kmem_alloc_nofault(kernel_map, size);
2970		if (!va)
2971			panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2972		pa = trunc_page(pa);
2973		for (tmpva = va; size > 0;) {
2974			pmap_kenter_attr(tmpva, pa, PTE_C_UNCACHED);
2975			size -= PAGE_SIZE;
2976			tmpva += PAGE_SIZE;
2977			pa += PAGE_SIZE;
2978		}
2979	}
2980
2981	return ((void *)(va + offset));
2982}
2983
2984void
2985pmap_unmapdev(vm_offset_t va, vm_size_t size)
2986{
2987#ifndef __mips_n64
2988	vm_offset_t base, offset;
2989
2990	/* If the address is within KSEG1 then there is nothing to do */
2991	if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END)
2992		return;
2993
2994	base = trunc_page(va);
2995	offset = va & PAGE_MASK;
2996	size = roundup(size + offset, PAGE_SIZE);
2997	kmem_free(kernel_map, base, size);
2998#endif
2999}
3000
3001/*
3002 * perform the pmap work for mincore
3003 */
3004int
3005pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
3006{
3007	pt_entry_t *ptep, pte;
3008	vm_paddr_t pa;
3009	vm_page_t m;
3010	int val;
3011
3012	PMAP_LOCK(pmap);
3013retry:
3014	ptep = pmap_pte(pmap, addr);
3015	pte = (ptep != NULL) ? *ptep : 0;
3016	if (!pte_test(&pte, PTE_V)) {
3017		val = 0;
3018		goto out;
3019	}
3020	val = MINCORE_INCORE;
3021	if (pte_test(&pte, PTE_D))
3022		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3023	pa = TLBLO_PTE_TO_PA(pte);
3024	if (pte_test(&pte, PTE_MANAGED)) {
3025		/*
3026		 * This may falsely report the given address as
3027		 * MINCORE_REFERENCED.  Unfortunately, due to the lack of
3028		 * per-PTE reference information, it is impossible to
3029		 * determine if the address is MINCORE_REFERENCED.
3030		 */
3031		m = PHYS_TO_VM_PAGE(pa);
3032		if ((m->aflags & PGA_REFERENCED) != 0)
3033			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3034	}
3035	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
3036	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
3037	    pte_test(&pte, PTE_MANAGED)) {
3038		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
3039		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
3040			goto retry;
3041	} else
3042out:
3043		PA_UNLOCK_COND(*locked_pa);
3044	PMAP_UNLOCK(pmap);
3045	return (val);
3046}
3047
3048void
3049pmap_activate(struct thread *td)
3050{
3051	pmap_t pmap, oldpmap;
3052	struct proc *p = td->td_proc;
3053	u_int cpuid;
3054
3055	critical_enter();
3056
3057	pmap = vmspace_pmap(p->p_vmspace);
3058	oldpmap = PCPU_GET(curpmap);
3059	cpuid = PCPU_GET(cpuid);
3060
3061	if (oldpmap)
3062		CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
3063	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
3064	pmap_asid_alloc(pmap);
3065	if (td == curthread) {
3066		PCPU_SET(segbase, pmap->pm_segtab);
3067		mips_wr_entryhi(pmap->pm_asid[cpuid].asid);
3068	}
3069
3070	PCPU_SET(curpmap, pmap);
3071	critical_exit();
3072}
3073
3074void
3075pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
3076{
3077}
3078
3079/*
3080 *	Increase the starting virtual address of the given mapping if a
3081 *	different alignment might result in more superpage mappings.
3082 */
3083void
3084pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3085    vm_offset_t *addr, vm_size_t size)
3086{
3087	vm_offset_t superpage_offset;
3088
3089	if (size < NBSEG)
3090		return;
3091	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
3092		offset += ptoa(object->pg_color);
3093	superpage_offset = offset & SEGMASK;
3094	if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG ||
3095	    (*addr & SEGMASK) == superpage_offset)
3096		return;
3097	if ((*addr & SEGMASK) < superpage_offset)
3098		*addr = (*addr & ~SEGMASK) + superpage_offset;
3099	else
3100		*addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset;
3101}
3102
3103/*
3104 * 	Increase the starting virtual address of the given mapping so
3105 * 	that it is aligned to not be the second page in a TLB entry.
3106 * 	This routine assumes that the length is appropriately-sized so
3107 * 	that the allocation does not share a TLB entry at all if required.
3108 */
3109void
3110pmap_align_tlb(vm_offset_t *addr)
3111{
3112	if ((*addr & PAGE_SIZE) == 0)
3113		return;
3114	*addr += PAGE_SIZE;
3115	return;
3116}
3117
3118#ifdef DDB
3119DB_SHOW_COMMAND(ptable, ddb_pid_dump)
3120{
3121	pmap_t pmap;
3122	struct thread *td = NULL;
3123	struct proc *p;
3124	int i, j, k;
3125	vm_paddr_t pa;
3126	vm_offset_t va;
3127
3128	if (have_addr) {
3129		td = db_lookup_thread(addr, TRUE);
3130		if (td == NULL) {
3131			db_printf("Invalid pid or tid");
3132			return;
3133		}
3134		p = td->td_proc;
3135		if (p->p_vmspace == NULL) {
3136			db_printf("No vmspace for process");
3137			return;
3138		}
3139			pmap = vmspace_pmap(p->p_vmspace);
3140	} else
3141		pmap = kernel_pmap;
3142
3143	db_printf("pmap:%p segtab:%p asid:%x generation:%x\n",
3144	    pmap, pmap->pm_segtab, pmap->pm_asid[0].asid,
3145	    pmap->pm_asid[0].gen);
3146	for (i = 0; i < NPDEPG; i++) {
3147		pd_entry_t *pdpe;
3148		pt_entry_t *pde;
3149		pt_entry_t pte;
3150
3151		pdpe = (pd_entry_t *)pmap->pm_segtab[i];
3152		if (pdpe == NULL)
3153			continue;
3154		db_printf("[%4d] %p\n", i, pdpe);
3155#ifdef __mips_n64
3156		for (j = 0; j < NPDEPG; j++) {
3157			pde = (pt_entry_t *)pdpe[j];
3158			if (pde == NULL)
3159				continue;
3160			db_printf("\t[%4d] %p\n", j, pde);
3161#else
3162		{
3163			j = 0;
3164			pde =  (pt_entry_t *)pdpe;
3165#endif
3166			for (k = 0; k < NPTEPG; k++) {
3167				pte = pde[k];
3168				if (pte == 0 || !pte_test(&pte, PTE_V))
3169					continue;
3170				pa = TLBLO_PTE_TO_PA(pte);
3171				va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT);
3172				db_printf("\t\t[%04d] va: %p pte: %8jx pa:%jx\n",
3173				       k, (void *)va, (uintmax_t)pte, (uintmax_t)pa);
3174			}
3175		}
3176	}
3177}
3178#endif
3179
3180#if defined(DEBUG)
3181
3182static void pads(pmap_t pm);
3183void pmap_pvdump(vm_offset_t pa);
3184
3185/* print address space of pmap*/
3186static void
3187pads(pmap_t pm)
3188{
3189	unsigned va, i, j;
3190	pt_entry_t *ptep;
3191
3192	if (pm == kernel_pmap)
3193		return;
3194	for (i = 0; i < NPTEPG; i++)
3195		if (pm->pm_segtab[i])
3196			for (j = 0; j < NPTEPG; j++) {
3197				va = (i << SEGSHIFT) + (j << PAGE_SHIFT);
3198				if (pm == kernel_pmap && va < KERNBASE)
3199					continue;
3200				if (pm != kernel_pmap &&
3201				    va >= VM_MAXUSER_ADDRESS)
3202					continue;
3203				ptep = pmap_pte(pm, va);
3204				if (pte_test(ptep, PTE_V))
3205					printf("%x:%x ", va, *(int *)ptep);
3206			}
3207
3208}
3209
3210void
3211pmap_pvdump(vm_offset_t pa)
3212{
3213	register pv_entry_t pv;
3214	vm_page_t m;
3215
3216	printf("pa %x", pa);
3217	m = PHYS_TO_VM_PAGE(pa);
3218	for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3219	    pv = TAILQ_NEXT(pv, pv_list)) {
3220		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3221		pads(pv->pv_pmap);
3222	}
3223	printf(" ");
3224}
3225
3226/* N/C */
3227#endif
3228
3229
3230/*
3231 * Allocate TLB address space tag (called ASID or TLBPID) and return it.
3232 * It takes almost as much or more time to search the TLB for a
3233 * specific ASID and flush those entries as it does to flush the entire TLB.
3234 * Therefore, when we allocate a new ASID, we just take the next number. When
3235 * we run out of numbers, we flush the TLB, increment the generation count
3236 * and start over. ASID zero is reserved for kernel use.
3237 */
3238static void
3239pmap_asid_alloc(pmap)
3240	pmap_t pmap;
3241{
3242	if (pmap->pm_asid[PCPU_GET(cpuid)].asid != PMAP_ASID_RESERVED &&
3243	    pmap->pm_asid[PCPU_GET(cpuid)].gen == PCPU_GET(asid_generation));
3244	else {
3245		if (PCPU_GET(next_asid) == pmap_max_asid) {
3246			tlb_invalidate_all_user(NULL);
3247			PCPU_SET(asid_generation,
3248			    (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
3249			if (PCPU_GET(asid_generation) == 0) {
3250				PCPU_SET(asid_generation, 1);
3251			}
3252			PCPU_SET(next_asid, 1);	/* 0 means invalid */
3253		}
3254		pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3255		pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3256		PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3257	}
3258}
3259
3260static pt_entry_t
3261init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot)
3262{
3263	pt_entry_t rw;
3264
3265	if (!(prot & VM_PROT_WRITE))
3266		rw = PTE_V | PTE_RO;
3267	else if ((m->oflags & VPO_UNMANAGED) == 0) {
3268		if ((access & VM_PROT_WRITE) != 0)
3269			rw = PTE_V | PTE_D;
3270		else
3271			rw = PTE_V;
3272	} else
3273		/* Needn't emulate a modified bit for unmanaged pages. */
3274		rw = PTE_V | PTE_D;
3275	return (rw);
3276}
3277
3278/*
3279 * pmap_emulate_modified : do dirty bit emulation
3280 *
3281 * On SMP, update just the local TLB, other CPUs will update their
3282 * TLBs from PTE lazily, if they get the exception.
3283 * Returns 0 in case of sucess, 1 if the page is read only and we
3284 * need to fault.
3285 */
3286int
3287pmap_emulate_modified(pmap_t pmap, vm_offset_t va)
3288{
3289	pt_entry_t *pte;
3290
3291	PMAP_LOCK(pmap);
3292	pte = pmap_pte(pmap, va);
3293	if (pte == NULL)
3294		panic("pmap_emulate_modified: can't find PTE");
3295#ifdef SMP
3296	/* It is possible that some other CPU changed m-bit */
3297	if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) {
3298		tlb_update(pmap, va, *pte);
3299		PMAP_UNLOCK(pmap);
3300		return (0);
3301	}
3302#else
3303	if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D))
3304		panic("pmap_emulate_modified: invalid pte");
3305#endif
3306	if (pte_test(pte, PTE_RO)) {
3307		PMAP_UNLOCK(pmap);
3308		return (1);
3309	}
3310	pte_set(pte, PTE_D);
3311	tlb_update(pmap, va, *pte);
3312	if (!pte_test(pte, PTE_MANAGED))
3313		panic("pmap_emulate_modified: unmanaged page");
3314	PMAP_UNLOCK(pmap);
3315	return (0);
3316}
3317
3318/*
3319 *	Routine:	pmap_kextract
3320 *	Function:
3321 *		Extract the physical page address associated
3322 *		virtual address.
3323 */
3324vm_paddr_t
3325pmap_kextract(vm_offset_t va)
3326{
3327	int mapped;
3328
3329	/*
3330	 * First, the direct-mapped regions.
3331	 */
3332#if defined(__mips_n64)
3333	if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END)
3334		return (MIPS_XKPHYS_TO_PHYS(va));
3335#endif
3336	if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END)
3337		return (MIPS_KSEG0_TO_PHYS(va));
3338
3339	if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END)
3340		return (MIPS_KSEG1_TO_PHYS(va));
3341
3342	/*
3343	 * User virtual addresses.
3344	 */
3345	if (va < VM_MAXUSER_ADDRESS) {
3346		pt_entry_t *ptep;
3347
3348		if (curproc && curproc->p_vmspace) {
3349			ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3350			if (ptep) {
3351				return (TLBLO_PTE_TO_PA(*ptep) |
3352				    (va & PAGE_MASK));
3353			}
3354			return (0);
3355		}
3356	}
3357
3358	/*
3359	 * Should be kernel virtual here, otherwise fail
3360	 */
3361	mapped = (va >= MIPS_KSEG2_START || va < MIPS_KSEG2_END);
3362#if defined(__mips_n64)
3363	mapped = mapped || (va >= MIPS_XKSEG_START || va < MIPS_XKSEG_END);
3364#endif
3365	/*
3366	 * Kernel virtual.
3367	 */
3368
3369	if (mapped) {
3370		pt_entry_t *ptep;
3371
3372		/* Is the kernel pmap initialized? */
3373		if (!CPU_EMPTY(&kernel_pmap->pm_active)) {
3374			/* It's inside the virtual address range */
3375			ptep = pmap_pte(kernel_pmap, va);
3376			if (ptep) {
3377				return (TLBLO_PTE_TO_PA(*ptep) |
3378				    (va & PAGE_MASK));
3379			}
3380		}
3381		return (0);
3382	}
3383
3384	panic("%s for unknown address space %p.", __func__, (void *)va);
3385}
3386
3387
3388void
3389pmap_flush_pvcache(vm_page_t m)
3390{
3391	pv_entry_t pv;
3392
3393	if (m != NULL) {
3394		for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3395		    pv = TAILQ_NEXT(pv, pv_list)) {
3396			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
3397		}
3398	}
3399}
3400