pmap.c revision 209243
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
38 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
39 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
40 */
41
42/*
43 *	Manages physical address maps.
44 *
45 *	In addition to hardware address maps, this
46 *	module is called upon to provide software-use-only
47 *	maps which may or may not be stored in the same
48 *	form as hardware maps.	These pseudo-maps are
49 *	used to store intermediate results from copy
50 *	operations to and from address spaces.
51 *
52 *	Since the information managed by this module is
53 *	also stored by the logical address mapping module,
54 *	this module may throw away valid virtual-to-physical
55 *	mappings at almost any time.  However, invalidations
56 *	of virtual-to-physical mappings must be done as
57 *	requested.
58 *
59 *	In order to cope with hardware architectures which
60 *	make virtual-to-physical map invalidates expensive,
61 *	this module may delay invalidate or reduced protection
62 *	operations until such time as they are actually
63 *	necessary.  This module is given full information as
64 *	to which processors are currently using which maps,
65 *	and to when physical maps must be made correct.
66 */
67
68#include <sys/cdefs.h>
69__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 209243 2010-06-17 05:03:01Z jchandra $");
70
71#include "opt_ddb.h"
72#include "opt_msgbuf.h"
73#include <sys/param.h>
74#include <sys/systm.h>
75#include <sys/proc.h>
76#include <sys/msgbuf.h>
77#include <sys/vmmeter.h>
78#include <sys/mman.h>
79#include <sys/smp.h>
80
81#include <vm/vm.h>
82#include <vm/vm_param.h>
83#include <vm/vm_phys.h>
84#include <sys/lock.h>
85#include <sys/mutex.h>
86#include <vm/vm_kern.h>
87#include <vm/vm_page.h>
88#include <vm/vm_map.h>
89#include <vm/vm_object.h>
90#include <vm/vm_extern.h>
91#include <vm/vm_pageout.h>
92#include <vm/vm_pager.h>
93#include <vm/uma.h>
94#include <sys/pcpu.h>
95#include <sys/sched.h>
96#ifdef SMP
97#include <sys/smp.h>
98#endif
99
100#include <machine/cache.h>
101#include <machine/md_var.h>
102#include <machine/tlb.h>
103
104#if defined(DIAGNOSTIC)
105#define	PMAP_DIAGNOSTIC
106#endif
107
108#undef PMAP_DEBUG
109
110#ifndef PMAP_SHPGPERPROC
111#define	PMAP_SHPGPERPROC 200
112#endif
113
114#if !defined(PMAP_DIAGNOSTIC)
115#define	PMAP_INLINE __inline
116#else
117#define	PMAP_INLINE
118#endif
119
120/*
121 * Get PDEs and PTEs for user/kernel address space
122 */
123#define	pmap_pde(m, v)	       (&((m)->pm_segtab[(vm_offset_t)(v) >> SEGSHIFT]))
124#define	segtab_pde(m, v)	(m[(vm_offset_t)(v) >> SEGSHIFT])
125
126#define	pmap_pte_w(pte)		((*(int *)pte & PTE_W) != 0)
127#define	pmap_pde_v(pte)		((*(int *)pte) != 0)
128#define	pmap_pte_m(pte)		((*(int *)pte & PTE_M) != 0)
129#define	pmap_pte_v(pte)		((*(int *)pte & PTE_V) != 0)
130
131#define	pmap_pte_set_w(pte, v)	((v)?(*(int *)pte |= PTE_W):(*(int *)pte &= ~PTE_W))
132#define	pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
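
/*
 * Worked example of the two-level lookup (a sketch assuming the usual
 * 32-bit layout with 4KB pages and 4MB segments, i.e. SEGSHIFT == 22
 * and 1024 PTEs per page table page): for va == 0x00403004,
 *
 *	segment index = va >> SEGSHIFT             = 1
 *	pte index     = (va >> PAGE_SHIFT) & 0x3ff = 3
 *	page offset   = va & PAGE_MASK             = 0x004
 *
 * so pmap_pde(pmap, va) selects pm_segtab[1], and the PTE sits in slot 3
 * of the page table page that entry points to (see pmap_pte() below).
 */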
133
134#define	MIPS_SEGSIZE		(1L << SEGSHIFT)
135#define	mips_segtrunc(va)	((va) & ~(MIPS_SEGSIZE-1))
136#define	pmap_TLB_invalidate_all() MIPS_TBIAP()
137#define	pmap_va_asid(pmap, va)	((va) | ((pmap)->pm_asid[PCPU_GET(cpuid)].asid << VMTLB_PID_SHIFT))
138#define	is_kernel_pmap(x)	((x) == kernel_pmap)
139
140struct pmap kernel_pmap_store;
141pd_entry_t *kernel_segmap;
142
143vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
144vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
145
146static int nkpt;
147unsigned pmap_max_asid;		/* max ASID supported by the system */
148
149
150#define	PMAP_ASID_RESERVED	0
151
152vm_offset_t kernel_vm_end;
153
154static void pmap_asid_alloc(pmap_t pmap);
155
156/*
157 * Data for the pv entry allocation mechanism
158 */
159static uma_zone_t pvzone;
160static struct vm_object pvzone_obj;
161static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
162
163static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
164static pv_entry_t get_pv_entry(pmap_t locked_pmap);
165static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
166static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
167    vm_offset_t va);
168static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);
169
170static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
171    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
172static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va);
173static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
174static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
175static boolean_t pmap_testbit(vm_page_t m, int bit);
176static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
177    vm_offset_t va, vm_page_t m);
178
179static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
180
181static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
182static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
183static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
184static vm_page_t pmap_alloc_pte_page(pmap_t, unsigned int, int, vm_offset_t *);
185static void pmap_release_pte_page(vm_page_t);
186
187#ifdef SMP
188static void pmap_invalidate_page_action(void *arg);
189static void pmap_invalidate_all_action(void *arg);
190static void pmap_update_page_action(void *arg);
191#endif
192
193static void pmap_ptpgzone_dtor(void *mem, int size, void *arg);
194static void *pmap_ptpgzone_allocf(uma_zone_t, int, u_int8_t *, int);
195static uma_zone_t ptpgzone;
196
197struct local_sysmaps {
198	struct mtx lock;
199	vm_offset_t base;
200	uint16_t valid1, valid2;
201};
202
203/*
204 * This structure is for large memory above 512Meg.  In 32-bit mode we
205 * cannot simply use the direct-mapped MIPS_KSEG0_TO_PHYS() macros for
206 * such memory, since it is not visible through KSEG0 and must be
207 * mapped in whenever we need to access it.  In 64-bit mode this
208 * problem goes away.
209 */
210static struct local_sysmaps sysmap_lmem[MAXCPU];
211
212#define	PMAP_LMEM_MAP1(va, phys)					\
213	int cpu;							\
214	struct local_sysmaps *sysm;					\
215	pt_entry_t *pte, npte;						\
216									\
217	cpu = PCPU_GET(cpuid);						\
218	sysm = &sysmap_lmem[cpu];					\
219	PMAP_LGMEM_LOCK(sysm);						\
220	intr = intr_disable();						\
221	sched_pin();							\
222	va = sysm->base;						\
223	npte = TLBLO_PA_TO_PFN(phys) |					\
224	    PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;			\
225	pte = pmap_pte(kernel_pmap, va);				\
226	*pte = npte;							\
227	sysm->valid1 = 1;
228
229#define	PMAP_LMEM_MAP2(va1, phys1, va2, phys2)				\
230	int cpu;							\
231	struct local_sysmaps *sysm;					\
232	pt_entry_t *pte, npte;						\
233									\
234	cpu = PCPU_GET(cpuid);						\
235	sysm = &sysmap_lmem[cpu];					\
236	PMAP_LGMEM_LOCK(sysm);						\
237	intr = intr_disable();						\
238	sched_pin();							\
239	va1 = sysm->base;						\
240	va2 = sysm->base + PAGE_SIZE;					\
241	npte = TLBLO_PA_TO_PFN(phys1) |					\
242	    PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;			\
243	pte = pmap_pte(kernel_pmap, va1);				\
244	*pte = npte;							\
245	npte =  TLBLO_PA_TO_PFN(phys2) |				\
246	    PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;			\
247	pte = pmap_pte(kernel_pmap, va2);				\
248	*pte = npte;							\
249	sysm->valid1 = 1;						\
250	sysm->valid2 = 1;
251
252#define	PMAP_LMEM_UNMAP()						\
253	pte = pmap_pte(kernel_pmap, sysm->base);			\
254	*pte = PTE_G;							\
255	tlb_invalidate_address(kernel_pmap, sysm->base);		\
256	sysm->valid1 = 0;						\
257	pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);		\
258	*pte = PTE_G;							\
259	tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE);	\
260	sysm->valid2 = 0;						\
261	sched_unpin();							\
262	intr_restore(intr);						\
263	PMAP_LGMEM_UNLOCK(sysm);
264
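/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * user of the PMAP_LMEM_MAP1()/PMAP_LMEM_UNMAP() pair above.  The caller
 * supplies the 'va' lvalue and a 'register_t intr' local, since the
 * macros hand back the per-CPU sysmap address in 'va' and save/restore
 * the interrupt state through 'intr'.
 */
#if 0
static void
example_zero_high_page(vm_paddr_t phys)
{
	vm_offset_t va;
	register_t intr;

	PMAP_LMEM_MAP1(va, phys);	/* map 'phys' at this CPU's sysmap VA */
	bzero((caddr_t)va, PAGE_SIZE);	/* touch the page only through 'va' */
	PMAP_LMEM_UNMAP();		/* clear the mapping, flush the TLB entry */
}
#endif
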
265pd_entry_t
266pmap_segmap(pmap_t pmap, vm_offset_t va)
267{
268	if (pmap->pm_segtab)
269		return (pmap->pm_segtab[((vm_offset_t)(va) >> SEGSHIFT)]);
270	else
271		return ((pd_entry_t)0);
272}
273
274/*
275 *	Routine:	pmap_pte
276 *	Function:
277 *		Extract the page table entry associated
278 *		with the given map/virtual_address pair.
279 */
280pt_entry_t *
281pmap_pte(pmap_t pmap, vm_offset_t va)
282{
283	pt_entry_t *pdeaddr;
284
285	if (pmap) {
286		pdeaddr = (pt_entry_t *)pmap_segmap(pmap, va);
287		if (pdeaddr) {
288			return pdeaddr + vad_to_pte_offset(va);
289		}
290	}
291	return ((pt_entry_t *)0);
292}
293
294
295vm_offset_t
296pmap_steal_memory(vm_size_t size)
297{
298	vm_size_t bank_size;
299	vm_offset_t pa, va;
300
301	size = round_page(size);
302
303	bank_size = phys_avail[1] - phys_avail[0];
304	while (size > bank_size) {
305		int i;
306
307		for (i = 0; phys_avail[i + 2]; i += 2) {
308			phys_avail[i] = phys_avail[i + 2];
309			phys_avail[i + 1] = phys_avail[i + 3];
310		}
311		phys_avail[i] = 0;
312		phys_avail[i + 1] = 0;
313		if (!phys_avail[0])
314			panic("pmap_steal_memory: out of memory");
315		bank_size = phys_avail[1] - phys_avail[0];
316	}
317
318	pa = phys_avail[0];
319	phys_avail[0] += size;
320	if (pa >= MIPS_KSEG0_LARGEST_PHYS) {
321		panic("Out of memory below 512Meg?");
322	}
323	va = MIPS_PHYS_TO_KSEG0(pa);
324	bzero((caddr_t)va, size);
325	return va;
326}
327
328/*
329 *	Bootstrap the system enough to run with virtual memory.  This
330 * assumes that the phys_avail array has been initialized.
331 */
332void
333pmap_bootstrap(void)
334{
335	pt_entry_t *pgtab;
336	pt_entry_t *pte;
337	int i, j;
338	int memory_larger_than_512meg = 0;
339
340	/* Sort. */
341again:
342	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
343		/*
344		 * Keep the memory aligned on a page boundary.
345		 */
346		phys_avail[i] = round_page(phys_avail[i]);
347		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
348
349		if (phys_avail[i + 1] >= MIPS_KSEG0_LARGEST_PHYS)
350			memory_larger_than_512meg++;
351		if (i < 2)
352			continue;
353		if (phys_avail[i - 2] > phys_avail[i]) {
354			vm_paddr_t ptemp[2];
355
356
357			ptemp[0] = phys_avail[i + 0];
358			ptemp[1] = phys_avail[i + 1];
359
360			phys_avail[i + 0] = phys_avail[i - 2];
361			phys_avail[i + 1] = phys_avail[i - 1];
362
363			phys_avail[i - 2] = ptemp[0];
364			phys_avail[i - 1] = ptemp[1];
365			goto again;
366		}
367	}
368
369	/*
370	 * Copy the phys_avail[] array before we start stealing memory from it.
371	 */
372	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
373		physmem_desc[i] = phys_avail[i];
374		physmem_desc[i + 1] = phys_avail[i + 1];
375	}
376
377	Maxmem = atop(phys_avail[i - 1]);
378
379	if (bootverbose) {
380		printf("Physical memory chunk(s):\n");
381		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
382			vm_paddr_t size;
383
384			size = phys_avail[i + 1] - phys_avail[i];
385			printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
386			    (uintmax_t) phys_avail[i],
387			    (uintmax_t) phys_avail[i + 1] - 1,
388			    (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
389		}
390		printf("Maxmem is 0x%0lx\n", ptoa(Maxmem));
391	}
392	/*
393	 * Steal the message buffer from the beginning of memory.
394	 */
395	msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE);
396	msgbufinit(msgbufp, MSGBUF_SIZE);
397
398	/*
399	 * Steal thread0 kstack.
400	 */
401	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
402
403
404	virtual_avail = VM_MIN_KERNEL_ADDRESS;
405	virtual_end = VM_MAX_KERNEL_ADDRESS;
406
407#ifdef SMP
408	/*
409	 * Steal some virtual address space to map the pcpu area.
410	 */
411	virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2);
412	pcpup = (struct pcpu *)virtual_avail;
413	virtual_avail += PAGE_SIZE * 2;
414
415	/*
416	 * Initialize the wired TLB entry mapping the pcpu region for
417	 * the BSP at 'pcpup'. Up until this point we were operating
418	 * with the 'pcpup' for the BSP pointing to a virtual address
419	 * in KSEG0 so there was no need for a TLB mapping.
420	 */
421	mips_pcpu_tlb_init(PCPU_ADDR(0));
422
423	if (bootverbose)
424		printf("pcpu is available at virtual address %p.\n", pcpup);
425#endif
426
427	/*
428	 * Steal some virtual space that will not be in kernel_segmap. This
429	 * va memory space will be used to map in kernel pages that are
430	 * outside the 512Meg region. Note that we only do this steal when
431	 * we do have memory in this region, that way for systems with
432	 * smaller memory we don't "steal" any va ranges :-)
433	 */
434	if (memory_larger_than_512meg) {
435		for (i = 0; i < MAXCPU; i++) {
436			sysmap_lmem[i].base = virtual_avail;
437			virtual_avail += PAGE_SIZE * 2;
438			sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
439			PMAP_LGMEM_LOCK_INIT(&sysmap_lmem[i]);
440		}
441	}
442
443	/*
444	 * Allocate segment table for the kernel
445	 */
446	kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);
447
448	/*
449	 * Allocate second level page tables for the kernel
450	 */
451	nkpt = NKPT;
452	if (memory_larger_than_512meg) {
453		/*
454		 * If we have a large memory system we CANNOT afford to hit
455		 * pmap_growkernel() and allocate memory later, since we MAY
456		 * end up with a page that is NOT mappable.  For that reason
457		 * we grab more up front.  Normally NKPT is 120 (YMMV, see
458		 * pmap.h); this gives us 480MB of kernel virtual addresses
459		 * at the cost of 120 pages (each page maps 4MB).  Since the
460		 * kernel starts at virtual_avail, we can use that to compute
461		 * how many segmap entries are left from there to the end of
462		 * the segmap.  We want to allocate all of them, which for a
463		 * segmap covering roughly 0xC0000000 - 0xFFFFFFFF works out
464		 * to about 256 entries instead of the 120.
465		 */
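		/*
		 * Illustrative arithmetic for the expression below (a sketch
		 * assuming 4KB pages, 4-byte pd_entry_t entries and 4MB
		 * segments): PAGE_SIZE / sizeof(pd_entry_t) = 4096 / 4 = 1024
		 * segmap entries cover the whole 4GB address space, and
		 * virtual_avail >> SEGSHIFT for a kernel starting near
		 * 0xC0000000 is about 768, leaving 1024 - 768 = 256 entries,
		 * i.e. roughly 1GB of KVA versus the 480MB that the default
		 * NKPT of 120 would provide.
		 */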
466		nkpt = (PAGE_SIZE / sizeof(pd_entry_t)) - (virtual_avail >> SEGSHIFT);
467	}
468	pgtab = (pt_entry_t *)pmap_steal_memory(PAGE_SIZE * nkpt);
469
470	/*
471	 * The R[4-7]?00 stores only one copy of the Global bit in the
472	 * translation lookaside buffer for each 2-page entry.  Thus invalid
473	 * entries must have the Global bit set, so that when the EntryLo0
474	 * and EntryLo1 G bits are ANDed together they still produce a global
475	 * bit to store in the TLB.
476	 */
477	for (i = 0, pte = pgtab; i < (nkpt * NPTEPG); i++, pte++)
478		*pte = PTE_G;
479
480	/*
481	 * The segment table contains the KVA of the pages in the second
482	 * level page table.
483	 */
484	for (i = 0, j = (virtual_avail >> SEGSHIFT); i < nkpt; i++, j++)
485		kernel_segmap[j] = (pd_entry_t)(pgtab + (i * NPTEPG));
486
487	/*
488	 * The kernel's pmap is statically allocated so we don't have to use
489	 * pmap_create, which is unlikely to work correctly at this part of
490	 * the boot sequence (XXX and which no longer exists).
491	 */
492	PMAP_LOCK_INIT(kernel_pmap);
493	kernel_pmap->pm_segtab = kernel_segmap;
494	kernel_pmap->pm_active = ~0;
495	TAILQ_INIT(&kernel_pmap->pm_pvlist);
496	kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
497	kernel_pmap->pm_asid[0].gen = 0;
498	pmap_max_asid = VMNUM_PIDS;
499	mips_wr_entryhi(0);
500}
501
502/*
503 * Initialize a vm_page's machine-dependent fields.
504 */
505void
506pmap_page_init(vm_page_t m)
507{
508
509	TAILQ_INIT(&m->md.pv_list);
510	m->md.pv_list_count = 0;
511	m->md.pv_flags = 0;
512}
513
514/*
515 *	Initialize the pmap module.
516 *	Called by vm_init, to initialize any structures that the pmap
517 *	system needs to map virtual memory.
518 *	pmap_init has been enhanced to support, in a fairly consistent
519 *	way, discontiguous physical memory.
520 */
521void
522pmap_init(void)
523{
524
525	/*
526	 * Initialize the address space (zone) for the pv entries.  Set a
527	 * high water mark so that the system can recover from excessive
528	 * numbers of pv entries.
529	 */
530	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
531	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
532	pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
533	pv_entry_high_water = 9 * (pv_entry_max / 10);
534	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
535
536	ptpgzone = uma_zcreate("PT ENTRY", PAGE_SIZE, NULL, pmap_ptpgzone_dtor,
537	    NULL, NULL, PAGE_SIZE - 1, UMA_ZONE_NOFREE | UMA_ZONE_ZINIT);
538	uma_zone_set_allocf(ptpgzone, pmap_ptpgzone_allocf);
539}
540
541/***************************************************
542 * Low level helper routines.....
543 ***************************************************/
544
545#if defined(PMAP_DIAGNOSTIC)
546
547/*
548 * This code checks for non-writeable/modified pages.
549 * This should be an invalid condition.
550 */
551static int
552pmap_nw_modified(pt_entry_t pte)
553{
554	if ((pte & (PTE_M | PTE_RO)) == (PTE_M | PTE_RO))
555		return (1);
556	else
557		return (0);
558}
559
560#endif
561
562static void
563pmap_invalidate_all(pmap_t pmap)
564{
565#ifdef SMP
566	smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *)pmap);
567}
568
569static void
570pmap_invalidate_all_action(void *arg)
571{
572	pmap_t pmap = (pmap_t)arg;
573
574#endif
575
576	if (pmap == kernel_pmap) {
577		tlb_invalidate_all();
578		return;
579	}
580
581	if (pmap->pm_active & PCPU_GET(cpumask))
582		tlb_invalidate_all_user(pmap);
583	else
584		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
585}
586
587struct pmap_invalidate_page_arg {
588	pmap_t pmap;
589	vm_offset_t va;
590};
591
592static __inline void
593pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
594{
595#ifdef SMP
596	struct pmap_invalidate_page_arg arg;
597
598	arg.pmap = pmap;
599	arg.va = va;
600
601	smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *)&arg);
602}
603
604static void
605pmap_invalidate_page_action(void *arg)
606{
607	pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap;
608	vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va;
609
610#endif
611
612	if (is_kernel_pmap(pmap)) {
613		tlb_invalidate_address(pmap, va);
614		return;
615	}
616	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
617		return;
618	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
619		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
620		return;
621	}
622	tlb_invalidate_address(pmap, va);
623}
624
625struct pmap_update_page_arg {
626	pmap_t pmap;
627	vm_offset_t va;
628	pt_entry_t pte;
629};
630
631void
632pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
633{
634#ifdef SMP
635	struct pmap_update_page_arg arg;
636
637	arg.pmap = pmap;
638	arg.va = va;
639	arg.pte = pte;
640
641	smp_rendezvous(0, pmap_update_page_action, 0, (void *)&arg);
642}
643
644static void
645pmap_update_page_action(void *arg)
646{
647	pmap_t pmap = ((struct pmap_update_page_arg *)arg)->pmap;
648	vm_offset_t va = ((struct pmap_update_page_arg *)arg)->va;
649	pt_entry_t pte = ((struct pmap_update_page_arg *)arg)->pte;
650
651#endif
652	if (is_kernel_pmap(pmap)) {
653		tlb_update(pmap, va, pte);
654		return;
655	}
656	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
657		return;
658	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
659		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
660		return;
661	}
662	tlb_update(pmap, va, pte);
663}
664
665/*
666 *	Routine:	pmap_extract
667 *	Function:
668 *		Extract the physical page address associated
669 *		with the given map/virtual_address pair.
670 */
671vm_paddr_t
672pmap_extract(pmap_t pmap, vm_offset_t va)
673{
674	pt_entry_t *pte;
675	vm_offset_t retval = 0;
676
677	PMAP_LOCK(pmap);
678	pte = pmap_pte(pmap, va);
679	if (pte) {
680		retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK);
681	}
682	PMAP_UNLOCK(pmap);
683	return retval;
684}
685
686/*
687 *	Routine:	pmap_extract_and_hold
688 *	Function:
689 *		Atomically extract and hold the physical page
690 *		with the given pmap and virtual address pair
691 *		if that mapping permits the given protection.
692 */
693vm_page_t
694pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
695{
696	pt_entry_t pte;
697	vm_page_t m;
698	vm_paddr_t pa;
699
700	m = NULL;
701	pa = 0;
702	PMAP_LOCK(pmap);
703retry:
704	pte = *pmap_pte(pmap, va);
705	if (pte != 0 && pmap_pte_v(&pte) &&
706	    ((pte & PTE_RW) || (prot & VM_PROT_WRITE) == 0)) {
707		if (vm_page_pa_tryrelock(pmap, TLBLO_PTE_TO_PA(pte), &pa))
708			goto retry;
709
710		m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(pte));
711		vm_page_hold(m);
712	}
713	PA_UNLOCK_COND(pa);
714	PMAP_UNLOCK(pmap);
715	return (m);
716}
717
718/***************************************************
719 * Low level mapping routines.....
720 ***************************************************/
721
722/*
723 * add a wired page to the kva
724 */
725 /* PMAP_INLINE */ void
726pmap_kenter(vm_offset_t va, vm_paddr_t pa)
727{
728	register pt_entry_t *pte;
729	pt_entry_t npte, opte;
730
731#ifdef PMAP_DEBUG
732	printf("pmap_kenter:  va: 0x%08x -> pa: 0x%08x\n", va, pa);
733#endif
734	npte = TLBLO_PA_TO_PFN(pa) | PTE_RW | PTE_V | PTE_G | PTE_W;
735
736	if (is_cacheable_mem(pa))
737		npte |= PTE_CACHE;
738	else
739		npte |= PTE_UNCACHED;
740
741	pte = pmap_pte(kernel_pmap, va);
742	opte = *pte;
743	*pte = npte;
744
745	pmap_update_page(kernel_pmap, va, npte);
746}
747
748/*
749 * remove a page from the kernel pagetables
750 */
751 /* PMAP_INLINE */ void
752pmap_kremove(vm_offset_t va)
753{
754	register pt_entry_t *pte;
755
756	/*
757	 * Write back all caches from the page being destroyed
758	 */
759	mips_dcache_wbinv_range_index(va, PAGE_SIZE);
760
761	pte = pmap_pte(kernel_pmap, va);
762	*pte = PTE_G;
763	pmap_invalidate_page(kernel_pmap, va);
764}
765
766/*
767 *	Used to map a range of physical addresses into kernel
768 *	virtual address space.
769 *
770 *	The value passed in '*virt' is a suggested virtual address for
771 *	the mapping. Architectures which can support a direct-mapped
772 *	physical to virtual region can return the appropriate address
773 *	within that region, leaving '*virt' unchanged. Other
774 *	architectures should map the pages starting at '*virt' and
775 *	update '*virt' with the first usable address after the mapped
776 *	region.
777 */
778vm_offset_t
779pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
780{
781	vm_offset_t va, sva;
782
783	va = sva = *virt;
784	while (start < end) {
785		pmap_kenter(va, start);
786		va += PAGE_SIZE;
787		start += PAGE_SIZE;
788	}
789	*virt = va;
790	return (sva);
791}
792
793/*
794 * Add a list of wired pages to the kva
795 * this routine is only used for temporary
796 * kernel mappings that do not need to have
797 * page modification or references recorded.
798 * Note that old mappings are simply written
799 * over.  The page *must* be wired.
800 */
801void
802pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
803{
804	int i;
805	vm_offset_t origva = va;
806
807	for (i = 0; i < count; i++) {
808		pmap_flush_pvcache(m[i]);
809		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
810		va += PAGE_SIZE;
811	}
812
813	mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count);
814}
815
816/*
817 * this routine jerks page mappings from the
818 * kernel -- it is meant only for temporary mappings.
819 */
820void
821pmap_qremove(vm_offset_t va, int count)
822{
823	/*
824	 * No need to wb/inv caches here,
825	 *   pmap_kremove will do it for us
826	 */
827
828	while (count-- > 0) {
829		pmap_kremove(va);
830		va += PAGE_SIZE;
831	}
832}
833
834/***************************************************
835 * Page table page management routines.....
836 ***************************************************/
837
838/*  Revision 1.507
839 *
840 * Simplify the reference counting of page table pages.	 Specifically, use
841 * the page table page's wired count rather than its hold count to contain
842 * the reference count.
843 */
844
845/*
846 * This routine frees a page table page once its wire count has dropped
847 * to zero: the page is unmapped from the segment table and released.
848 */
849static int
850_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
851{
852
853	/*
854	 * unmap the page table page
855	 */
856	pmap->pm_segtab[m->pindex] = 0;
857	--pmap->pm_stats.resident_count;
858
859	if (pmap->pm_ptphint == m)
860		pmap->pm_ptphint = NULL;
861
862	/*
863	 * If the page is finally unwired, simply free it.
864	 */
865	atomic_subtract_int(&cnt.v_wire_count, 1);
866	PMAP_UNLOCK(pmap);
867	vm_page_unlock_queues();
868	pmap_release_pte_page(m);
869	vm_page_lock_queues();
870	PMAP_LOCK(pmap);
871	return (1);
872}
873
874static PMAP_INLINE int
875pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
876{
877	--m->wire_count;
878	if (m->wire_count == 0)
879		return (_pmap_unwire_pte_hold(pmap, m));
880	else
881		return (0);
882}
883
884/*
885 * After removing a page table entry, this routine is used to
886 * conditionally free the page, and manage the hold/wire counts.
887 */
888static int
889pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
890{
891	unsigned ptepindex;
892	pd_entry_t pteva;
893
894	if (va >= VM_MAXUSER_ADDRESS)
895		return (0);
896
897	if (mpte == NULL) {
898		ptepindex = (va >> SEGSHIFT);
899		if (pmap->pm_ptphint &&
900		    (pmap->pm_ptphint->pindex == ptepindex)) {
901			mpte = pmap->pm_ptphint;
902		} else {
903			pteva = *pmap_pde(pmap, va);
904			mpte = PHYS_TO_VM_PAGE(MIPS_KSEG0_TO_PHYS(pteva));
905			pmap->pm_ptphint = mpte;
906		}
907	}
908	return pmap_unwire_pte_hold(pmap, mpte);
909}
910
911void
912pmap_pinit0(pmap_t pmap)
913{
914	int i;
915
916	PMAP_LOCK_INIT(pmap);
917	pmap->pm_segtab = kernel_segmap;
918	pmap->pm_active = 0;
919	pmap->pm_ptphint = NULL;
920	for (i = 0; i < MAXCPU; i++) {
921		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
922		pmap->pm_asid[i].gen = 0;
923	}
924	PCPU_SET(curpmap, pmap);
925	TAILQ_INIT(&pmap->pm_pvlist);
926	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
927}
928
929static void
930pmap_ptpgzone_dtor(void *mem, int size, void *arg)
931{
932#ifdef INVARIANTS
933	static char zeropage[PAGE_SIZE];
934
935	KASSERT(size == PAGE_SIZE,
936		("pmap_ptpgzone_dtor: invalid size %d", size));
937	KASSERT(bcmp(mem, zeropage, PAGE_SIZE) == 0,
938		("pmap_ptpgzone_dtor: freeing a non-zeroed page"));
939#endif
940}
941
942static void *
943pmap_ptpgzone_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
944{
945	vm_page_t m;
946	vm_paddr_t paddr;
947	int tries;
948
949	KASSERT(bytes == PAGE_SIZE,
950		("pmap_ptpgzone_allocf: invalid allocation size %d", bytes));
951
952	*flags = UMA_SLAB_PRIV;
953	tries = 0;
954retry:
955	m = vm_phys_alloc_contig(1, 0, MIPS_KSEG0_LARGEST_PHYS,
956	    PAGE_SIZE, PAGE_SIZE);
957	if (m == NULL) {
958                if (tries < ((wait & M_NOWAIT) != 0 ? 1 : 3)) {
959			vm_contig_grow_cache(tries, 0, MIPS_KSEG0_LARGEST_PHYS);
960			tries++;
961			goto retry;
962		} else
963			return (NULL);
964	}
965
966	paddr = VM_PAGE_TO_PHYS(m);
967	return ((void *)MIPS_PHYS_TO_KSEG0(paddr));
968}
969
970static vm_page_t
971pmap_alloc_pte_page(pmap_t pmap, unsigned int index, int wait, vm_offset_t *vap)
972{
973	vm_paddr_t paddr;
974	void *va;
975	vm_page_t m;
976	int locked;
977
978	locked = mtx_owned(&pmap->pm_mtx);
979	if (locked) {
980		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
981		PMAP_UNLOCK(pmap);
982		vm_page_unlock_queues();
983	}
984	va = uma_zalloc(ptpgzone, wait);
985	if (locked) {
986		vm_page_lock_queues();
987		PMAP_LOCK(pmap);
988	}
989	if (va == NULL)
990		return (NULL);
991
992	paddr = MIPS_KSEG0_TO_PHYS(va);
993	m = PHYS_TO_VM_PAGE(paddr);
994
995	if (!locked)
996		vm_page_lock_queues();
997	m->pindex = index;
998	m->valid = VM_PAGE_BITS_ALL;
999	m->wire_count = 1;
1000	if (!locked)
1001		vm_page_unlock_queues();
1002
1003	atomic_add_int(&cnt.v_wire_count, 1);
1004	*vap = (vm_offset_t)va;
1005	return (m);
1006}
1007
1008static void
1009pmap_release_pte_page(vm_page_t m)
1010{
1011	void *va;
1012	vm_paddr_t paddr;
1013
1014	paddr = VM_PAGE_TO_PHYS(m);
1015	va = (void *)MIPS_PHYS_TO_KSEG0(paddr);
1016	uma_zfree(ptpgzone, va);
1017}
1018
1019/*
1020 * Initialize a preallocated and zeroed pmap structure,
1021 * such as one in a vmspace structure.
1022 */
1023int
1024pmap_pinit(pmap_t pmap)
1025{
1026	vm_offset_t ptdva;
1027	vm_page_t ptdpg;
1028	int i;
1029
1030	PMAP_LOCK_INIT(pmap);
1031
1032	/*
1033	 * allocate the page directory page
1034	 */
1035	ptdpg = pmap_alloc_pte_page(pmap, NUSERPGTBLS, M_WAITOK, &ptdva);
1036	if (ptdpg == NULL)
1037		return (0);
1038
1039	pmap->pm_segtab = (pd_entry_t *)ptdva;
1040	pmap->pm_active = 0;
1041	pmap->pm_ptphint = NULL;
1042	for (i = 0; i < MAXCPU; i++) {
1043		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1044		pmap->pm_asid[i].gen = 0;
1045	}
1046	TAILQ_INIT(&pmap->pm_pvlist);
1047	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1048
1049	return (1);
1050}
1051
1052/*
1053 * This routine is called when the page table page needed by a mapping
1054 * is not resident; it allocates one and installs it in the segment table.
1055 */
1056static vm_page_t
1057_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
1058{
1059	vm_offset_t pteva;
1060	vm_page_t m;
1061
1062	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1063	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1064	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1065
1066	/*
1067	 * Find or fabricate a new pagetable page
1068	 */
1069	m = pmap_alloc_pte_page(pmap, ptepindex, flags, &pteva);
1070	if (m == NULL)
1071		return (NULL);
1072
1073	/*
1074	 * Map the pagetable page into the process address space, if it
1075	 * isn't already there.
1076	 */
1077
1078	pmap->pm_stats.resident_count++;
1079	pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva;
1080
1081	/*
1082	 * Set the page table hint
1083	 */
1084	pmap->pm_ptphint = m;
1085	return (m);
1086}
1087
1088static vm_page_t
1089pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
1090{
1091	unsigned ptepindex;
1092	vm_offset_t pteva;
1093	vm_page_t m;
1094
1095	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1096	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1097	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1098
1099	/*
1100	 * Calculate pagetable page index
1101	 */
1102	ptepindex = va >> SEGSHIFT;
1103retry:
1104	/*
1105	 * Get the page directory entry
1106	 */
1107	pteva = (vm_offset_t)pmap->pm_segtab[ptepindex];
1108
1109	/*
1110	 * If the page table page is mapped, we just increment the hold
1111	 * count, and activate it.
1112	 */
1113	if (pteva) {
1114		/*
1115		 * In order to get the page table page, try the hint first.
1116		 */
1117		if (pmap->pm_ptphint &&
1118		    (pmap->pm_ptphint->pindex == ptepindex)) {
1119			m = pmap->pm_ptphint;
1120		} else {
1121			m = PHYS_TO_VM_PAGE(MIPS_KSEG0_TO_PHYS(pteva));
1122			pmap->pm_ptphint = m;
1123		}
1124		m->wire_count++;
1125	} else {
1126		/*
1127		 * Here if the pte page isn't mapped, or if it has been
1128		 * deallocated.
1129		 */
1130		m = _pmap_allocpte(pmap, ptepindex, flags);
1131		if (m == NULL && (flags & M_WAITOK))
1132			goto retry;
1133	}
1134	return m;
1135}
1136
1137
1138/***************************************************
1139* Pmap allocation/deallocation routines.
1140 ***************************************************/
1141/*
1142 *  Revision 1.397
1143 *  - Merged pmap_release and pmap_release_free_page.  When pmap_release is
1144 *    called only the page directory page(s) can be left in the pmap pte
1145 *    object, since all page table pages will have been freed by
1146 *    pmap_remove_pages and pmap_remove.  In addition, there can only be one
1147 *    reference to the pmap and the page directory is wired, so the page(s)
1148 *    can never be busy.  So all there is to do is clear the magic mappings
1149 *    from the page directory and free the page(s).
1150 */
1151
1152
1153/*
1154 * Release any resources held by the given physical map.
1155 * Called when a pmap initialized by pmap_pinit is being released.
1156 * Should only be called if the map contains no valid mappings.
1157 */
1158void
1159pmap_release(pmap_t pmap)
1160{
1161	vm_offset_t ptdva;
1162	vm_page_t ptdpg;
1163
1164	KASSERT(pmap->pm_stats.resident_count == 0,
1165	    ("pmap_release: pmap resident count %ld != 0",
1166	    pmap->pm_stats.resident_count));
1167
1168	ptdva = (vm_offset_t)pmap->pm_segtab;
1169	ptdpg = PHYS_TO_VM_PAGE(MIPS_KSEG0_TO_PHYS(ptdva));
1170
1171	ptdpg->wire_count--;
1172	atomic_subtract_int(&cnt.v_wire_count, 1);
1173	pmap_release_pte_page(ptdpg);
1174	PMAP_LOCK_DESTROY(pmap);
1175}
1176
1177/*
1178 * grow the number of kernel page table entries, if needed
1179 */
1180void
1181pmap_growkernel(vm_offset_t addr)
1182{
1183	vm_offset_t pageva;
1184	vm_page_t nkpg;
1185	pt_entry_t *pte;
1186	int i;
1187
1188	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1189	if (kernel_vm_end == 0) {
1190		kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
1191		nkpt = 0;
1192		while (segtab_pde(kernel_segmap, kernel_vm_end)) {
1193			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1194			    ~(PAGE_SIZE * NPTEPG - 1);
1195			nkpt++;
1196			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1197				kernel_vm_end = kernel_map->max_offset;
1198				break;
1199			}
1200		}
1201	}
1202	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1203	if (addr - 1 >= kernel_map->max_offset)
1204		addr = kernel_map->max_offset;
1205	while (kernel_vm_end < addr) {
1206		if (segtab_pde(kernel_segmap, kernel_vm_end)) {
1207			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1208			    ~(PAGE_SIZE * NPTEPG - 1);
1209			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1210				kernel_vm_end = kernel_map->max_offset;
1211				break;
1212			}
1213			continue;
1214		}
1215		/*
1216		 * This index is bogus, but out of the way
1217		 */
1218		nkpg = pmap_alloc_pte_page(kernel_pmap, nkpt, M_NOWAIT, &pageva);
1219
1220		if (!nkpg)
1221			panic("pmap_growkernel: no memory to grow kernel");
1222
1223		nkpt++;
1224		pte = (pt_entry_t *)pageva;
1225		segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte;
1226
1227		/*
1228		 * The R[4-7]?00 stores only one copy of the Global bit in
1229		 * the translation lookaside buffer for each 2-page entry.
1230		 * Thus invalid entries must have the Global bit set, so that
1231		 * when the EntryLo0 and EntryLo1 G bits are ANDed together
1232		 * they still produce a global bit to store in the TLB.
1233		 */
1234		for (i = 0; i < NPTEPG; i++, pte++)
1235			*pte = PTE_G;
1236
1237		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1238		    ~(PAGE_SIZE * NPTEPG - 1);
1239		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1240			kernel_vm_end = kernel_map->max_offset;
1241			break;
1242		}
1243	}
1244}
1245
1246/***************************************************
1247* page management routines.
1248 ***************************************************/
1249
1250/*
1251 * free the pv_entry back to the free list
1252 */
1253static PMAP_INLINE void
1254free_pv_entry(pv_entry_t pv)
1255{
1256
1257	pv_entry_count--;
1258	uma_zfree(pvzone, pv);
1259}
1260
1261/*
1262 * get a new pv_entry, allocating a block from the system
1263 * when needed.
1264 * the memory allocation is performed bypassing the malloc code
1265 * because of the possibility of allocations at interrupt time.
1266 */
1267static pv_entry_t
1268get_pv_entry(pmap_t locked_pmap)
1269{
1270	static const struct timeval printinterval = { 60, 0 };
1271	static struct timeval lastprint;
1272	struct vpgqueues *vpq;
1273	pt_entry_t *pte, oldpte;
1274	pmap_t pmap;
1275	pv_entry_t allocated_pv, next_pv, pv;
1276	vm_offset_t va;
1277	vm_page_t m;
1278
1279	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
1280	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1281	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
1282	if (allocated_pv != NULL) {
1283		pv_entry_count++;
1284		if (pv_entry_count > pv_entry_high_water)
1285			pagedaemon_wakeup();
1286		else
1287			return (allocated_pv);
1288	}
1289	/*
1290	 * Reclaim pv entries: At first, destroy mappings to inactive
1291	 * pages.  After that, if a pv entry is still needed, destroy
1292	 * mappings to active pages.
1293	 */
1294	if (ratecheck(&lastprint, &printinterval))
1295		printf("Approaching the limit on PV entries, "
1296		    "increase the vm.pmap.shpgperproc tunable.\n");
1297	vpq = &vm_page_queues[PQ_INACTIVE];
1298retry:
1299	TAILQ_FOREACH(m, &vpq->pl, pageq) {
1300		if (m->hold_count || m->busy)
1301			continue;
1302		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
1303			va = pv->pv_va;
1304			pmap = pv->pv_pmap;
1305			/* Avoid deadlock and lock recursion. */
1306			if (pmap > locked_pmap)
1307				PMAP_LOCK(pmap);
1308			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
1309				continue;
1310			pmap->pm_stats.resident_count--;
1311			pte = pmap_pte(pmap, va);
1312			KASSERT(pte != NULL, ("pte"));
1313			oldpte = loadandclear((u_int *)pte);
1314			if (is_kernel_pmap(pmap))
1315				*pte = PTE_G;
1316			KASSERT((oldpte & PTE_W) == 0,
1317			    ("wired pte for unwired page"));
1318			if (m->md.pv_flags & PV_TABLE_REF)
1319				vm_page_flag_set(m, PG_REFERENCED);
1320			if (oldpte & PTE_M)
1321				vm_page_dirty(m);
1322			pmap_invalidate_page(pmap, va);
1323			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1324			m->md.pv_list_count--;
1325			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1326			pmap_unuse_pt(pmap, va, pv->pv_ptem);
1327			if (pmap != locked_pmap)
1328				PMAP_UNLOCK(pmap);
1329			if (allocated_pv == NULL)
1330				allocated_pv = pv;
1331			else
1332				free_pv_entry(pv);
1333		}
1334		if (TAILQ_EMPTY(&m->md.pv_list)) {
1335			vm_page_flag_clear(m, PG_WRITEABLE);
1336			m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1337		}
1338	}
1339	if (allocated_pv == NULL) {
1340		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
1341			vpq = &vm_page_queues[PQ_ACTIVE];
1342			goto retry;
1343		}
1344		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
1345	}
1346	return (allocated_pv);
1347}
1348
1349/*
1350 *  Revision 1.370
1351 *
1352 *  Move pmap_collect() out of the machine-dependent code, rename it
1353 *  to reflect its new location, and add page queue and flag locking.
1354 *
1355 *  Notes: (1) alpha, i386, and ia64 had identical implementations
1356 *  of pmap_collect() in terms of machine-independent interfaces;
1357 *  (2) sparc64 doesn't require it; (3) powerpc had it as a TODO.
1358 *
1359 *  MIPS implementation was identical to alpha [Junos 8.2]
1360 */
1361
1362/*
1363 * Find the pv entry for the given pmap/va pair, searching whichever of
1364 * the page's pv list or the pmap's pv list is expected to be shorter,
1365 * and unlink it from both lists.  The caller is responsible for freeing
1366 * the now unused entry (see pmap_pvh_free()).
1367 */
1368
1369static pv_entry_t
1370pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1371{
1372	pv_entry_t pv;
1373
1374	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1375	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1376	if (pvh->pv_list_count < pmap->pm_stats.resident_count) {
1377		TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
1378			if (pmap == pv->pv_pmap && va == pv->pv_va)
1379				break;
1380		}
1381	} else {
1382		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1383			if (va == pv->pv_va)
1384				break;
1385		}
1386	}
1387	if (pv != NULL) {
1388		TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
1389		pvh->pv_list_count--;
1390		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1391	}
1392	return (pv);
1393}
1394
1395static void
1396pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1397{
1398	pv_entry_t pv;
1399
1400	pv = pmap_pvh_remove(pvh, pmap, va);
1401	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx",
1402	     (u_long)VM_PAGE_TO_PHYS(member2struct(vm_page, md, pvh)),
1403	     (u_long)va));
1404	free_pv_entry(pv);
1405}
1406
1407static void
1408pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
1409{
1410
1411	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1412	pmap_pvh_free(&m->md, pmap, va);
1413	if (TAILQ_EMPTY(&m->md.pv_list))
1414		vm_page_flag_clear(m, PG_WRITEABLE);
1415}
1416
1417/*
1418 * Conditionally create a pv entry.
1419 */
1420static boolean_t
1421pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
1422    vm_page_t m)
1423{
1424	pv_entry_t pv;
1425
1426	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1427	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1428	if (pv_entry_count < pv_entry_high_water &&
1429	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
1430		pv_entry_count++;
1431		pv->pv_va = va;
1432		pv->pv_pmap = pmap;
1433		pv->pv_ptem = mpte;
1434		pv->pv_wired = FALSE;
1435		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1436		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1437		m->md.pv_list_count++;
1438		return (TRUE);
1439	} else
1440		return (FALSE);
1441}
1442
1443/*
1444 * pmap_remove_pte: do the things to unmap a page in a process
1445 */
1446static int
1447pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
1448{
1449	pt_entry_t oldpte;
1450	vm_page_t m;
1451	vm_offset_t pa;
1452
1453	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1454	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1455
1456	oldpte = loadandclear((u_int *)ptq);
1457	if (is_kernel_pmap(pmap))
1458		*ptq = PTE_G;
1459
1460	if (oldpte & PTE_W)
1461		pmap->pm_stats.wired_count -= 1;
1462
1463	pmap->pm_stats.resident_count -= 1;
1464	pa = TLBLO_PTE_TO_PA(oldpte);
1465
1466	if (page_is_managed(pa)) {
1467		m = PHYS_TO_VM_PAGE(pa);
1468		if (oldpte & PTE_M) {
1469#if defined(PMAP_DIAGNOSTIC)
1470			if (pmap_nw_modified(oldpte)) {
1471				printf(
1472				    "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1473				    va, oldpte);
1474			}
1475#endif
1476			vm_page_dirty(m);
1477		}
1478		if (m->md.pv_flags & PV_TABLE_REF)
1479			vm_page_flag_set(m, PG_REFERENCED);
1480		m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1481
1482		pmap_remove_entry(pmap, m, va);
1483	}
1484	return pmap_unuse_pt(pmap, va, NULL);
1485}
1486
1487/*
1488 * Remove a single page from a process address space
1489 */
1490static void
1491pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1492{
1493	register pt_entry_t *ptq;
1494
1495	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1496	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1497	ptq = pmap_pte(pmap, va);
1498
1499	/*
1500	 * if there is no pte for this address, just skip it!!!
1501	 */
1502	if (!ptq || !pmap_pte_v(ptq)) {
1503		return;
1504	}
1505
1506	/*
1507	 * Write back all caches from the page being destroyed
1508	 */
1509	mips_dcache_wbinv_range_index(va, PAGE_SIZE);
1510
1511	/*
1512	 * Remove the pte and invalidate the TLB entry for this va.
1513	 */
1514	(void)pmap_remove_pte(pmap, ptq, va);
1515	pmap_invalidate_page(pmap, va);
1516
1517	return;
1518}
1519
1520/*
1521 *	Remove the given range of addresses from the specified map.
1522 *
1523 *	It is assumed that the start and end are properly
1524 *	rounded to the page size.
1525 */
1526void
1527pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
1528{
1529	vm_offset_t va, nva;
1530
1531	if (pmap == NULL)
1532		return;
1533
1534	if (pmap->pm_stats.resident_count == 0)
1535		return;
1536
1537	vm_page_lock_queues();
1538	PMAP_LOCK(pmap);
1539
1540	/*
1541	 * Special handling for removing a single page: a very common
1542	 * operation for which we can short-circuit some code.
1543	 */
1544	if ((sva + PAGE_SIZE) == eva) {
1545		pmap_remove_page(pmap, sva);
1546		goto out;
1547	}
1548	for (va = sva; va < eva; va = nva) {
1549		if (!*pmap_pde(pmap, va)) {
1550			nva = mips_segtrunc(va + MIPS_SEGSIZE);
1551			continue;
1552		}
1553		pmap_remove_page(pmap, va);
1554		nva = va + PAGE_SIZE;
1555	}
1556
1557out:
1558	vm_page_unlock_queues();
1559	PMAP_UNLOCK(pmap);
1560}
1561
1562/*
1563 *	Routine:	pmap_remove_all
1564 *	Function:
1565 *		Removes this physical page from
1566 *		all physical maps in which it resides.
1567 *		Reflects back modify bits to the pager.
1568 *
1569 *	Notes:
1570 *		Original versions of this routine were very
1571 *		inefficient because they iteratively called
1572 *		pmap_remove (slow...)
1573 */
1574
1575void
1576pmap_remove_all(vm_page_t m)
1577{
1578	register pv_entry_t pv;
1579	register pt_entry_t *pte, tpte;
1580
1581	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1582	    ("pmap_remove_all: page %p is fictitious", m));
1583	vm_page_lock_queues();
1584
1585	if (m->md.pv_flags & PV_TABLE_REF)
1586		vm_page_flag_set(m, PG_REFERENCED);
1587
1588	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1589		PMAP_LOCK(pv->pv_pmap);
1590
1591		/*
1592		 * If this is the last mapping, write back all caches from
1593		 * the page being destroyed.
1594	 	 */
1595		if (m->md.pv_list_count == 1)
1596			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
1597
1598		pv->pv_pmap->pm_stats.resident_count--;
1599
1600		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
1601
1602		tpte = loadandclear((u_int *)pte);
1603		if (is_kernel_pmap(pv->pv_pmap))
1604			*pte = PTE_G;
1605
1606		if (tpte & PTE_W)
1607			pv->pv_pmap->pm_stats.wired_count--;
1608
1609		/*
1610		 * Update the vm_page_t clean and reference bits.
1611		 */
1612		if (tpte & PTE_M) {
1613#if defined(PMAP_DIAGNOSTIC)
1614			if (pmap_nw_modified(tpte)) {
1615				printf(
1616				    "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1617				    pv->pv_va, tpte);
1618			}
1619#endif
1620			vm_page_dirty(m);
1621		}
1622		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1623
1624		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1625		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1626		m->md.pv_list_count--;
1627		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1628		PMAP_UNLOCK(pv->pv_pmap);
1629		free_pv_entry(pv);
1630	}
1631
1632	vm_page_flag_clear(m, PG_WRITEABLE);
1633	m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1634	vm_page_unlock_queues();
1635}
1636
1637/*
1638 *	Set the physical protection on the
1639 *	specified range of this map as requested.
1640 */
1641void
1642pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1643{
1644	pt_entry_t *pte;
1645
1646	if (pmap == NULL)
1647		return;
1648
1649	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1650		pmap_remove(pmap, sva, eva);
1651		return;
1652	}
1653	if (prot & VM_PROT_WRITE)
1654		return;
1655
1656	vm_page_lock_queues();
1657	PMAP_LOCK(pmap);
1658	while (sva < eva) {
1659		pt_entry_t pbits, obits;
1660		vm_page_t m;
1661		vm_offset_t pa;
1662
1663		/*
1664		 * If segment table entry is empty, skip this segment.
1665		 */
1666		if (!*pmap_pde(pmap, sva)) {
1667			sva = mips_segtrunc(sva + MIPS_SEGSIZE);
1668			continue;
1669		}
1670		/*
1671		 * If pte is invalid, skip this page
1672		 */
1673		pte = pmap_pte(pmap, sva);
1674		if (!pmap_pte_v(pte)) {
1675			sva += PAGE_SIZE;
1676			continue;
1677		}
1678retry:
1679		obits = pbits = *pte;
1680		pa = TLBLO_PTE_TO_PA(pbits);
1681
1682		if (page_is_managed(pa) && (pbits & PTE_M) != 0) {
1683			m = PHYS_TO_VM_PAGE(pa);
1684			vm_page_dirty(m);
1685			m->md.pv_flags &= ~PV_TABLE_MOD;
1686		}
1687		pbits = (pbits & ~PTE_M) | PTE_RO;
1688
1689		if (pbits != *pte) {
1690			if (!atomic_cmpset_int((u_int *)pte, obits, pbits))
1691				goto retry;
1692			pmap_update_page(pmap, sva, pbits);
1693		}
1694		sva += PAGE_SIZE;
1695	}
1696	vm_page_unlock_queues();
1697	PMAP_UNLOCK(pmap);
1698}
1699
1700/*
1701 *	Insert the given physical page (p) at
1702 *	the specified virtual address (v) in the
1703 *	target physical map with the protection requested.
1704 *
1705 *	If specified, the page will be wired down, meaning
1706 *	that the related pte can not be reclaimed.
1707 *
1708 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1709 *	or lose information.  That is, this routine must actually
1710 *	insert this page into the given map NOW.
1711 */
1712void
1713pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1714    vm_prot_t prot, boolean_t wired)
1715{
1716	vm_offset_t pa, opa;
1717	register pt_entry_t *pte;
1718	pt_entry_t origpte, newpte;
1719	pv_entry_t pv;
1720	vm_page_t mpte, om;
1721	int rw = 0;
1722
1723	if (pmap == NULL)
1724		return;
1725
1726	va &= ~PAGE_MASK;
1727 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1728	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
1729	    (m->oflags & VPO_BUSY) != 0,
1730	    ("pmap_enter: page %p is not busy", m));
1731
1732	mpte = NULL;
1733
1734	vm_page_lock_queues();
1735	PMAP_LOCK(pmap);
1736
1737	/*
1738	 * In the case that a page table page is not resident, we are
1739	 * creating it here.
1740	 */
1741	if (va < VM_MAXUSER_ADDRESS) {
1742		mpte = pmap_allocpte(pmap, va, M_WAITOK);
1743	}
1744	pte = pmap_pte(pmap, va);
1745
1746	/*
1747	 * Page Directory table entry not valid, we need a new PT page
1748	 */
1749	if (pte == NULL) {
1750		panic("pmap_enter: invalid page directory, pdir=%p, va=%p\n",
1751		    (void *)pmap->pm_segtab, (void *)va);
1752	}
1753	pa = VM_PAGE_TO_PHYS(m);
1754	om = NULL;
1755	origpte = *pte;
1756	opa = TLBLO_PTE_TO_PA(origpte);
1757
1758	/*
1759	 * Mapping has not changed, must be protection or wiring change.
1760	 */
1761	if ((origpte & PTE_V) && (opa == pa)) {
1762		/*
1763		 * Wiring change, just update stats. We don't worry about
1764		 * wiring PT pages as they remain resident as long as there
1765		 * are valid mappings in them. Hence, if a user page is
1766		 * wired, the PT page will be also.
1767		 */
1768		if (wired && ((origpte & PTE_W) == 0))
1769			pmap->pm_stats.wired_count++;
1770		else if (!wired && (origpte & PTE_W))
1771			pmap->pm_stats.wired_count--;
1772
1773#if defined(PMAP_DIAGNOSTIC)
1774		if (pmap_nw_modified(origpte)) {
1775			printf(
1776			    "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
1777			    va, origpte);
1778		}
1779#endif
1780
1781		/*
1782		 * Remove extra pte reference
1783		 */
1784		if (mpte)
1785			mpte->wire_count--;
1786
1787		if (page_is_managed(opa)) {
1788			om = m;
1789		}
1790		goto validate;
1791	}
1792
1793	pv = NULL;
1794
1795	/*
1796	 * Mapping has changed, invalidate old range and fall through to
1797	 * handle validating new mapping.
1798	 */
1799	if (opa) {
1800		if (origpte & PTE_W)
1801			pmap->pm_stats.wired_count--;
1802
1803		if (page_is_managed(opa)) {
1804			om = PHYS_TO_VM_PAGE(opa);
1805			pv = pmap_pvh_remove(&om->md, pmap, va);
1806		}
1807		if (mpte != NULL) {
1808			mpte->wire_count--;
1809			KASSERT(mpte->wire_count > 0,
1810			    ("pmap_enter: missing reference to page table page,"
1811			    " va: %p", (void *)va));
1812		}
1813	} else
1814		pmap->pm_stats.resident_count++;
1815
1816	/*
1817	 * Enter on the PV list if part of our managed memory. Note that we
1818	 * raise IPL while manipulating pv_table since pmap_enter can be
1819	 * called at interrupt time.
1820	 */
1821	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1822		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1823		    ("pmap_enter: managed mapping within the clean submap"));
1824		if (pv == NULL)
1825			pv = get_pv_entry(pmap);
1826		pv->pv_va = va;
1827		pv->pv_pmap = pmap;
1828		pv->pv_ptem = mpte;
1829		pv->pv_wired = wired;
1830		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1831		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1832		m->md.pv_list_count++;
1833	} else if (pv != NULL)
1834		free_pv_entry(pv);
1835
1836	/*
1837	 * Increment counters
1838	 */
1839	if (wired)
1840		pmap->pm_stats.wired_count++;
1841
1842validate:
1843	if ((access & VM_PROT_WRITE) != 0)
1844		m->md.pv_flags |= PV_TABLE_MOD | PV_TABLE_REF;
1845	rw = init_pte_prot(va, m, prot);
1846
1847#ifdef PMAP_DEBUG
1848	printf("pmap_enter:  va: 0x%08x -> pa: 0x%08x\n", va, pa);
1849#endif
1850	/*
1851	 * Now validate mapping with desired protection/wiring.
1852	 */
1853	newpte = TLBLO_PA_TO_PFN(pa) | rw | PTE_V;
1854
1855	if (is_cacheable_mem(pa))
1856		newpte |= PTE_CACHE;
1857	else
1858		newpte |= PTE_UNCACHED;
1859
1860	if (wired)
1861		newpte |= PTE_W;
1862
1863	if (is_kernel_pmap(pmap)) {
1864	         newpte |= PTE_G;
1865	}
1866
1867	/*
1868	 * if the mapping or permission bits are different, we need to
1869	 * update the pte.
1870	 */
1871	if (origpte != newpte) {
1872		if (origpte & PTE_V) {
1873			*pte = newpte;
1874			if (page_is_managed(opa) && (opa != pa)) {
1875				if (om->md.pv_flags & PV_TABLE_REF)
1876					vm_page_flag_set(om, PG_REFERENCED);
1877				om->md.pv_flags &=
1878				    ~(PV_TABLE_REF | PV_TABLE_MOD);
1879			}
1880			if (origpte & PTE_M) {
1881				KASSERT((origpte & PTE_RW),
1882				    ("pmap_enter: modified page not writable:"
1883				    " va: %p, pte: 0x%x", (void *)va, origpte));
1884				if (page_is_managed(opa))
1885					vm_page_dirty(om);
1886			}
1887			if (page_is_managed(opa) &&
1888			    TAILQ_EMPTY(&om->md.pv_list))
1889				vm_page_flag_clear(om, PG_WRITEABLE);
1890		} else {
1891			*pte = newpte;
1892		}
1893	}
1894	pmap_update_page(pmap, va, newpte);
1895
1896	/*
1897	 * Sync I & D caches for executable pages.  Do this only if the
1898	 * target pmap belongs to the current process.  Otherwise, an
1899	 * unresolvable TLB miss may occur.
1900	 */
1901	if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
1902	    (prot & VM_PROT_EXECUTE)) {
1903		mips_icache_sync_range(va, PAGE_SIZE);
1904		mips_dcache_wbinv_range(va, PAGE_SIZE);
1905	}
1906	vm_page_unlock_queues();
1907	PMAP_UNLOCK(pmap);
1908}
1909
1910/*
1911 * this code makes some *MAJOR* assumptions:
1912 * 1. Current pmap & pmap exists.
1913 * 2. Not wired.
1914 * 3. Read access.
1915 * 4. No page table pages.
1916 * but is *MUCH* faster than pmap_enter...
1917 */
1918
1919void
1920pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1921{
1922
1923	vm_page_lock_queues();
1924	PMAP_LOCK(pmap);
1925	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
1926	vm_page_unlock_queues();
1927	PMAP_UNLOCK(pmap);
1928}
1929
1930static vm_page_t
1931pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1932    vm_prot_t prot, vm_page_t mpte)
1933{
1934	pt_entry_t *pte;
1935	vm_offset_t pa;
1936
1937	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1938	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1939	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1940	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1941	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1942
1943	/*
1944	 * In the case that a page table page is not resident, we are
1945	 * creating it here.
1946	 */
1947	if (va < VM_MAXUSER_ADDRESS) {
1948		unsigned ptepindex;
1949		vm_offset_t pteva;
1950
1951		/*
1952		 * Calculate pagetable page index
1953		 */
1954		ptepindex = va >> SEGSHIFT;
1955		if (mpte && (mpte->pindex == ptepindex)) {
1956			mpte->wire_count++;
1957		} else {
1958			/*
1959			 * Get the page directory entry
1960			 */
1961			pteva = (vm_offset_t)pmap->pm_segtab[ptepindex];
1962
1963			/*
1964			 * If the page table page is mapped, we just
1965			 * increment the hold count, and activate it.
1966			 */
1967			if (pteva) {
1968				if (pmap->pm_ptphint &&
1969				    (pmap->pm_ptphint->pindex == ptepindex)) {
1970					mpte = pmap->pm_ptphint;
1971				} else {
1972					mpte = PHYS_TO_VM_PAGE(
1973						MIPS_KSEG0_TO_PHYS(pteva));
1974					pmap->pm_ptphint = mpte;
1975				}
1976				mpte->wire_count++;
1977			} else {
1978				mpte = _pmap_allocpte(pmap, ptepindex,
1979				    M_NOWAIT);
1980				if (mpte == NULL)
1981					return (mpte);
1982			}
1983		}
1984	} else {
1985		mpte = NULL;
1986	}
1987
1988	pte = pmap_pte(pmap, va);
1989	if (pmap_pte_v(pte)) {
1990		if (mpte != NULL) {
1991			mpte->wire_count--;
1992			mpte = NULL;
1993		}
1994		return (mpte);
1995	}
1996
1997	/*
1998	 * Enter on the PV list if part of our managed memory.
1999	 */
2000	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
2001	    !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2002		if (mpte != NULL) {
2003			pmap_unwire_pte_hold(pmap, mpte);
2004			mpte = NULL;
2005		}
2006		return (mpte);
2007	}
2008
2009	/*
2010	 * Increment counters
2011	 */
2012	pmap->pm_stats.resident_count++;
2013
2014	pa = VM_PAGE_TO_PHYS(m);
2015
2016	/*
2017	 * Now validate mapping with RO protection
2018	 */
2019	*pte = TLBLO_PA_TO_PFN(pa) | PTE_V;
2020
2021	if (is_cacheable_mem(pa))
2022		*pte |= PTE_CACHE;
2023	else
2024		*pte |= PTE_UNCACHED;
2025
2026	if (is_kernel_pmap(pmap))
2027		*pte |= PTE_G;
2028	else {
2029		*pte |= PTE_RO;
2030		/*
2031		 * Sync I & D caches.  Do this only if the target pmap
2032		 * belongs to the current process.  Otherwise, an
2033		 * unresolvable TLB miss may occur. */
2034		if (pmap == &curproc->p_vmspace->vm_pmap) {
2035			va &= ~PAGE_MASK;
2036			mips_icache_sync_range(va, PAGE_SIZE);
2037			mips_dcache_wbinv_range(va, PAGE_SIZE);
2038		}
2039	}
2040	return (mpte);
2041}
2042
2043/*
2044 * Make a temporary mapping for a physical address.  This is only intended
2045 * to be used for panic dumps.
2046 */
2047void *
2048pmap_kenter_temporary(vm_paddr_t pa, int i)
2049{
2050	vm_offset_t va;
2051	register_t intr;
2052	if (i != 0)
2053		printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
2054		    __func__);
2055
2056	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
2057		va = MIPS_PHYS_TO_KSEG0(pa);
2058	} else {
2059		int cpu;
2060		struct local_sysmaps *sysm;
2061		pt_entry_t *pte, npte;
2062
2063		/* If this is used for anything other than dumps, we may need to
2064		 * leave interrupts disabled on return.  If crash dumps don't work
2065		 * when we get to this point, we might want to leave things
2066		 * disabled as a starting point ;-)
2067		 */
2068		intr = intr_disable();
2069		cpu = PCPU_GET(cpuid);
2070		sysm = &sysmap_lmem[cpu];
2071		/* Since this is for the debugger, no locks or any other fun */
2072		npte = TLBLO_PA_TO_PFN(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2073		pte = pmap_pte(kernel_pmap, sysm->base);
2074		*pte = npte;
2075		sysm->valid1 = 1;
2076		pmap_update_page(kernel_pmap, sysm->base, npte);
2077		va = sysm->base;
2078		intr_restore(intr);
2079	}
2080	return ((void *)va);
2081}
2082
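/*
 * Release the temporary mapping created by pmap_kenter_temporary().  For
 * addresses that were direct-mapped through KSEG0 there is nothing to undo.
 */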
2083void
2084pmap_kenter_temporary_free(vm_paddr_t pa)
2085{
2086	int cpu;
2087	register_t intr;
2088	struct local_sysmaps *sysm;
2089
2090	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
2091		/* nothing to do for this case */
2092		return;
2093	}
2094	cpu = PCPU_GET(cpuid);
2095	sysm = &sysmap_lmem[cpu];
2096	if (sysm->valid1) {
2097		pt_entry_t *pte;
2098
2099		intr = intr_disable();
2100		pte = pmap_pte(kernel_pmap, sysm->base);
2101		*pte = PTE_G;
2102		pmap_invalidate_page(kernel_pmap, sysm->base);
2103		intr_restore(intr);
2104		sysm->valid1 = 0;
2105	}
2106}
2107
2108/*
2109 * Moved the code to Machine Independent
2110 *	 vm_map_pmap_enter()
2111 */
2112
2113/*
2114 * Maps a sequence of resident pages belonging to the same object.
2115 * The sequence begins with the given page m_start.  This page is
2116 * mapped at the given virtual address start.  Each subsequent page is
2117 * mapped at a virtual address that is offset from start by the same
2118 * amount as the page is offset from m_start within the object.  The
2119 * last page in the sequence is the page with the largest offset from
2120 * m_start that can be mapped at a virtual address less than the given
2121 * virtual address end.  Not every virtual page between start and end
2122 * is mapped; only those for which a resident page exists with the
2123 * corresponding offset from m_start are mapped.
2124 */
2125void
2126pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2127    vm_page_t m_start, vm_prot_t prot)
2128{
2129	vm_page_t m, mpte;
2130	vm_pindex_t diff, psize;
2131
2132	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
2133	psize = atop(end - start);
2134	mpte = NULL;
2135	m = m_start;
2136	vm_page_lock_queues();
2137	PMAP_LOCK(pmap);
2138	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2139		mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
2140		    prot, mpte);
2141		m = TAILQ_NEXT(m, listq);
2142	}
2143	vm_page_unlock_queues();
2144	PMAP_UNLOCK(pmap);
2145}
2146
2147/*
2148 * pmap_object_init_pt preloads the ptes for a given object
2149 * into the specified pmap.  This eliminates the blast of soft
2150 * faults on process startup and immediately after an mmap.
2151 */
2152void
2153pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2154    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2155{
2156	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2157	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2158	    ("pmap_object_init_pt: non-device object"));
2159}
2160
2161/*
2162 *	Routine:	pmap_change_wiring
2163 *	Function:	Change the wiring attribute for a map/virtual-address
2164 *			pair.
2165 *	In/out conditions:
2166 *			The mapping must already exist in the pmap.
2167 */
2168void
2169pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
2170{
2171	register pt_entry_t *pte;
2172
2173	if (pmap == NULL)
2174		return;
2175
2176	PMAP_LOCK(pmap);
2177	pte = pmap_pte(pmap, va);
2178
2179	if (wired && !pmap_pte_w(pte))
2180		pmap->pm_stats.wired_count++;
2181	else if (!wired && pmap_pte_w(pte))
2182		pmap->pm_stats.wired_count--;
2183
2184	/*
2185	 * Wiring is not a hardware characteristic so there is no need to
2186	 * invalidate TLB.
2187	 */
2188	pmap_pte_set_w(pte, wired);
2189	PMAP_UNLOCK(pmap);
2190}
2191
2192/*
2193 *	Copy the range specified by src_addr/len
2194 *	from the source map to the range dst_addr/len
2195 *	in the destination map.
2196 *
2197 *	This routine is only advisory and need not do anything.
2198 */
2199
2200void
2201pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2202    vm_size_t len, vm_offset_t src_addr)
2203{
2204}
2205
2206/*
2207 *	pmap_zero_page zeros the specified hardware page by mapping
2208 *	the page into KVM and using bzero to clear its contents.
2209 */
2210void
2211pmap_zero_page(vm_page_t m)
2212{
2213	vm_offset_t va;
2214	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2215	register_t intr;
2216
2217	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
2218		va = MIPS_PHYS_TO_KSEG0(phys);
2219
2220		bzero((caddr_t)va, PAGE_SIZE);
2221		mips_dcache_wbinv_range(va, PAGE_SIZE);
2222	} else {
2223		PMAP_LMEM_MAP1(va, phys);
2224
2225		bzero((caddr_t)va, PAGE_SIZE);
2226		mips_dcache_wbinv_range(va, PAGE_SIZE);
2227
2228		PMAP_LMEM_UNMAP();
2229	}
2230}
2231
2232/*
2233 *	pmap_zero_page_area zeros the specified hardware page by mapping
2234 *	the page into KVM and using bzero to clear its contents.
2235 *
2236 *	off and size may not cover an area beyond a single hardware page.
2237 */
2238void
2239pmap_zero_page_area(vm_page_t m, int off, int size)
2240{
2241	vm_offset_t va;
2242	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2243	register_t intr;
2244
2245	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
2246		va = MIPS_PHYS_TO_KSEG0(phys);
2247		bzero((char *)(caddr_t)va + off, size);
2248		mips_dcache_wbinv_range(va + off, size);
2249	} else {
2250		PMAP_LMEM_MAP1(va, phys);
2251
2252		bzero((char *)va + off, size);
2253		mips_dcache_wbinv_range(va + off, size);
2254
2255		PMAP_LMEM_UNMAP();
2256	}
2257}
2258
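/*
 * pmap_zero_page_idle zeros the specified hardware page on behalf of the
 * page-zeroing idle thread.  As in pmap_zero_page(), the KSEG0 direct map
 * is used when possible; otherwise a temporary local sysmap mapping is set
 * up by PMAP_LMEM_MAP1().
 */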
2259void
2260pmap_zero_page_idle(vm_page_t m)
2261{
2262	vm_offset_t va;
2263	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2264	register_t intr;
2265
2266	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
2267		va = MIPS_PHYS_TO_KSEG0(phys);
2268		bzero((caddr_t)va, PAGE_SIZE);
2269		mips_dcache_wbinv_range(va, PAGE_SIZE);
2270	} else {
2271		PMAP_LMEM_MAP1(va, phys);
2272
2273		bzero((caddr_t)va, PAGE_SIZE);
2274		mips_dcache_wbinv_range(va, PAGE_SIZE);
2275
2276		PMAP_LMEM_UNMAP();
2277	}
2278}
2279
2280/*
2281 *	pmap_copy_page copies the specified (machine independent)
2282 *	page by mapping the page into virtual memory and using
2283 *	bcopy to copy the page, one machine dependent page at a
2284 *	time.
2285 */
2286void
2287pmap_copy_page(vm_page_t src, vm_page_t dst)
2288{
2289	vm_offset_t va_src, va_dst;
2290	vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src);
2291	vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst);
2292	register_t intr;
2293
2294	if ((phy_src < MIPS_KSEG0_LARGEST_PHYS) && (phy_dst < MIPS_KSEG0_LARGEST_PHYS)) {
2295		/* easy case, all can be accessed via KSEG0 */
2296		/*
2297		 * Flush all caches for VA that are mapped to this page
2298		 * to make sure that data in SDRAM is up to date
2299		 */
2300		pmap_flush_pvcache(src);
2301		mips_dcache_wbinv_range_index(
2302		    MIPS_PHYS_TO_KSEG0(phy_dst), PAGE_SIZE);
2303		va_src = MIPS_PHYS_TO_KSEG0(phy_src);
2304		va_dst = MIPS_PHYS_TO_KSEG0(phy_dst);
2305		bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2306		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2307	} else {
2308		PMAP_LMEM_MAP2(va_src, phy_src, va_dst, phy_dst);
2309
2310		bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2311		mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2312
2313		PMAP_LMEM_UNMAP();
2314	}
2315}
2316
2317/*
2318 * Returns true if the pmap's pv is one of the first
2319 * 16 pvs linked to from this page.  This count may
2320 * be changed upwards or downwards in the future; it
2321 * is only necessary that true be returned for a small
2322 * subset of pmaps for proper page aging.
2323 */
2324boolean_t
2325pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2326{
2327	pv_entry_t pv;
2328	int loops = 0;
2329	boolean_t rv;
2330
2331	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2332	    ("pmap_page_exists_quick: page %p is not managed", m));
2333	rv = FALSE;
2334	vm_page_lock_queues();
2335	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2336		if (pv->pv_pmap == pmap) {
2337			rv = TRUE;
2338			break;
2339		}
2340		loops++;
2341		if (loops >= 16)
2342			break;
2343	}
2344	vm_page_unlock_queues();
2345	return (rv);
2346}
2347
2348/*
2349 * Remove all pages from specified address space
2350 * this aids process exit speeds.  Also, this code
2351 * is special cased for current process only, but
2352 * can have the more generic (and slightly slower)
2353 * mode enabled.  This is much faster than pmap_remove
2354 * in the case of running down an entire address space.
2355 */
2356void
2357pmap_remove_pages(pmap_t pmap)
2358{
2359	pt_entry_t *pte, tpte;
2360	pv_entry_t pv, npv;
2361	vm_page_t m;
2362
2363	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2364		printf("warning: pmap_remove_pages called with non-current pmap\n");
2365		return;
2366	}
2367	vm_page_lock_queues();
2368	PMAP_LOCK(pmap);
2369	sched_pin();
2370	/* XXX: does this need to be TAILQ_FOREACH_SAFE? */
2371	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2372
2373		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2374		if (!pmap_pte_v(pte))
2375			panic("pmap_remove_pages: page on pm_pvlist has no pte\n");
2376		tpte = *pte;
2377
2378		/*
2379		 * We cannot remove wired pages from a process' mapping at this time.
2380		 */
2381		if (tpte & PTE_W) {
2382			npv = TAILQ_NEXT(pv, pv_plist);
2383			continue;
2384		}
2385		*pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2386
2387		m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte));
2388		KASSERT(m != NULL,
2389		    ("pmap_remove_pages: bad tpte %x", tpte));
2390
2391		pv->pv_pmap->pm_stats.resident_count--;
2392
2393		/*
2394		 * Update the vm_page_t clean and reference bits.
2395		 */
2396		if (tpte & PTE_M) {
2397			vm_page_dirty(m);
2398		}
2399		npv = TAILQ_NEXT(pv, pv_plist);
2400		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2401
2402		m->md.pv_list_count--;
2403		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2404		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
2405			vm_page_flag_clear(m, PG_WRITEABLE);
2406		}
2407		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2408		free_pv_entry(pv);
2409	}
2410	sched_unpin();
2411	pmap_invalidate_all(pmap);
2412	PMAP_UNLOCK(pmap);
2413	vm_page_unlock_queues();
2414}
2415
2416/*
2417 * pmap_testbit tests bits in pte's
2418 * note that the testbit/changebit routines are inline,
2419 * and a lot of things compile-time evaluate.
2420 */
2421static boolean_t
2422pmap_testbit(vm_page_t m, int bit)
2423{
2424	pv_entry_t pv;
2425	pt_entry_t *pte;
2426	boolean_t rv = FALSE;
2427
2428	if (m->flags & PG_FICTITIOUS)
2429		return rv;
2430
2431	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2432		return rv;
2433
2434	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2435	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2436#if defined(PMAP_DIAGNOSTIC)
2437		if (!pv->pv_pmap) {
2438			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2439			continue;
2440		}
2441#endif
2442		PMAP_LOCK(pv->pv_pmap);
2443		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2444		rv = (*pte & bit) != 0;
2445		PMAP_UNLOCK(pv->pv_pmap);
2446		if (rv)
2447			break;
2448	}
2449	return (rv);
2450}
2451
2452/*
2453 * this routine is used to modify bits in ptes
2454 */
2455static __inline void
2456pmap_changebit(vm_page_t m, int bit, boolean_t setem)
2457{
2458	register pv_entry_t pv;
2459	register pt_entry_t *pte;
2460
2461	if (m->flags & PG_FICTITIOUS)
2462		return;
2463
2464	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2465	/*
2466	 * Loop over all current mappings, setting/clearing as appropriate.
2467	 * If setting RO, do we need to clear the VAC?
2468	 */
2469	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2470#if defined(PMAP_DIAGNOSTIC)
2471		if (!pv->pv_pmap) {
2472			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2473			continue;
2474		}
2475#endif
2476
2477		PMAP_LOCK(pv->pv_pmap);
2478		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2479
2480		if (setem) {
2481			*(int *)pte |= bit;
2482			pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2483		} else {
2484			vm_offset_t pbits = *(vm_offset_t *)pte;
2485
2486			if (pbits & bit) {
2487				if (bit == PTE_RW) {
2488					if (pbits & PTE_M) {
2489						vm_page_dirty(m);
2490					}
2491					*(int *)pte = (pbits & ~(PTE_M | PTE_RW)) |
2492					    PTE_RO;
2493				} else {
2494					*(int *)pte = pbits & ~bit;
2495				}
2496				pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2497			}
2498		}
2499		PMAP_UNLOCK(pv->pv_pmap);
2500	}
2501	if (!setem && bit == PTE_RW)
2502		vm_page_flag_clear(m, PG_WRITEABLE);
2503}
2504
2505/*
2506 *	pmap_page_wired_mappings:
2507 *
2508 *	Return the number of managed mappings to the given physical page
2509 *	that are wired.
2510 */
2511int
2512pmap_page_wired_mappings(vm_page_t m)
2513{
2514	pv_entry_t pv;
2515	int count;
2516
2517	count = 0;
2518	if ((m->flags & PG_FICTITIOUS) != 0)
2519		return (count);
2520	vm_page_lock_queues();
2521	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
2522	    if (pv->pv_wired)
2523		count++;
2524	vm_page_unlock_queues();
2525	return (count);
2526}
2527
2528/*
2529 * Clear the write and modified bits in each of the given page's mappings.
2530 */
2531void
2532pmap_remove_write(vm_page_t m)
2533{
2534	pv_entry_t pv, npv;
2535	vm_offset_t va;
2536	pt_entry_t *pte;
2537
2538	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2539	    ("pmap_remove_write: page %p is not managed", m));
2540
2541	/*
2542	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
2543	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
2544	 * is clear, no page table entries need updating.
2545	 */
2546	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2547	if ((m->oflags & VPO_BUSY) == 0 &&
2548	    (m->flags & PG_WRITEABLE) == 0)
2549		return;
2550
2551	/*
2552	 * Loop over all current mappings, setting/clearing as appropriate.
2553	 */
2554	vm_page_lock_queues();
2555	for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) {
2556		npv = TAILQ_NEXT(pv, pv_list);
2557		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2558
2559		if ((pte == NULL) || !mips_pg_v(*pte))
2560			panic("page on pm_pvlist has no pte\n");
2561
2562		va = pv->pv_va;
2563		pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE,
2564		    VM_PROT_READ | VM_PROT_EXECUTE);
2565	}
2566	vm_page_flag_clear(m, PG_WRITEABLE);
2567	vm_page_unlock_queues();
2568}
2569
2570/*
2571 *	pmap_ts_referenced:
2572 *
2573 *	Return the count of reference bits for a page, clearing all of them.
2574 */
2575int
2576pmap_ts_referenced(vm_page_t m)
2577{
2578
2579	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2580	    ("pmap_ts_referenced: page %p is not managed", m));
2581	if (m->md.pv_flags & PV_TABLE_REF) {
2582		vm_page_lock_queues();
2583		m->md.pv_flags &= ~PV_TABLE_REF;
2584		vm_page_unlock_queues();
2585		return (1);
2586	}
2587	return (0);
2588}
2589
2590/*
2591 *	pmap_is_modified:
2592 *
2593 *	Return whether or not the specified physical page was modified
2594 *	in any physical maps.
2595 */
2596boolean_t
2597pmap_is_modified(vm_page_t m)
2598{
2599	boolean_t rv;
2600
2601	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2602	    ("pmap_is_modified: page %p is not managed", m));
2603
2604	/*
2605	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
2606	 * concurrently set while the object is locked.  Thus, if PG_WRITEABLE
2607	 * is clear, no PTEs can have PTE_M set.
2608	 */
2609	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2610	if ((m->oflags & VPO_BUSY) == 0 &&
2611	    (m->flags & PG_WRITEABLE) == 0)
2612		return (FALSE);
2613	vm_page_lock_queues();
2614	if (m->md.pv_flags & PV_TABLE_MOD)
2615		rv = TRUE;
2616	else
2617		rv = pmap_testbit(m, PTE_M);
2618	vm_page_unlock_queues();
2619	return (rv);
2620}
2621
2622/* N/C */
2623
2624/*
2625 *	pmap_is_prefaultable:
2626 *
2627 *	Return whether or not the specified virtual address is eligible
2628 *	for prefault.
2629 */
2630boolean_t
2631pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2632{
2633	pt_entry_t *pte;
2634	boolean_t rv;
2635
2636	rv = FALSE;
2637	PMAP_LOCK(pmap);
2638	if (*pmap_pde(pmap, addr)) {
2639		pte = pmap_pte(pmap, addr);
2640		rv = (*pte == 0);
2641	}
2642	PMAP_UNLOCK(pmap);
2643	return (rv);
2644}
2645
2646/*
2647 *	Clear the modify bits on the specified physical page.
2648 */
2649void
2650pmap_clear_modify(vm_page_t m)
2651{
2652
2653	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2654	    ("pmap_clear_modify: page %p is not managed", m));
2655	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2656	KASSERT((m->oflags & VPO_BUSY) == 0,
2657	    ("pmap_clear_modify: page %p is busy", m));
2658
2659	/*
2660	 * If the page is not PG_WRITEABLE, then no PTEs can have PTE_M set.
2661	 * If the object containing the page is locked and the page is not
2662	 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
2663	 */
2664	if ((m->flags & PG_WRITEABLE) == 0)
2665		return;
2666	vm_page_lock_queues();
2667	if (m->md.pv_flags & PV_TABLE_MOD) {
2668		pmap_changebit(m, PTE_M, FALSE);
2669		m->md.pv_flags &= ~PV_TABLE_MOD;
2670	}
2671	vm_page_unlock_queues();
2672}
2673
2674/*
2675 *	pmap_is_referenced:
2676 *
2677 *	Return whether or not the specified physical page was referenced
2678 *	in any physical maps.
2679 */
2680boolean_t
2681pmap_is_referenced(vm_page_t m)
2682{
2683
2684	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2685	    ("pmap_is_referenced: page %p is not managed", m));
2686	return ((m->md.pv_flags & PV_TABLE_REF) != 0);
2687}
2688
2689/*
2690 *	pmap_clear_reference:
2691 *
2692 *	Clear the reference bit on the specified physical page.
2693 */
2694void
2695pmap_clear_reference(vm_page_t m)
2696{
2697
2698	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2699	    ("pmap_clear_reference: page %p is not managed", m));
2700	vm_page_lock_queues();
2701	if (m->md.pv_flags & PV_TABLE_REF) {
2702		m->md.pv_flags &= ~PV_TABLE_REF;
2703	}
2704	vm_page_unlock_queues();
2705}
2706
2707/*
2708 * Miscellaneous support routines follow
2709 */
2710
2718/*
2719 * Map a set of physical memory pages into the kernel virtual
2720 * address space. Return a pointer to where it is mapped. This
2721 * routine is intended to be used for mapping device memory,
2722 * NOT real memory.
2723 */
2724void *
2725pmap_mapdev(vm_offset_t pa, vm_size_t size)
2726{
2727	vm_offset_t va, tmpva, offset;
2728
2729	/*
2730	 * KSEG1 maps only the first 512M of physical address space.  For
2731	 * pa > 0x20000000 we must create a proper mapping using pmap_kenter().
2732	 */
2733	if ((pa + size - 1) < MIPS_KSEG0_LARGEST_PHYS)
2734		return (void *)MIPS_PHYS_TO_KSEG1(pa);
2735	else {
2736		offset = pa & PAGE_MASK;
2737		size = roundup(size + offset, PAGE_SIZE);
2738
2739		va = kmem_alloc_nofault(kernel_map, size);
2740		if (!va)
2741			panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2742		pa = trunc_page(pa);
2743		for (tmpva = va; size > 0;) {
2744			pmap_kenter(tmpva, pa);
2745			size -= PAGE_SIZE;
2746			tmpva += PAGE_SIZE;
2747			pa += PAGE_SIZE;
2748		}
2749	}
2750
2751	return ((void *)(va + offset));
2752}
2753
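/*
 * Unmap a region previously mapped with pmap_mapdev().  Addresses inside
 * KSEG1 were direct-mapped and need no teardown; otherwise the kernel
 * virtual pages are removed and the KVA range is freed.
 */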
2754void
2755pmap_unmapdev(vm_offset_t va, vm_size_t size)
2756{
2757	vm_offset_t base, offset, tmpva;
2758
2759	/* If the address is within KSEG1 then there is nothing to do */
2760	if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END)
2761		return;
2762
2763	base = trunc_page(va);
2764	offset = va & PAGE_MASK;
2765	size = roundup(size + offset, PAGE_SIZE);
2766	for (tmpva = base; tmpva < base + size; tmpva += PAGE_SIZE)
2767		pmap_kremove(tmpva);
2768	kmem_free(kernel_map, base, size);
2769}
2770
2771/*
2772 * perform the pmap work for mincore
2773 */
2774int
2775pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2776{
2777	pt_entry_t *ptep, pte;
2778	vm_offset_t pa;
2779	vm_page_t m;
2780	int val;
2781	boolean_t managed;
2782
2783	PMAP_LOCK(pmap);
2784retry:
2785	ptep = pmap_pte(pmap, addr);
2786	pte = (ptep != NULL) ? *ptep : 0;
2787	if (!mips_pg_v(pte)) {
2788		val = 0;
2789		goto out;
2790	}
2791	val = MINCORE_INCORE;
2792	if ((pte & PTE_M) != 0)
2793		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2794	pa = TLBLO_PTE_TO_PA(pte);
2795	managed = page_is_managed(pa);
2796	if (managed) {
2797		/*
2798		 * This may falsely report the given address as
2799		 * MINCORE_REFERENCED.  Unfortunately, due to the lack of
2800		 * per-PTE reference information, it is impossible to
2801		 * determine if the address is MINCORE_REFERENCED.
2802		 */
2803		m = PHYS_TO_VM_PAGE(pa);
2804		if ((m->flags & PG_REFERENCED) != 0)
2805			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2806	}
2807	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2808	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
2809		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2810		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2811			goto retry;
2812	} else
2813out:
2814		PA_UNLOCK_COND(*locked_pa);
2815	PMAP_UNLOCK(pmap);
2816	return (val);
2817}
2818
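/*
 * Make the given thread's address space active on this CPU: mark the pmap
 * active, allocate a fresh ASID if the cached one is stale, and, for the
 * current thread, load the segment table base and the ASID into EntryHi.
 */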
2819void
2820pmap_activate(struct thread *td)
2821{
2822	pmap_t pmap, oldpmap;
2823	struct proc *p = td->td_proc;
2824
2825	critical_enter();
2826
2827	pmap = vmspace_pmap(p->p_vmspace);
2828	oldpmap = PCPU_GET(curpmap);
2829
2830	if (oldpmap)
2831		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2832	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2833	pmap_asid_alloc(pmap);
2834	if (td == curthread) {
2835		PCPU_SET(segbase, pmap->pm_segtab);
2836		mips_wr_entryhi(pmap->pm_asid[PCPU_GET(cpuid)].asid);
2837	}
2838
2839	PCPU_SET(curpmap, pmap);
2840	critical_exit();
2841}
2842
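/*
 * pmap_sync_icache is currently a no-op; instruction cache synchronization
 * for executable mappings is performed in pmap_enter() and
 * pmap_enter_quick_locked().
 */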
2843void
2844pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2845{
2846}
2847
2848/*
2849 *	Increase the starting virtual address of the given mapping if a
2850 *	different alignment might result in more superpage mappings.
2851 */
2852void
2853pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2854    vm_offset_t *addr, vm_size_t size)
2855{
2856	vm_offset_t superpage_offset;
2857
2858	if (size < NBSEG)
2859		return;
2860	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
2861		offset += ptoa(object->pg_color);
2862	superpage_offset = offset & SEGOFSET;
2863	if (size - ((NBSEG - superpage_offset) & SEGOFSET) < NBSEG ||
2864	    (*addr & SEGOFSET) == superpage_offset)
2865		return;
2866	if ((*addr & SEGOFSET) < superpage_offset)
2867		*addr = (*addr & ~SEGOFSET) + superpage_offset;
2868	else
2869		*addr = ((*addr + SEGOFSET) & ~SEGOFSET) + superpage_offset;
2870}
2871
2872/*
2873 * 	Increase the starting virtual address of the given mapping so
2874 * 	that it is aligned to not be the second page in a TLB entry.
2875 * 	This routine assumes that the length is appropriately-sized so
2876 * 	that the allocation does not share a TLB entry at all if required.
2877 */
2878void
2879pmap_align_tlb(vm_offset_t *addr)
2880{
2881	if ((*addr & PAGE_SIZE) == 0)
2882		return;
2883	*addr += PAGE_SIZE;
2884	return;
2885}
2886
2887int pmap_pid_dump(int pid);
2888
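/*
 * Debugging aid: walk the page tables of the process with the given pid and
 * print every valid user mapping, two entries per line.  Returns the number
 * of valid PTEs found.
 */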
2889int
2890pmap_pid_dump(int pid)
2891{
2892	pmap_t pmap;
2893	struct proc *p;
2894	int npte = 0;
2895	int index;
2896
2897	sx_slock(&allproc_lock);
2898	LIST_FOREACH(p, &allproc, p_list) {
2899		if (p->p_pid != pid)
2900			continue;
2901
2902		if (p->p_vmspace) {
2903			int i, j;
2904
2905			printf("vmspace is %p\n",
2906			       p->p_vmspace);
2907			index = 0;
2908			pmap = vmspace_pmap(p->p_vmspace);
2909			printf("pmap asid:%x generation:%x\n",
2910			       pmap->pm_asid[0].asid,
2911			       pmap->pm_asid[0].gen);
2912			for (i = 0; i < NUSERPGTBLS; i++) {
2913				pd_entry_t *pde;
2914				pt_entry_t *pte;
2915				unsigned base = i << SEGSHIFT;
2916
2917				pde = &pmap->pm_segtab[i];
2918				if (pde && pmap_pde_v(pde)) {
2919					for (j = 0; j < 1024; j++) {
2920						vm_offset_t va = base +
2921						(j << PAGE_SHIFT);
2922
2923						pte = pmap_pte(pmap, va);
2924						if (pte && pmap_pte_v(pte)) {
2925							vm_offset_t pa;
2926							vm_page_t m;
2927
2928							pa = TLBLO_PFN_TO_PA(*pte);
2929							m = PHYS_TO_VM_PAGE(pa);
2930							printf("va: %p, pt: %p, h: %d, w: %d, f: 0x%x",
2931							    (void *)va,
2932							    (void *)pa,
2933							    m->hold_count,
2934							    m->wire_count,
2935							    m->flags);
2936							npte++;
2937							index++;
2938							if (index >= 2) {
2939								index = 0;
2940								printf("\n");
2941							} else {
2942								printf(" ");
2943							}
2944						}
2945					}
2946				}
2947			}
2948		} else {
2949			printf("Process pid:%d has no vmspace\n", pid);
2950		}
2951		break;
2952	}
2953	sx_sunlock(&allproc_lock);
2954	return npte;
2955}
2956
2957
2958#if defined(DEBUG)
2959
2960static void pads(pmap_t pm);
2961void pmap_pvdump(vm_offset_t pa);
2962
2963/* Print the address space of a pmap. */
2964static void
2965pads(pmap_t pm)
2966{
2967	unsigned va, i, j;
2968	pt_entry_t *ptep;
2969
2970	if (pm == kernel_pmap)
2971		return;
2972	for (i = 0; i < NPTEPG; i++)
2973		if (pm->pm_segtab[i])
2974			for (j = 0; j < NPTEPG; j++) {
2975				va = (i << SEGSHIFT) + (j << PAGE_SHIFT);
2976				if (pm == kernel_pmap && va < KERNBASE)
2977					continue;
2978				if (pm != kernel_pmap &&
2979				    va >= VM_MAXUSER_ADDRESS)
2980					continue;
2981				ptep = pmap_pte(pm, va);
2982				if (pmap_pte_v(ptep))
2983					printf("%x:%x ", va, *(int *)ptep);
2984			}
2985
2986}
2987
2988void
2989pmap_pvdump(vm_offset_t pa)
2990{
2991	register pv_entry_t pv;
2992	vm_page_t m;
2993
2994	printf("pa %x", pa);
2995	m = PHYS_TO_VM_PAGE(pa);
2996	for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
2997	    pv = TAILQ_NEXT(pv, pv_list)) {
2998		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
2999		pads(pv->pv_pmap);
3000	}
3001	printf(" ");
3002}
3003
3004/* N/C */
3005#endif
3006
3007
3008/*
3009 * Allocate TLB address space tag (called ASID or TLBPID) and return it.
3010 * It takes almost as much or more time to search the TLB for a
3011 * specific ASID and flush those entries as it does to flush the entire TLB.
3012 * Therefore, when we allocate a new ASID, we just take the next number. When
3013 * we run out of numbers, we flush the TLB, increment the generation count
3014 * and start over. ASID zero is reserved for kernel use.
3015 */
3016static void
3017pmap_asid_alloc(pmap_t pmap)
3019{
3020	if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED ||
3021	    pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
3023		if (PCPU_GET(next_asid) == pmap_max_asid) {
3024			tlb_invalidate_all_user(NULL);
3025			PCPU_SET(asid_generation,
3026			    (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
3027			if (PCPU_GET(asid_generation) == 0) {
3028				PCPU_SET(asid_generation, 1);
3029			}
3030			PCPU_SET(next_asid, 1);	/* 0 means invalid */
3031		}
3032		pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3033		pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3034		PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3035	}
3036}
3037
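/*
 * Return 1 if the physical address belongs to a managed page (neither
 * fictitious nor unmanaged), 0 otherwise.
 */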
3038int
3039page_is_managed(vm_offset_t pa)
3040{
3041	vm_offset_t pgnum = mips_btop(pa);
3042
3043	if (pgnum >= first_page) {
3044		vm_page_t m;
3045
3046		m = PHYS_TO_VM_PAGE(pa);
3047		if (m == NULL)
3048			return 0;
3049		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
3050			return 1;
3051	}
3052	return 0;
3053}
3054
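/*
 * Compute the protection bits for a new PTE.  Read-only mappings get
 * PTE_ROPAGE.  A writable mapping of a managed page that has not yet been
 * modified gets PTE_CWPAGE, so that the first write is trapped and the
 * modified bit can be emulated; already-modified managed pages and
 * unmanaged pages get PTE_RWPAGE.
 */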
3055static int
3056init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot)
3057{
3058	int rw;
3059
3060	if (!(prot & VM_PROT_WRITE))
3061		rw = PTE_ROPAGE;
3062	else if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
3063		if ((m->md.pv_flags & PV_TABLE_MOD) != 0)
3064			rw = PTE_RWPAGE;
3065		else
3066			rw = PTE_CWPAGE;
3067		vm_page_flag_set(m, PG_WRITEABLE);
3068	} else
3069		/* Needn't emulate a modified bit for unmanaged pages. */
3070		rw = PTE_RWPAGE;
3071	return (rw);
3072}
3073
3074/*
3075 *	pmap_set_modified:
3076 *
3077 *	Sets the page modified and reference bits for the specified page.
3078 */
3079void
3080pmap_set_modified(vm_offset_t pa)
3081{
3082
3083	PHYS_TO_VM_PAGE(pa)->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD);
3084}
3085
3086/*
3087 *	Routine:	pmap_kextract
3088 *	Function:
3089 *		Extract the physical page address associated
3090 *		with the given virtual address.
3091 */
3092 /* PMAP_INLINE */ vm_offset_t
3093pmap_kextract(vm_offset_t va)
3094{
3095	vm_offset_t pa = 0;
3096
3097	if (va < MIPS_KSEG0_START) {
3098		/* user virtual address */
3099		pt_entry_t *ptep;
3100
3101		if (curproc && curproc->p_vmspace) {
3102			ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3103			if (ptep)
3104				pa = TLBLO_PTE_TO_PA(*ptep) |
3105				    (va & PAGE_MASK);
3106		}
3107	} else if (va >= MIPS_KSEG0_START &&
3108	    va < MIPS_KSEG1_START)
3109		pa = MIPS_KSEG0_TO_PHYS(va);
3110	else if (va >= MIPS_KSEG1_START &&
3111	    va < MIPS_KSEG2_START)
3112		pa = MIPS_KSEG1_TO_PHYS(va);
3113	else if (va >= MIPS_KSEG2_START && va < VM_MAX_KERNEL_ADDRESS) {
3114		pt_entry_t *ptep;
3115
3116		/* Is the kernel pmap initialized? */
3117		if (kernel_pmap->pm_active) {
3118			/* It's inside the virtual address range. */
3119			ptep = pmap_pte(kernel_pmap, va);
3120			if (ptep) {
3121				return (TLBLO_PTE_TO_PA(*ptep) |
3122				    (va & PAGE_MASK));
3123			}
3124			return (0);
3125		}
3126	}
3127	return pa;
3128}
3129
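/*
 * Write back and invalidate (by index) the data cache lines for every
 * virtual address at which the given page is currently mapped.
 */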
3130void
3131pmap_flush_pvcache(vm_page_t m)
3132{
3133	pv_entry_t pv;
3134
3135	if (m != NULL) {
3136		for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3137	    	    pv = TAILQ_NEXT(pv, pv_list)) {
3138			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
3139		}
3140	}
3141}
3142