pmap.c revision 192628
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
38 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
39 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
40 */
41
42/*
43 *	Manages physical address maps.
44 *
45 *	In addition to hardware address maps, this
46 *	module is called upon to provide software-use-only
47 *	maps which may or may not be stored in the same
48 *	form as hardware maps.	These pseudo-maps are
49 *	used to store intermediate results from copy
50 *	operations to and from address spaces.
51 *
52 *	Since the information managed by this module is
53 *	also stored by the logical address mapping module,
54 *	this module may throw away valid virtual-to-physical
55 *	mappings at almost any time.  However, invalidations
56 *	of virtual-to-physical mappings must be done as
57 *	requested.
58 *
59 *	In order to cope with hardware architectures which
60 *	make virtual-to-physical map invalidates expensive,
61 *	this module may delay invalidation or reduced-protection
62 *	operations until such time as they are actually
63 *	necessary.  This module is given full information as
64 *	to which processors are currently using which maps,
65 *	and to when physical maps must be made correct.
66 */
67
68#include <sys/cdefs.h>
69__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 192628 2009-05-23 07:58:56Z alc $");
70
71#include "opt_ddb.h"
72#include "opt_msgbuf.h"
73#include <sys/param.h>
74#include <sys/systm.h>
75#include <sys/proc.h>
76#include <sys/msgbuf.h>
77#include <sys/vmmeter.h>
78#include <sys/mman.h>
79
80#include <vm/vm.h>
81#include <vm/vm_param.h>
82#include <sys/lock.h>
83#include <sys/mutex.h>
84#include <vm/vm_kern.h>
85#include <vm/vm_page.h>
86#include <vm/vm_map.h>
87#include <vm/vm_object.h>
88#include <vm/vm_extern.h>
89#include <vm/vm_pageout.h>
90#include <vm/vm_pager.h>
91#include <vm/uma.h>
92#include <sys/pcpu.h>
93#include <sys/sched.h>
94#ifdef SMP
95#include <sys/smp.h>
96#endif
97
98#include <machine/cache.h>
99#include <machine/pltfm.h>
100#include <machine/md_var.h>
101
102#if defined(DIAGNOSTIC)
103#define	PMAP_DIAGNOSTIC
104#endif
105
106#undef PMAP_DEBUG
107
108#ifndef PMAP_SHPGPERPROC
109#define	PMAP_SHPGPERPROC 200
110#endif
111
112#if !defined(PMAP_DIAGNOSTIC)
113#define	PMAP_INLINE __inline
114#else
115#define	PMAP_INLINE
116#endif
117
118/*
119 * Get PDEs and PTEs for user/kernel address space
120 */
121#define	pmap_pde(m, v)	       (&((m)->pm_segtab[(vm_offset_t)(v) >> SEGSHIFT]))
122#define	segtab_pde(m, v)	(m[(vm_offset_t)(v) >> SEGSHIFT])
123
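/*
 * Address translation here is two-level: the high bits of a virtual
 * address (va >> SEGSHIFT) index the per-pmap segment table
 * (pm_segtab) to find a second-level page table page, and the
 * remaining page-number bits index that page to find the pt_entry_t.
 * With 4K pages each second-level page maps 4MB of address space
 * (see the NKPT discussion in pmap_bootstrap()).
 */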
124#define	pmap_pte_w(pte)		((*(int *)pte & PTE_W) != 0)
125#define	pmap_pde_v(pte)		((*(int *)pte) != 0)
126#define	pmap_pte_m(pte)		((*(int *)pte & PTE_M) != 0)
127#define	pmap_pte_v(pte)		((*(int *)pte & PTE_V) != 0)
128
129#define	pmap_pte_set_w(pte, v)	((v)?(*(int *)pte |= PTE_W):(*(int *)pte &= ~PTE_W))
130#define	pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
131
132#define	MIPS_SEGSIZE		(1L << SEGSHIFT)
133#define	mips_segtrunc(va)	((va) & ~(MIPS_SEGSIZE-1))
134#define	pmap_TLB_invalidate_all() MIPS_TBIAP()
135#define	pmap_va_asid(pmap, va)	((va) | ((pmap)->pm_asid[PCPU_GET(cpuid)].asid << VMTLB_PID_SHIFT))
136#define	is_kernel_pmap(x)	((x) == kernel_pmap)
137
138struct pmap kernel_pmap_store;
139pd_entry_t *kernel_segmap;
140
141vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
142vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
143
144static int nkpt;
145unsigned pmap_max_asid;		/* max ASID supported by the system */
146
147
148#define	PMAP_ASID_RESERVED	0
149
150
151vm_offset_t kernel_vm_end;
152
153static void pmap_asid_alloc(pmap_t pmap);
154
155/*
156 * Data for the pv entry allocation mechanism
157 */
158static uma_zone_t pvzone;
159static struct vm_object pvzone_obj;
160static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
161
162struct fpage fpages_shared[FPAGES_SHARED];
163
164struct sysmaps sysmaps_pcpu[MAXCPU];
165
166static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
167static pv_entry_t get_pv_entry(pmap_t locked_pmap);
168static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);
169
170static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
171    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
172static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va);
173static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
174static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
175static boolean_t pmap_testbit(vm_page_t m, int bit);
176static void
177pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte,
178    vm_page_t m, boolean_t wired);
179static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
180    vm_offset_t va, vm_page_t m);
181
182static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
183
184static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
185static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
186static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
187static void pmap_TLB_invalidate_kernel(vm_offset_t);
188static void pmap_TLB_update_kernel(vm_offset_t, pt_entry_t);
189static void pmap_init_fpage(void);
190
191#ifdef SMP
192static void pmap_invalidate_page_action(void *arg);
193static void pmap_invalidate_all_action(void *arg);
194static void pmap_update_page_action(void *arg);
195
196#endif
197
198struct local_sysmaps {
199	struct mtx lock;
200	pt_entry_t CMAP1;
201	pt_entry_t CMAP2;
202	caddr_t CADDR1;
203	caddr_t CADDR2;
204	uint16_t valid1, valid2;
205};
206
207/*
208 * This structure is for large memory above 512MB.  We can't (in
209 * 32-bit mode) just use the direct-mapped MIPS_CACHED_TO_PHYS()
210 * macros since we can't see that memory and must map it in when we
211 * need to access it.  In 64-bit mode this goes away.
212 */
213
214static struct local_sysmaps sysmap_lmem[MAXCPU];
215caddr_t virtual_sys_start = (caddr_t)0;
216
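/*
 * Return the first-level (segment table) entry covering "va" in the
 * given pmap, or 0 if the pmap has no segment table.
 */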
217pd_entry_t
218pmap_segmap(pmap_t pmap, vm_offset_t va)
219{
220	if (pmap->pm_segtab)
221		return (pmap->pm_segtab[((vm_offset_t)(va) >> SEGSHIFT)]);
222	else
223		return ((pd_entry_t)0);
224}
225
226/*
227 *	Routine:	pmap_pte
228 *	Function:
229 *		Extract the page table entry associated
230 *		with the given map/virtual_address pair.
231 */
232pt_entry_t *
233pmap_pte(pmap_t pmap, vm_offset_t va)
234{
235	pt_entry_t *pdeaddr;
236
237	if (pmap) {
238		pdeaddr = (pt_entry_t *)pmap_segmap(pmap, va);
239		if (pdeaddr) {
240			return pdeaddr + vad_to_pte_offset(va);
241		}
242	}
243	return ((pt_entry_t *)0);
244}
245
246
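/*
 * Bootstrap memory allocator, used before the VM system is up: carve
 * "size" bytes out of phys_avail[] (skipping banks that are too
 * small), panic if the stolen range is not below the 512MB
 * direct-mapped limit, and return a zeroed KSEG0 virtual address
 * for it.
 */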
247vm_offset_t
248pmap_steal_memory(vm_size_t size)
249{
250	vm_size_t bank_size;
251	vm_offset_t pa, va;
252
253	size = round_page(size);
254
255	bank_size = phys_avail[1] - phys_avail[0];
256	while (size > bank_size) {
257		int i;
258
259		for (i = 0; phys_avail[i + 2]; i += 2) {
260			phys_avail[i] = phys_avail[i + 2];
261			phys_avail[i + 1] = phys_avail[i + 3];
262		}
263		phys_avail[i] = 0;
264		phys_avail[i + 1] = 0;
265		if (!phys_avail[0])
266			panic("pmap_steal_memory: out of memory");
267		bank_size = phys_avail[1] - phys_avail[0];
268	}
269
270	pa = phys_avail[0];
271	phys_avail[0] += size;
272	if (pa >= MIPS_KSEG0_LARGEST_PHYS) {
273		panic("Out of memory below 512Meg?");
274	}
275	va = MIPS_PHYS_TO_CACHED(pa);
276	bzero((caddr_t)va, size);
277	return va;
278}
279
280/*
281 *	Bootstrap the system enough to run with virtual memory.  This
282 * assumes that the phys_avail array has been initialized.
283 */
284void
285pmap_bootstrap(void)
286{
287	pt_entry_t *pgtab;
288	pt_entry_t *pte;
289	int i, j;
290	int memory_larger_than_512meg = 0;
291
292	/* Sort phys_avail[] by address and detect memory above the 512MB limit. */
293again:
294	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
295		if (phys_avail[i + 1] >= MIPS_KSEG0_LARGEST_PHYS) {
296			memory_larger_than_512meg++;
297		}
298		if (i < 2)
299			continue;
300		if (phys_avail[i - 2] > phys_avail[i]) {
301			vm_paddr_t ptemp[2];
302
303
304			ptemp[0] = phys_avail[i + 0];
305			ptemp[1] = phys_avail[i + 1];
306
307			phys_avail[i + 0] = phys_avail[i - 2];
308			phys_avail[i + 1] = phys_avail[i - 1];
309
310			phys_avail[i - 2] = ptemp[0];
311			phys_avail[i - 1] = ptemp[1];
312			goto again;
313		}
314	}
315
316	if (bootverbose) {
317		printf("Physical memory chunk(s):\n");
318		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
319			vm_paddr_t size;
320
321			size = phys_avail[i + 1] - phys_avail[i];
322			printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
323			    (uintmax_t) phys_avail[i],
324			    (uintmax_t) phys_avail[i + 1] - 1,
325			    (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
326		}
327	}
328	/*
329	 * Steal the message buffer from the beginning of memory.
330	 */
331	msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE);
332	msgbufinit(msgbufp, MSGBUF_SIZE);
333
334	/*
335	 * Steal thread0 kstack.
336	 */
337	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
338
339
340	virtual_avail = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET;
341	virtual_end = VM_MAX_KERNEL_ADDRESS;
342
343	/*
344	 * Steal some virtual space that will not be in kernel_segmap. This
345	 * va memory space will be used to map in kernel pages that are
346	 * outside the 512Meg region. Note that we only do this steal when
347	 * we do have memory in this region, that way for systems with
348	 * smaller memory we don't "steal" any va ranges :-)
349	 */
350	if (memory_larger_than_512meg) {
351		for (i = 0; i < MAXCPU; i++) {
352			sysmap_lmem[i].CMAP1 = PTE_G;
353			sysmap_lmem[i].CMAP2 = PTE_G;
354			sysmap_lmem[i].CADDR1 = (caddr_t)virtual_avail;
355			virtual_avail += PAGE_SIZE;
356			sysmap_lmem[i].CADDR2 = (caddr_t)virtual_avail;
357			virtual_avail += PAGE_SIZE;
358			sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
359			PMAP_LGMEM_LOCK_INIT(&sysmap_lmem[i]);
360		}
361	}
362	virtual_sys_start = (caddr_t)virtual_avail;
363	/*
364	 * Allocate segment table for the kernel
365	 */
366	kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);
367
368	/*
369	 * Allocate second level page tables for the kernel
370	 */
371	nkpt = NKPT;
372	if (memory_larger_than_512meg) {
373		/*
374		 * If we have a large memory system we CANNOT afford to hit
375		 * pmap_growkernel() and allocate memory there, since we MAY
376		 * end up with a page that is NOT mappable.  For that reason
377		 * we grab more up front.  Normally NKPT is 120 (YMMV, see
378		 * pmap.h); this gives us 480MB of kernel virtual addresses
379		 * at the cost of 120 pages (each page maps 4MB).  Since the
380		 * kernel starts at virtual_avail, we can instead calculate
381		 * how many entries are left from there to the end of the
382		 * segmap and allocate all of them, which for a range
383		 * somewhere above 0xC0000000 - 0xFFFFFFFF results in about
384		 * 256 entries instead of the 120.
385		 */
386		nkpt = (PAGE_SIZE / sizeof(pd_entry_t)) - (virtual_avail >> SEGSHIFT);
387	}
388	pgtab = (pt_entry_t *)pmap_steal_memory(PAGE_SIZE * nkpt);
389
390	/*
391	 * The R[4-7]?00 stores only one copy of the Global bit in the
392	 * translation lookaside buffer for each pair of page entries.
393	 * Thus invalid entries must have the Global bit set so that when
394	 * the G bits of the two EntryLo values are ANDed together they
395	 * still produce a global bit to store in the TLB.
396	 */
397	for (i = 0, pte = pgtab; i < (nkpt * NPTEPG); i++, pte++)
398		*pte = PTE_G;
399
400	printf("Va=0x%x Ve=0x%x\n", virtual_avail, virtual_end);
401	/*
402	 * The segment table contains the KVA of the pages in the second
403	 * level page table.
404	 */
405	printf("init kernel_segmap va >> = %d nkpt:%d\n",
406	    (virtual_avail >> SEGSHIFT),
407	    nkpt);
408	for (i = 0, j = (virtual_avail >> SEGSHIFT); i < nkpt; i++, j++)
409		kernel_segmap[j] = (pd_entry_t)(pgtab + (i * NPTEPG));
410
411	for (i = 0; phys_avail[i + 2]; i += 2)
412		continue;
413	printf("avail_start:0x%x avail_end:0x%x\n",
414	    phys_avail[0], phys_avail[i + 1]);
415
416	/*
417	 * The kernel's pmap is statically allocated so we don't have to use
418	 * pmap_create, which is unlikely to work correctly at this part of
419	 * the boot sequence (XXX and which no longer exists).
420	 */
421	PMAP_LOCK_INIT(kernel_pmap);
422	kernel_pmap->pm_segtab = kernel_segmap;
423	kernel_pmap->pm_active = ~0;
424	TAILQ_INIT(&kernel_pmap->pm_pvlist);
425	kernel_pmap->pm_asid[PCPU_GET(cpuid)].asid = PMAP_ASID_RESERVED;
426	kernel_pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
427	pmap_max_asid = VMNUM_PIDS;
428	MachSetPID(0);
429}
430
431/*
432 * Initialize a vm_page's machine-dependent fields.
433 */
434void
435pmap_page_init(vm_page_t m)
436{
437
438	TAILQ_INIT(&m->md.pv_list);
439	m->md.pv_list_count = 0;
440	m->md.pv_flags = 0;
441}
442
443/*
444 *	Initialize the pmap module.
445 *	Called by vm_init, to initialize any structures that the pmap
446 *	system needs to map virtual memory.
447 *	pmap_init has been enhanced to support discontiguous physical
448 *	memory in a fairly consistent way.
449 */
450void
451pmap_init(void)
452{
453
454	if (need_wired_tlb_page_pool)
455		pmap_init_fpage();
456	/*
457	 * Initialize the address space (zone) for the pv entries.  Set a
458	 * high water mark so that the system can recover from excessive
459	 * numbers of pv entries.
460	 */
461	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
462	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
463	pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
464	pv_entry_high_water = 9 * (pv_entry_max / 10);
465	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
466}
467
468/***************************************************
469 * Low level helper routines.....
470 ***************************************************/
471
472#if defined(PMAP_DIAGNOSTIC)
473
474/*
475 * This code checks for non-writeable/modified pages.
476 * This should be an invalid condition.
477 */
478static int
479pmap_nw_modified(pt_entry_t pte)
480{
481	if ((pte & (PTE_M | PTE_RO)) == (PTE_M | PTE_RO))
482		return (1);
483	else
484		return (0);
485}
486
487#endif
488
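/*
 * Note on the #ifdef SMP layout of the invalidation routines below:
 * under SMP each public routine only initiates a smp_rendezvous() and
 * the shared body lives in the corresponding *_action() function; on
 * UP kernels the two fragments are compiled into a single function.
 */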
489static void
490pmap_invalidate_all(pmap_t pmap)
491{
492#ifdef SMP
493	smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *)pmap);
494}
495
496static void
497pmap_invalidate_all_action(void *arg)
498{
499	pmap_t pmap = (pmap_t)arg;
500
501#endif
502
503	if (pmap->pm_active & PCPU_GET(cpumask)) {
504		pmap_TLB_invalidate_all();
505	} else
506		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
507}
508
509struct pmap_invalidate_page_arg {
510	pmap_t pmap;
511	vm_offset_t va;
512};
513
514static __inline void
515pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
516{
517#ifdef SMP
518	struct pmap_invalidate_page_arg arg;
519
520	arg.pmap = pmap;
521	arg.va = va;
522
523	smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *)&arg);
524}
525
526static void
527pmap_invalidate_page_action(void *arg)
528{
529	pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap;
530	vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va;
531
532#endif
533
534	if (is_kernel_pmap(pmap)) {
535		pmap_TLB_invalidate_kernel(va);
536		return;
537	}
538	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
539		return;
540	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
541		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
542		return;
543	}
544	va = pmap_va_asid(pmap, (va & ~PGOFSET));
545	mips_TBIS(va);
546}
547
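/*
 * Invalidate the TLB entry for a kernel virtual address: tag the va
 * with the currently loaded ASID and flush that entry with mips_TBIS().
 */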
548static void
549pmap_TLB_invalidate_kernel(vm_offset_t va)
550{
551	u_int32_t pid;
552
553	MachTLBGetPID(pid);
554	va = va | (pid << VMTLB_PID_SHIFT);
555	mips_TBIS(va);
556}
557
558struct pmap_update_page_arg {
559	pmap_t pmap;
560	vm_offset_t va;
561	pt_entry_t pte;
562};
563
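/*
 * Bring the TLB up to date with a PTE that has just been changed.  On
 * SMP all CPUs are rendezvoused; each CPU either rewrites its TLB
 * entry (if the pmap is active there) or marks the pmap's ASID
 * generation stale so a fresh ASID is allocated on next activation.
 */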
564void
565pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
566{
567#ifdef SMP
568	struct pmap_update_page_arg arg;
569
570	arg.pmap = pmap;
571	arg.va = va;
572	arg.pte = pte;
573
574	smp_rendezvous(0, pmap_update_page_action, 0, (void *)&arg);
575}
576
577static void
578pmap_update_page_action(void *arg)
579{
580	pmap_t pmap = ((struct pmap_update_page_arg *)arg)->pmap;
581	vm_offset_t va = ((struct pmap_update_page_arg *)arg)->va;
582	pt_entry_t pte = ((struct pmap_update_page_arg *)arg)->pte;
583
584#endif
585	if (is_kernel_pmap(pmap)) {
586		pmap_TLB_update_kernel(va, pte);
587		return;
588	}
589	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
590		return;
591	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
592		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
593		return;
594	}
595	va = pmap_va_asid(pmap, va);
596	MachTLBUpdate(va, pte);
597}
598
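/*
 * Rewrite the kernel TLB entry for "va" with the new PTE, tagging the
 * va with the currently loaded ASID.
 */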
599static void
600pmap_TLB_update_kernel(vm_offset_t va, pt_entry_t pte)
601{
602	u_int32_t pid;
603
604	MachTLBGetPID(pid);
605	va = va | (pid << VMTLB_PID_SHIFT);
606
607	MachTLBUpdate(va, pte);
608}
609
610/*
611 *	Routine:	pmap_extract
612 *	Function:
613 *		Extract the physical page address associated
614 *		with the given map/virtual_address pair.
615 */
616vm_paddr_t
617pmap_extract(pmap_t pmap, vm_offset_t va)
618{
619	pt_entry_t *pte;
620	vm_offset_t retval = 0;
621
622	PMAP_LOCK(pmap);
623	pte = pmap_pte(pmap, va);
624	if (pte) {
625		retval = mips_tlbpfn_to_paddr(*pte) | (va & PAGE_MASK);
626	}
627	PMAP_UNLOCK(pmap);
628	return retval;
629}
630
631/*
632 *	Routine:	pmap_extract_and_hold
633 *	Function:
634 *		Atomically extract and hold the physical page
635 *		with the given pmap and virtual address pair
636 *		if that mapping permits the given protection.
637 */
638vm_page_t
639pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
640{
641	pt_entry_t pte;
642	vm_page_t m;
643
644	m = NULL;
645	vm_page_lock_queues();
646	PMAP_LOCK(pmap);
647
648	pte = *pmap_pte(pmap, va);
649	if (pte != 0 && pmap_pte_v(&pte) &&
650	    ((pte & PTE_RW) || (prot & VM_PROT_WRITE) == 0)) {
651		m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pte));
652		vm_page_hold(m);
653	}
654	vm_page_unlock_queues();
655	PMAP_UNLOCK(pmap);
656	return (m);
657}
658
659/***************************************************
660 * Low level mapping routines.....
661 ***************************************************/
662
663/*
664 * add a wired page to the kva
665 */
666 /* PMAP_INLINE */ void
667pmap_kenter(vm_offset_t va, vm_paddr_t pa)
668{
669	register pt_entry_t *pte;
670	pt_entry_t npte, opte;
671
672#ifdef PMAP_DEBUG
673	printf("pmap_kenter:  va: 0x%08x -> pa: 0x%08x\n", va, pa);
674#endif
675	npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W;
676
677	if (is_cacheable_mem(pa))
678		npte |= PTE_CACHE;
679	else
680		npte |= PTE_UNCACHED;
681
682	pte = pmap_pte(kernel_pmap, va);
683	opte = *pte;
684	*pte = npte;
685
686	pmap_update_page(kernel_pmap, va, npte);
687}
688
689/*
690 * remove a page from the kernel pagetables
691 */
692 /* PMAP_INLINE */ void
693pmap_kremove(vm_offset_t va)
694{
695	register pt_entry_t *pte;
696
697	pte = pmap_pte(kernel_pmap, va);
698	*pte = PTE_G;
699	pmap_invalidate_page(kernel_pmap, va);
700}
701
702/*
703 *	Used to map a range of physical addresses into kernel
704 *	virtual address space.
705 *
706 *	The value passed in '*virt' is a suggested virtual address for
707 *	the mapping. Architectures which can support a direct-mapped
708 *	physical to virtual region can return the appropriate address
709 *	within that region, leaving '*virt' unchanged. Other
710 *	architectures should map the pages starting at '*virt' and
711 *	update '*virt' with the first usable address after the mapped
712 *	region.
713 */
714vm_offset_t
715pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
716{
717	vm_offset_t va, sva;
718
719	va = sva = *virt;
720	while (start < end) {
721		pmap_kenter(va, start);
722		va += PAGE_SIZE;
723		start += PAGE_SIZE;
724	}
725	*virt = va;
726	return (sva);
727}
728
729/*
730 * Add a list of wired pages to the kva
731 * this routine is only used for temporary
732 * kernel mappings that do not need to have
733 * page modification or references recorded.
734 * Note that old mappings are simply written
735 * over.  The page *must* be wired.
736 */
737void
738pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
739{
740	int i;
741
742	for (i = 0; i < count; i++) {
743		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
744		va += PAGE_SIZE;
745	}
746}
747
748/*
749 * this routine jerks page mappings from the
750 * kernel -- it is meant only for temporary mappings.
751 */
752void
753pmap_qremove(vm_offset_t va, int count)
754{
755	while (count-- > 0) {
756		pmap_kremove(va);
757		va += PAGE_SIZE;
758	}
759}
760
761/***************************************************
762 * Page table page management routines.....
763 ***************************************************/
764
765/*
766 * floating pages (FPAGES) management routines
767 *
768 * FPAGES are the reserved virtual memory areas which can be
769 * mapped to any physical memory. This gets used typically
770 * in the following functions:
771 *
772 * pmap_zero_page
773 * pmap_copy_page
774 */
775
776/*
777 * Create the floating pages, aka FPAGES!
778 */
779static void
780pmap_init_fpage(void)
781{
782	vm_offset_t kva;
783	int i, j;
784	struct sysmaps *sysmaps;
785
786	/*
787	 * We allocate a total of (FPAGES*MAXCPU + FPAGES_SHARED + 1) pages
788	 * at first. FPAGES & FPAGES_SHARED should be EVEN.  Then we'll adjust
789	 * 'kva' to be even-page aligned so that the fpage area can be wired
790	 * in the TLB with a single TLB entry.
791	 */
792	kva = kmem_alloc_nofault(kernel_map,
793	    (FPAGES * MAXCPU + 1 + FPAGES_SHARED) * PAGE_SIZE);
794	if ((void *)kva == NULL)
795		panic("pmap_init_fpage: fpage allocation failed");
796
797	/*
798	 * Make the start an even page number so we can wire down the
799	 * fpage area in the TLB with a single TLB entry.
800	 */
801	if ((((vm_offset_t)kva) >> PGSHIFT) & 1) {
802		/*
803		 * 'kva' is not even-page aligned. Adjust it and free the
804		 * first page which is unused.
805		 */
806		kmem_free(kernel_map, (vm_offset_t)kva, NBPG);
807		kva = ((vm_offset_t)kva) + NBPG;
808	} else {
809		/*
810		 * 'kva' is even page aligned. We don't need the last page,
811		 * free it.
812		 */
813		kmem_free(kernel_map, ((vm_offset_t)kva) + FSPACE, NBPG);
814	}
815
816	for (i = 0; i < MAXCPU; i++) {
817		sysmaps = &sysmaps_pcpu[i];
818		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
819
820		/* Assign FPAGES pages to the CPU */
821		for (j = 0; j < FPAGES; j++)
822			sysmaps->fp[j].kva = kva + (j) * PAGE_SIZE;
823		kva = ((vm_offset_t)kva) + (FPAGES * PAGE_SIZE);
824	}
825
826	/*
827	 * An additional 2 pages are needed, one for pmap_zero_page_idle()
828	 * and one for coredump. These pages are shared by all CPUs.
829	 */
830	fpages_shared[PMAP_FPAGE3].kva = kva;
831	fpages_shared[PMAP_FPAGE_KENTER_TEMP].kva = kva + PAGE_SIZE;
832}
833
834/*
835 * Map the page to the fpage virtual address specified through the fpage id.
836 */
837vm_offset_t
838pmap_map_fpage(vm_paddr_t pa, struct fpage *fp, boolean_t check_unmaped)
839{
840	vm_offset_t kva;
841	register pt_entry_t *pte;
842	pt_entry_t npte;
843
844	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
845	/*
846	 * Check if the fpage is free
847	 */
848	if (fp->state) {
849		if (check_unmaped == TRUE)
850			pmap_unmap_fpage(pa, fp);
851		else
852			panic("pmap_map_fpage: fpage is busy");
853	}
854	fp->state = TRUE;
855	kva = fp->kva;
856
857	npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
858	pte = pmap_pte(kernel_pmap, kva);
859	*pte = npte;
860
861	pmap_TLB_update_kernel(kva, npte);
862
863	return (kva);
864}
865
866/*
867 * Unmap the page from the fpage virtual address specified through the fpage id.
868 */
869void
870pmap_unmap_fpage(vm_paddr_t pa, struct fpage *fp)
871{
872	vm_offset_t kva;
873	register pt_entry_t *pte;
874
875	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
876	/*
877	 * Check if the fpage is busy
878	 */
879	if (!(fp->state)) {
880		panic("pmap_unmap_fpage: fpage is free");
881	}
882	kva = fp->kva;
883
884	pte = pmap_pte(kernel_pmap, kva);
885	*pte = PTE_G;
886	pmap_TLB_invalidate_kernel(kva);
887
888	fp->state = FALSE;
889
890	/*
891	 * Should there be any flush operation at the end?
892	 */
893}
894
895/*  Revision 1.507
896 *
897 * Simplify the reference counting of page table pages.	 Specifically, use
898 * the page table page's wired count rather than its hold count to contain
899 * the reference count.
900 */
901
902/*
903 * These routines release a reference (the wire count) on a page table
904 * page; once the count drops to zero the page is unmapped and freed.
905 */
906static int
907_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
908{
909
910	/*
911	 * unmap the page table page
912	 */
913	pmap->pm_segtab[m->pindex] = 0;
914	--pmap->pm_stats.resident_count;
915
916	if (pmap->pm_ptphint == m)
917		pmap->pm_ptphint = NULL;
918
919	/*
920	 * If the page is finally unwired, simply free it.
921	 */
922	vm_page_free_zero(m);
923	atomic_subtract_int(&cnt.v_wire_count, 1);
924	return (1);
925}
926
927static PMAP_INLINE int
928pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
929{
930	--m->wire_count;
931	if (m->wire_count == 0)
932		return (_pmap_unwire_pte_hold(pmap, m));
933	else
934		return (0);
935}
936
937/*
938 * After removing a page table entry, this routine is used to
939 * conditionally free the page, and manage the hold/wire counts.
940 */
941static int
942pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
943{
944	unsigned ptepindex;
945	pd_entry_t pteva;
946
947	if (va >= VM_MAXUSER_ADDRESS)
948		return (0);
949
950	if (mpte == NULL) {
951		ptepindex = (va >> SEGSHIFT);
952		if (pmap->pm_ptphint &&
953		    (pmap->pm_ptphint->pindex == ptepindex)) {
954			mpte = pmap->pm_ptphint;
955		} else {
956			pteva = *pmap_pde(pmap, va);
957			mpte = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva));
958			pmap->pm_ptphint = mpte;
959		}
960	}
961	return pmap_unwire_pte_hold(pmap, mpte);
962}
963
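/*
 * Initialize pmap0, the pmap of the initial process (vmspace0).  It
 * borrows the kernel segment table rather than allocating its own
 * page directory page, and is installed as the current pmap.
 */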
964void
965pmap_pinit0(pmap_t pmap)
966{
967	int i;
968
969	PMAP_LOCK_INIT(pmap);
970	pmap->pm_segtab = kernel_segmap;
971	pmap->pm_active = 0;
972	pmap->pm_ptphint = NULL;
973	for (i = 0; i < MAXCPU; i++) {
974		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
975		pmap->pm_asid[i].gen = 0;
976	}
977	PCPU_SET(curpmap, pmap);
978	TAILQ_INIT(&pmap->pm_pvlist);
979	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
980}
981
982/*
983 * Initialize a preallocated and zeroed pmap structure,
984 * such as one in a vmspace structure.
985 */
986int
987pmap_pinit(pmap_t pmap)
988{
989	vm_page_t ptdpg;
990	int i;
991	int req;
992
993	PMAP_LOCK_INIT(pmap);
994
995	req = VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
996	    VM_ALLOC_ZERO;
997
998#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
999	if (need_wired_tlb_page_pool)
1000		req |= VM_ALLOC_WIRED_TLB_PG_POOL;
1001#endif
1002	/*
1003	 * allocate the page directory page
1004	 */
1005	while ((ptdpg = vm_page_alloc(NULL, NUSERPGTBLS, req)) == NULL)
1006		VM_WAIT;
1007
1008	ptdpg->valid = VM_PAGE_BITS_ALL;
1009
1010	pmap->pm_segtab = (pd_entry_t *)
1011	    MIPS_PHYS_TO_CACHED(VM_PAGE_TO_PHYS(ptdpg));
1012	if ((ptdpg->flags & PG_ZERO) == 0)
1013		bzero(pmap->pm_segtab, PAGE_SIZE);
1014
1015	pmap->pm_active = 0;
1016	pmap->pm_ptphint = NULL;
1017	for (i = 0; i < MAXCPU; i++) {
1018		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1019		pmap->pm_asid[i].gen = 0;
1020	}
1021	TAILQ_INIT(&pmap->pm_pvlist);
1022	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1023
1024	return (1);
1025}
1026
1027/*
1028 * this routine is called if the page table page is not
1029 * mapped correctly.
1030 */
1031static vm_page_t
1032_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
1033{
1034	vm_offset_t pteva, ptepa;
1035	vm_page_t m;
1036	int req;
1037
1038	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1039	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1040	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1041
1042	req = VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ;
1043#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
1044	if (need_wired_tlb_page_pool)
1045		req |= VM_ALLOC_WIRED_TLB_PG_POOL;
1046#endif
1047	/*
1048	 * Find or fabricate a new pagetable page
1049	 */
1050	if ((m = vm_page_alloc(NULL, ptepindex, req)) == NULL) {
1051		if (flags & M_WAITOK) {
1052			PMAP_UNLOCK(pmap);
1053			vm_page_unlock_queues();
1054			VM_WAIT;
1055			vm_page_lock_queues();
1056			PMAP_LOCK(pmap);
1057		}
1058		/*
1059		 * Indicate the need to retry.	While waiting, the page
1060		 * table page may have been allocated.
1061		 */
1062		return (NULL);
1063	}
1064	if ((m->flags & PG_ZERO) == 0)
1065		pmap_zero_page(m);
1066
1067	KASSERT(m->queue == PQ_NONE,
1068	    ("_pmap_allocpte: %p->queue != PQ_NONE", m));
1069
1070	/*
1071	 * Map the pagetable page into the process address space, if it
1072	 * isn't already there.
1073	 */
1074
1075	pmap->pm_stats.resident_count++;
1076
1077	ptepa = VM_PAGE_TO_PHYS(m);
1078	pteva = MIPS_PHYS_TO_CACHED(ptepa);
1079	pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva;
1080
1081	/*
1082	 * Set the page table hint
1083	 */
1084	pmap->pm_ptphint = m;
1085
1086	/*
1087	 * Kernel page tables are allocated in pmap_bootstrap() or
1088	 * pmap_growkernel().
1089	 */
1090	if (is_kernel_pmap(pmap))
1091		panic("_pmap_allocpte() called for kernel pmap\n");
1092
1093	m->valid = VM_PAGE_BITS_ALL;
1094	vm_page_flag_clear(m, PG_ZERO);
1095
1096	return (m);
1097}
1098
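/*
 * Return the page table page that maps "va", allocating it with
 * _pmap_allocpte() if it does not yet exist.  The page's wire count
 * serves as its reference count; with M_WAITOK the allocation is
 * retried until it succeeds.
 */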
1099static vm_page_t
1100pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
1101{
1102	unsigned ptepindex;
1103	vm_offset_t pteva;
1104	vm_page_t m;
1105
1106	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1107	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1108	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1109
1110	/*
1111	 * Calculate pagetable page index
1112	 */
1113	ptepindex = va >> SEGSHIFT;
1114retry:
1115	/*
1116	 * Get the page directory entry
1117	 */
1118	pteva = (vm_offset_t)pmap->pm_segtab[ptepindex];
1119
1120	/*
1121	 * If the page table page is mapped, we just increment the wire
1122	 * count, and activate it.
1123	 */
1124	if (pteva) {
1125		/*
1126		 * In order to get the page table page, try the hint first.
1127		 */
1128		if (pmap->pm_ptphint &&
1129		    (pmap->pm_ptphint->pindex == ptepindex)) {
1130			m = pmap->pm_ptphint;
1131		} else {
1132			m = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva));
1133			pmap->pm_ptphint = m;
1134		}
1135		m->wire_count++;
1136	} else {
1137		/*
1138		 * Here if the pte page isn't mapped, or if it has been
1139		 * deallocated.
1140		 */
1141		m = _pmap_allocpte(pmap, ptepindex, flags);
1142		if (m == NULL && (flags & M_WAITOK))
1143			goto retry;
1144	}
1145	return m;
1146}
1147
1148
1149/***************************************************
1150* Pmap allocation/deallocation routines.
1151 ***************************************************/
1152/*
1153 *  Revision 1.397
1154 *  - Merged pmap_release and pmap_release_free_page.  When pmap_release is
1155 *    called only the page directory page(s) can be left in the pmap pte
1156 *    object, since all page table pages will have been freed by
1157 *    pmap_remove_pages and pmap_remove.  In addition, there can only be one
1158 *    reference to the pmap and the page directory is wired, so the page(s)
1159 *    can never be busy.  So all there is to do is clear the magic mappings
1160 *    from the page directory and free the page(s).
1161 */
1162
1163
1164/*
1165 * Release any resources held by the given physical map.
1166 * Called when a pmap initialized by pmap_pinit is being released.
1167 * Should only be called if the map contains no valid mappings.
1168 */
1169void
1170pmap_release(pmap_t pmap)
1171{
1172	vm_page_t ptdpg;
1173
1174	KASSERT(pmap->pm_stats.resident_count == 0,
1175	    ("pmap_release: pmap resident count %ld != 0",
1176	    pmap->pm_stats.resident_count));
1177
1178	ptdpg = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pmap->pm_segtab));
1179	ptdpg->wire_count--;
1180	atomic_subtract_int(&cnt.v_wire_count, 1);
1181	vm_page_free_zero(ptdpg);
1182}
1183
1184/*
1185 * grow the number of kernel page table entries, if needed
1186 */
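/*
 * The loop below walks kernel_vm_end forward one segment (4MB) at a
 * time, allocating a page table page (filled with PTE_G "invalid"
 * entries) for each segment that does not yet have one, until "addr"
 * is covered.
 */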
1187void
1188pmap_growkernel(vm_offset_t addr)
1189{
1190	vm_offset_t ptppaddr;
1191	vm_page_t nkpg;
1192	pt_entry_t *pte;
1193	int i, req;
1194
1195	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1196	if (kernel_vm_end == 0) {
1197		kernel_vm_end = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET;
1198		nkpt = 0;
1199		while (segtab_pde(kernel_segmap, kernel_vm_end)) {
1200			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1201			    ~(PAGE_SIZE * NPTEPG - 1);
1202			nkpt++;
1203			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1204				kernel_vm_end = kernel_map->max_offset;
1205				break;
1206			}
1207		}
1208	}
1209	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1210	if (addr - 1 >= kernel_map->max_offset)
1211		addr = kernel_map->max_offset;
1212	while (kernel_vm_end < addr) {
1213		if (segtab_pde(kernel_segmap, kernel_vm_end)) {
1214			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1215			    ~(PAGE_SIZE * NPTEPG - 1);
1216			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1217				kernel_vm_end = kernel_map->max_offset;
1218				break;
1219			}
1220			continue;
1221		}
1222		/*
1223		 * This index is bogus, but out of the way
1224		 */
1225		req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
1226#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
1227		if (need_wired_tlb_page_pool)
1228			req |= VM_ALLOC_WIRED_TLB_PG_POOL;
1229#endif
1230		nkpg = vm_page_alloc(NULL, nkpt, req);
1231		if (!nkpg)
1232			panic("pmap_growkernel: no memory to grow kernel");
1233
1234		nkpt++;
1235
1236		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1237		if (ptppaddr >= MIPS_KSEG0_LARGEST_PHYS) {
1238			/*
1239			 * We need to do something here, but I am not sure
1240			 * what. We can access anything in the 0 - 512Meg
1241			 * region, but if we get a page to go in the kernel
1242			 * segmap that is outside of that we really need
1243			 * to have another mapping beyond the temporary ones
1244			 * I have. Not sure how to do this yet. FIXME FIXME.
1245			 */
1246			panic("Gak, can't handle a k-page table outside of lower 512Meg");
1247		}
1248		pte = (pt_entry_t *)MIPS_PHYS_TO_CACHED(ptppaddr);
1249		segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte;
1250
1251		/*
1252		 * The R[4-7]?00 stores only one copy of the Global bit in
1253		 * the translation lookaside buffer for each pair of page
1254		 * entries.  Thus invalid entries must have the Global bit set
1255		 * so that when the G bits of the two EntryLo values are ANDed
1256		 * together they still produce a global bit to store in the TLB.
1257		 */
1258		for (i = 0; i < NPTEPG; i++, pte++)
1259			*pte = PTE_G;
1260
1261		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1262		    ~(PAGE_SIZE * NPTEPG - 1);
1263		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1264			kernel_vm_end = kernel_map->max_offset;
1265			break;
1266		}
1267	}
1268}
1269
1270/***************************************************
1271* page management routines.
1272 ***************************************************/
1273
1274/*
1275 * free the pv_entry back to the free list
1276 */
1277static PMAP_INLINE void
1278free_pv_entry(pv_entry_t pv)
1279{
1280
1281	pv_entry_count--;
1282	uma_zfree(pvzone, pv);
1283}
1284
1285/*
1286 * get a new pv_entry, allocating a block from the system
1287 * when needed.
1288 * the memory allocation is performed bypassing the malloc code
1289 * because of the possibility of allocations at interrupt time.
1290 */
1291static pv_entry_t
1292get_pv_entry(pmap_t locked_pmap)
1293{
1294	static const struct timeval printinterval = { 60, 0 };
1295	static struct timeval lastprint;
1296	struct vpgqueues *vpq;
1297	pt_entry_t *pte, oldpte;
1298	pmap_t pmap;
1299	pv_entry_t allocated_pv, next_pv, pv;
1300	vm_offset_t va;
1301	vm_page_t m;
1302
1303	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
1304	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1305	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
1306	if (allocated_pv != NULL) {
1307		pv_entry_count++;
1308		if (pv_entry_count > pv_entry_high_water)
1309			pagedaemon_wakeup();
1310		else
1311			return (allocated_pv);
1312	}
1313	/*
1314	 * Reclaim pv entries: At first, destroy mappings to inactive
1315	 * pages.  After that, if a pv entry is still needed, destroy
1316	 * mappings to active pages.
1317	 */
1318	if (ratecheck(&lastprint, &printinterval))
1319		printf("Approaching the limit on PV entries, "
1320		    "increase the vm.pmap.shpgperproc tunable.\n");
1321	vpq = &vm_page_queues[PQ_INACTIVE];
1322retry:
1323	TAILQ_FOREACH(m, &vpq->pl, pageq) {
1324		if (m->hold_count || m->busy)
1325			continue;
1326		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
1327			va = pv->pv_va;
1328			pmap = pv->pv_pmap;
1329			/* Avoid deadlock and lock recursion. */
1330			if (pmap > locked_pmap)
1331				PMAP_LOCK(pmap);
1332			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
1333				continue;
1334			pmap->pm_stats.resident_count--;
1335			pte = pmap_pte(pmap, va);
1336			KASSERT(pte != NULL, ("pte"));
1337			oldpte = loadandclear((u_int *)pte);
1338			if (is_kernel_pmap(pmap))
1339				*pte = PTE_G;
1340			KASSERT((oldpte & PTE_W) == 0,
1341			    ("wired pte for unwired page"));
1342			if (m->md.pv_flags & PV_TABLE_REF)
1343				vm_page_flag_set(m, PG_REFERENCED);
1344			if (oldpte & PTE_M)
1345				vm_page_dirty(m);
1346			pmap_invalidate_page(pmap, va);
1347			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1348			m->md.pv_list_count--;
1349			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1350			if (TAILQ_EMPTY(&m->md.pv_list)) {
1351				vm_page_flag_clear(m, PG_WRITEABLE);
1352				m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1353			}
1354			pmap_unuse_pt(pmap, va, pv->pv_ptem);
1355			if (pmap != locked_pmap)
1356				PMAP_UNLOCK(pmap);
1357			if (allocated_pv == NULL)
1358				allocated_pv = pv;
1359			else
1360				free_pv_entry(pv);
1361		}
1362	}
1363	if (allocated_pv == NULL) {
1364		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
1365			vpq = &vm_page_queues[PQ_ACTIVE];
1366			goto retry;
1367		}
1368		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
1369	}
1370	return (allocated_pv);
1371}
1372
1373/*
1374 *  Revision 1.370
1375 *
1376 *  Move pmap_collect() out of the machine-dependent code, rename it
1377 *  to reflect its new location, and add page queue and flag locking.
1378 *
1379 *  Notes: (1) alpha, i386, and ia64 had identical implementations
1380 *  of pmap_collect() in terms of machine-independent interfaces;
1381 *  (2) sparc64 doesn't require it; (3) powerpc had it as a TODO.
1382 *
1383 *  MIPS implementation was identical to alpha [Junos 8.2]
1384 */
1385
1386/*
1387 * Remove the pv entry for the given (pmap, va) pair from both the
1388 * page's pv list and the pmap's pv list, and free it.  Whichever
1389 * of the two lists is expected to be shorter is searched to find
1390 * the entry.
1391 */
1392
1393static void
1394pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
1395{
1396	pv_entry_t pv;
1397
1398	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1399	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1400	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1401		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1402			if (pmap == pv->pv_pmap && va == pv->pv_va)
1403				break;
1404		}
1405	} else {
1406		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1407			if (va == pv->pv_va)
1408				break;
1409		}
1410	}
1411
1412	KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
1413	TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1414	m->md.pv_list_count--;
1415	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1416		vm_page_flag_clear(m, PG_WRITEABLE);
1417
1418	TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1419	free_pv_entry(pv);
1420}
1421
1422/*
1423 * Create a pv entry for page at pa for
1424 * (pmap, va).
1425 */
1426static void
1427pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m,
1428    boolean_t wired)
1429{
1430	pv_entry_t pv;
1431
1432	pv = get_pv_entry(pmap);
1433	pv->pv_va = va;
1434	pv->pv_pmap = pmap;
1435	pv->pv_ptem = mpte;
1436	pv->pv_wired = wired;
1437
1438	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1439	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1440	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1441	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1442	m->md.pv_list_count++;
1443}
1444
1445/*
1446 * Conditionally create a pv entry.
1447 */
1448static boolean_t
1449pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
1450    vm_page_t m)
1451{
1452	pv_entry_t pv;
1453
1454	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1455	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1456	if (pv_entry_count < pv_entry_high_water &&
1457	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
1458		pv_entry_count++;
1459		pv->pv_va = va;
1460		pv->pv_pmap = pmap;
1461		pv->pv_ptem = mpte;
1462		pv->pv_wired = FALSE;
1463		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1464		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1465		m->md.pv_list_count++;
1466		return (TRUE);
1467	} else
1468		return (FALSE);
1469}
1470
1471/*
1472 * pmap_remove_pte: do the things to unmap a page in a process
1473 */
1474static int
1475pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
1476{
1477	pt_entry_t oldpte;
1478	vm_page_t m;
1479	vm_offset_t pa;
1480
1481	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1482	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1483
1484	oldpte = loadandclear((u_int *)ptq);
1485	if (is_kernel_pmap(pmap))
1486		*ptq = PTE_G;
1487
1488	if (oldpte & PTE_W)
1489		pmap->pm_stats.wired_count -= 1;
1490
1491	pmap->pm_stats.resident_count -= 1;
1492	pa = mips_tlbpfn_to_paddr(oldpte);
1493
1494	if (page_is_managed(pa)) {
1495		m = PHYS_TO_VM_PAGE(pa);
1496		if (oldpte & PTE_M) {
1497#if defined(PMAP_DIAGNOSTIC)
1498			if (pmap_nw_modified(oldpte)) {
1499				printf(
1500				    "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1501				    va, oldpte);
1502			}
1503#endif
1504			vm_page_dirty(m);
1505		}
1506		if (m->md.pv_flags & PV_TABLE_REF)
1507			vm_page_flag_set(m, PG_REFERENCED);
1508		m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1509
1510		pmap_remove_entry(pmap, m, va);
1511	}
1512	return pmap_unuse_pt(pmap, va, NULL);
1513}
1514
1515/*
1516 * Remove a single page from a process address space
1517 */
1518static void
1519pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1520{
1521	register pt_entry_t *ptq;
1522
1523	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1524	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1525	ptq = pmap_pte(pmap, va);
1526
1527	/*
1528	 * if there is no pte for this address, just skip it!!!
1529	 */
1530	if (!ptq || !pmap_pte_v(ptq)) {
1531		return;
1532	}
1533	/*
1534	 * get a local va for mappings for this pmap.
1535	 */
1536	(void)pmap_remove_pte(pmap, ptq, va);
1537	pmap_invalidate_page(pmap, va);
1538
1539	return;
1540}
1541
1542/*
1543 *	Remove the given range of addresses from the specified map.
1544 *
1545 *	It is assumed that the start and end are properly
1546 *	rounded to the page size.
1547 */
1548void
1549pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
1550{
1551	vm_offset_t va, nva;
1552
1553	if (pmap == NULL)
1554		return;
1555
1556	if (pmap->pm_stats.resident_count == 0)
1557		return;
1558
1559	vm_page_lock_queues();
1560	PMAP_LOCK(pmap);
1561
1562	/*
1563	 * Special handling for removing a single page: a very common
1564	 * operation where it is easy to short-circuit some code.
1565	 */
1566	if ((sva + PAGE_SIZE) == eva) {
1567		pmap_remove_page(pmap, sva);
1568		goto out;
1569	}
1570	for (va = sva; va < eva; va = nva) {
1571		if (!*pmap_pde(pmap, va)) {
1572			nva = mips_segtrunc(va + MIPS_SEGSIZE);
1573			continue;
1574		}
1575		pmap_remove_page(pmap, va);
1576		nva = va + PAGE_SIZE;
1577	}
1578
1579out:
1580	vm_page_unlock_queues();
1581	PMAP_UNLOCK(pmap);
1582}
1583
1584/*
1585 *	Routine:	pmap_remove_all
1586 *	Function:
1587 *		Removes this physical page from
1588 *		all physical maps in which it resides.
1589 *		Reflects back modify bits to the pager.
1590 *
1591 *	Notes:
1592 *		Original versions of this routine were very
1593 *		inefficient because they iteratively called
1594 *		pmap_remove (slow...)
1595 */
1596
1597void
1598pmap_remove_all(vm_page_t m)
1599{
1600	register pv_entry_t pv;
1601	register pt_entry_t *pte, tpte;
1602
1603	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1604	    ("pmap_remove_all: page %p is fictitious", m));
1605	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1606
1607	if (m->md.pv_flags & PV_TABLE_REF)
1608		vm_page_flag_set(m, PG_REFERENCED);
1609
1610	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1611		PMAP_LOCK(pv->pv_pmap);
1612		pv->pv_pmap->pm_stats.resident_count--;
1613
1614		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
1615
1616		tpte = loadandclear((u_int *)pte);
1617		if (is_kernel_pmap(pv->pv_pmap))
1618			*pte = PTE_G;
1619
1620		if (tpte & PTE_W)
1621			pv->pv_pmap->pm_stats.wired_count--;
1622
1623		/*
1624		 * Update the vm_page_t clean and reference bits.
1625		 */
1626		if (tpte & PTE_M) {
1627#if defined(PMAP_DIAGNOSTIC)
1628			if (pmap_nw_modified(tpte)) {
1629				printf(
1630				    "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1631				    pv->pv_va, tpte);
1632			}
1633#endif
1634			vm_page_dirty(m);
1635		}
1636		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1637
1638		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1639		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1640		m->md.pv_list_count--;
1641		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1642		PMAP_UNLOCK(pv->pv_pmap);
1643		free_pv_entry(pv);
1644	}
1645
1646	vm_page_flag_clear(m, PG_WRITEABLE);
1647	m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1648}
1649
1650/*
1651 *	Set the physical protection on the
1652 *	specified range of this map as requested.
1653 */
1654void
1655pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1656{
1657	pt_entry_t *pte;
1658
1659	if (pmap == NULL)
1660		return;
1661
1662	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1663		pmap_remove(pmap, sva, eva);
1664		return;
1665	}
1666	if (prot & VM_PROT_WRITE)
1667		return;
1668
1669	vm_page_lock_queues();
1670	PMAP_LOCK(pmap);
1671	while (sva < eva) {
1672		pt_entry_t pbits, obits;
1673		vm_page_t m;
1674		vm_offset_t pa;
1675
1676		/*
1677		 * If segment table entry is empty, skip this segment.
1678		 */
1679		if (!*pmap_pde(pmap, sva)) {
1680			sva = mips_segtrunc(sva + MIPS_SEGSIZE);
1681			continue;
1682		}
1683		/*
1684		 * If pte is invalid, skip this page
1685		 */
1686		pte = pmap_pte(pmap, sva);
1687		if (!pmap_pte_v(pte)) {
1688			sva += PAGE_SIZE;
1689			continue;
1690		}
1691retry:
1692		obits = pbits = *pte;
1693		pa = mips_tlbpfn_to_paddr(pbits);
1694
1695		if (page_is_managed(pa)) {
1696			m = PHYS_TO_VM_PAGE(pa);
1697			if (m->md.pv_flags & PV_TABLE_REF) {
1698				vm_page_flag_set(m, PG_REFERENCED);
1699				m->md.pv_flags &= ~PV_TABLE_REF;
1700			}
1701			if (pbits & PTE_M) {
1702				vm_page_dirty(m);
1703				m->md.pv_flags &= ~PV_TABLE_MOD;
1704			}
1705		}
1706		pbits = (pbits & ~PTE_M) | PTE_RO;
1707
1708		if (pbits != *pte) {
1709			if (!atomic_cmpset_int((u_int *)pte, obits, pbits))
1710				goto retry;
1711			pmap_update_page(pmap, sva, pbits);
1712		}
1713		sva += PAGE_SIZE;
1714	}
1715	vm_page_unlock_queues();
1716	PMAP_UNLOCK(pmap);
1717}
1718
1719/*
1720 *	Insert the given physical page (p) at
1721 *	the specified virtual address (v) in the
1722 *	target physical map with the protection requested.
1723 *
1724 *	If specified, the page will be wired down, meaning
1725 *	that the related pte can not be reclaimed.
1726 *
1727 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1728 *	or lose information.  That is, this routine must actually
1729 *	insert this page into the given map NOW.
1730 */
1731void
1732pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t fault_type, vm_page_t m, vm_prot_t prot,
1733    boolean_t wired)
1734{
1735	vm_offset_t pa, opa;
1736	register pt_entry_t *pte;
1737	pt_entry_t origpte, newpte;
1738	vm_page_t mpte, om;
1739	int rw = 0;
1740
1741	if (pmap == NULL)
1742		return;
1743
1744	va &= ~PAGE_MASK;
1745#ifdef PMAP_DIAGNOSTIC
1746	if (va > VM_MAX_KERNEL_ADDRESS)
1747		panic("pmap_enter: toobig");
1748#endif
1749
1750	mpte = NULL;
1751
1752	vm_page_lock_queues();
1753	PMAP_LOCK(pmap);
1754
1755	/*
1756	 * In the case that a page table page is not resident, we are
1757	 * creating it here.
1758	 */
1759	if (va < VM_MAXUSER_ADDRESS) {
1760		mpte = pmap_allocpte(pmap, va, M_WAITOK);
1761	}
1762	pte = pmap_pte(pmap, va);
1763
1764	/*
1765	 * Page Directory table entry not valid, we need a new PT page
1766	 */
1767	if (pte == NULL) {
1768		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n",
1769		    (void *)pmap->pm_segtab, va);
1770	}
1771	pa = VM_PAGE_TO_PHYS(m);
1772	om = NULL;
1773	origpte = *pte;
1774	opa = mips_tlbpfn_to_paddr(origpte);
1775
1776	/*
1777	 * Mapping has not changed, must be protection or wiring change.
1778	 */
1779	if ((origpte & PTE_V) && (opa == pa)) {
1780		/*
1781		 * Wiring change, just update stats. We don't worry about
1782		 * wiring PT pages as they remain resident as long as there
1783		 * are valid mappings in them. Hence, if a user page is
1784		 * wired, the PT page will be also.
1785		 */
1786		if (wired && ((origpte & PTE_W) == 0))
1787			pmap->pm_stats.wired_count++;
1788		else if (!wired && (origpte & PTE_W))
1789			pmap->pm_stats.wired_count--;
1790
1791#if defined(PMAP_DIAGNOSTIC)
1792		if (pmap_nw_modified(origpte)) {
1793			printf(
1794			    "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
1795			    va, origpte);
1796		}
1797#endif
1798
1799		/*
1800		 * Remove extra pte reference
1801		 */
1802		if (mpte)
1803			mpte->wire_count--;
1804
1805		/*
1806		 * We might be turning off write access to the page, so we
1807		 * go ahead and sense modify status.
1808		 */
1809		if (page_is_managed(opa)) {
1810			om = m;
1811		}
1812		goto validate;
1813	}
1814	/*
1815	 * Mapping has changed, invalidate old range and fall through to
1816	 * handle validating new mapping.
1817	 */
1818	if (opa) {
1819		if (origpte & PTE_W)
1820			pmap->pm_stats.wired_count--;
1821
1822		if (page_is_managed(opa)) {
1823			om = PHYS_TO_VM_PAGE(opa);
1824			pmap_remove_entry(pmap, om, va);
1825		}
1826		if (mpte != NULL) {
1827			mpte->wire_count--;
1828			KASSERT(mpte->wire_count > 0,
1829			    ("pmap_enter: missing reference to page table page,"
1830			    " va: 0x%x", va));
1831		}
1832	} else
1833		pmap->pm_stats.resident_count++;
1834
1835	/*
1836	 * Enter on the PV list if part of our managed memory. Note that we
1837	 * raise IPL while manipulating pv_table since pmap_enter can be
1838	 * called at interrupt time.
1839	 */
1840	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1841		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1842		    ("pmap_enter: managed mapping within the clean submap"));
1843		pmap_insert_entry(pmap, va, mpte, m, wired);
1844	}
1845	/*
1846	 * Increment counters
1847	 */
1848	if (wired)
1849		pmap->pm_stats.wired_count++;
1850
1851validate:
1852	if ((fault_type & VM_PROT_WRITE) != 0)
1853		m->md.pv_flags |= PV_TABLE_MOD | PV_TABLE_REF;
1854	rw = init_pte_prot(va, m, prot);
1855
1856#ifdef PMAP_DEBUG
1857	printf("pmap_enter:  va: 0x%08x -> pa: 0x%08x\n", va, pa);
1858#endif
1859	/*
1860	 * Now validate mapping with desired protection/wiring.
1861	 */
1862	newpte = mips_paddr_to_tlbpfn(pa) | rw | PTE_V;
1863
1864	if (is_cacheable_mem(pa))
1865		newpte |= PTE_CACHE;
1866	else
1867		newpte |= PTE_UNCACHED;
1868
1869	if (wired)
1870		newpte |= PTE_W;
1871
1872	if (is_kernel_pmap(pmap)) {
1873		newpte |= PTE_G;
1874	}
1875
1876	/*
1877	 * if the mapping or permission bits are different, we need to
1878	 * update the pte.
1879	 */
1880	if (origpte != newpte) {
1881		if (origpte & PTE_V) {
1882			*pte = newpte;
1883			if (page_is_managed(opa) && (opa != pa)) {
1884				if (om->md.pv_flags & PV_TABLE_REF)
1885					vm_page_flag_set(om, PG_REFERENCED);
1886				om->md.pv_flags &=
1887				    ~(PV_TABLE_REF | PV_TABLE_MOD);
1888			}
1889			if (origpte & PTE_M) {
1890				KASSERT((origpte & PTE_RW),
1891				    ("pmap_enter: modified page not writable:"
1892				    " va: 0x%x, pte: 0x%lx", va, origpte));
1893				if (page_is_managed(opa))
1894					vm_page_dirty(om);
1895			}
1896		} else {
1897			*pte = newpte;
1898		}
1899	}
1900	pmap_update_page(pmap, va, newpte);
1901
1902	/*
1903	 * Sync I & D caches for executable pages.  Do this only if the
1904	 * target pmap belongs to the current process.  Otherwise, an
1905	 * unresolvable TLB miss may occur.
1906	 */
1907	if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
1908	    (prot & VM_PROT_EXECUTE)) {
1909		mips_icache_sync_range(va, NBPG);
1910		mips_dcache_wbinv_range(va, NBPG);
1911	}
1912	vm_page_unlock_queues();
1913	PMAP_UNLOCK(pmap);
1914}
1915
1916/*
1917 * this code makes some *MAJOR* assumptions:
1918 * 1. Current pmap & pmap exists.
1919 * 2. Not wired.
1920 * 3. Read access.
1921 * 4. No page table pages.
1922 * but is *MUCH* faster than pmap_enter...
1923 */
1924
1925void
1926pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1927{
1928
1929	PMAP_LOCK(pmap);
1930	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
1931	PMAP_UNLOCK(pmap);
1932}
1933
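/*
 * Locked body of pmap_enter_quick(): enter a read-only, unwired
 * mapping for "m" at "va" without sleeping.  Returns the page table
 * page for the caller to pass back in when mapping consecutive pages,
 * or NULL if the mapping could not (or need not) be entered.
 */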
1934static vm_page_t
1935pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1936    vm_prot_t prot, vm_page_t mpte)
1937{
1938	pt_entry_t *pte;
1939	vm_offset_t pa;
1940
1941	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1942	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1943	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1944	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1945	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1946
1947	/*
1948	 * In the case that a page table page is not resident, we are
1949	 * creating it here.
1950	 */
1951	if (va < VM_MAXUSER_ADDRESS) {
1952		unsigned ptepindex;
1953		vm_offset_t pteva;
1954
1955		/*
1956		 * Calculate pagetable page index
1957		 */
1958		ptepindex = va >> SEGSHIFT;
1959		if (mpte && (mpte->pindex == ptepindex)) {
1960			mpte->wire_count++;
1961		} else {
1962			/*
1963			 * Get the page directory entry
1964			 */
1965			pteva = (vm_offset_t)pmap->pm_segtab[ptepindex];
1966
1967			/*
1968			 * If the page table page is mapped, we just
1969			 * increment the wire count, and activate it.
1970			 */
1971			if (pteva) {
1972				if (pmap->pm_ptphint &&
1973				    (pmap->pm_ptphint->pindex == ptepindex)) {
1974					mpte = pmap->pm_ptphint;
1975				} else {
1976					mpte = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva));
1977					pmap->pm_ptphint = mpte;
1978				}
1979				mpte->wire_count++;
1980			} else {
1981				mpte = _pmap_allocpte(pmap, ptepindex,
1982				    M_NOWAIT);
1983				if (mpte == NULL)
1984					return (mpte);
1985			}
1986		}
1987	} else {
1988		mpte = NULL;
1989	}
1990
1991	pte = pmap_pte(pmap, va);
1992	if (pmap_pte_v(pte)) {
1993		if (mpte != NULL) {
1994			mpte->wire_count--;
1995			mpte = NULL;
1996		}
1997		return (mpte);
1998	}
1999
2000	/*
2001	 * Enter on the PV list if part of our managed memory.
2002	 */
2003	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
2004	    !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2005		if (mpte != NULL) {
2006			pmap_unwire_pte_hold(pmap, mpte);
2007			mpte = NULL;
2008		}
2009		return (mpte);
2010	}
2011
2012	/*
2013	 * Increment counters
2014	 */
2015	pmap->pm_stats.resident_count++;
2016
2017	pa = VM_PAGE_TO_PHYS(m);
2018
2019	/*
2020	 * Now validate mapping with RO protection
2021	 */
2022	*pte = mips_paddr_to_tlbpfn(pa) | PTE_V;
2023
2024	if (is_cacheable_mem(pa))
2025		*pte |= PTE_CACHE;
2026	else
2027		*pte |= PTE_UNCACHED;
2028
2029	if (is_kernel_pmap(pmap))
2030		*pte |= PTE_G;
2031	else {
2032		*pte |= PTE_RO;
2033		/*
2034		 * Sync I & D caches.  Do this only if the target pmap
2035		 * belongs to the current process.  Otherwise, an
2036		 * unresolvable TLB miss may occur. */
2037		if (pmap == &curproc->p_vmspace->vm_pmap) {
2038			va &= ~PAGE_MASK;
2039			mips_icache_sync_range(va, NBPG);
2040			mips_dcache_wbinv_range(va, NBPG);
2041		}
2042	}
2043	return (mpte);
2044}
2045
2046/*
2047 * Make a temporary mapping for a physical address.  This is only intended
2048 * to be used for panic dumps.
2049 */
2050void *
2051pmap_kenter_temporary(vm_paddr_t pa, int i)
2052{
2053	vm_offset_t va;
2054
2055	if (i != 0)
2056		printf("%s: ERROR: only one page of temporary virtual address mapping is supported\n",
2057		    __func__);
2058
2059#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2060	if (need_wired_tlb_page_pool) {
2061		va = pmap_map_fpage(pa, &fpages_shared[PMAP_FPAGE_KENTER_TEMP],
2062		    TRUE);
2063	} else
2064#endif
2065	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
2066		va = MIPS_PHYS_TO_CACHED(pa);
2067	} else {
2068		int cpu;
2069		struct local_sysmaps *sysm;
2070
2071		cpu = PCPU_GET(cpuid);
2072		sysm = &sysmap_lmem[cpu];
2073		/* Since this is only for the debugger, no locks or other synchronization are used. */
2074		sysm->CMAP1 = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2075		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2076		sysm->valid1 = 1;
2077		va = (vm_offset_t)sysm->CADDR1;
2078	}
2079	return ((void *)va);
2080}
2081
2082void
2083pmap_kenter_temporary_free(vm_paddr_t pa)
2084{
2085	int cpu;
2086	struct local_sysmaps *sysm;
2087
2088	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
2089		/* nothing to do for this case */
2090		return;
2091	}
2092	cpu = PCPU_GET(cpuid);
2093	sysm = &sysmap_lmem[cpu];
2094	if (sysm->valid1) {
2095		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2096		sysm->CMAP1 = 0;
2097		sysm->valid1 = 0;
2098	}
2099}
2100
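/*
 * Illustrative sketch (not compiled in): a hypothetical dump helper showing
 * the intended pairing of pmap_kenter_temporary() and
 * pmap_kenter_temporary_free().  Only index 0 of the temporary window is
 * supported, so a caller maps, copies and releases one page at a time.  The
 * function and buffer names below are invented for the example.
 */
#if 0
static void
example_dump_one_page(vm_paddr_t pa, void *buf)
{
	void *va;

	va = pmap_kenter_temporary(pa, 0);
	bcopy(va, buf, PAGE_SIZE);
	pmap_kenter_temporary_free(pa);
}
#endif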
2101/*
2102 * Moved the code to Machine Independent
2103 * The code that was here has been moved to the machine-independent
2104 *	 vm_map_pmap_enter().
2105
2106/*
2107 * Maps a sequence of resident pages belonging to the same object.
2108 * The sequence begins with the given page m_start.  This page is
2109 * mapped at the given virtual address start.  Each subsequent page is
2110 * mapped at a virtual address that is offset from start by the same
2111 * amount as the page is offset from m_start within the object.  The
2112 * last page in the sequence is the page with the largest offset from
2113 * m_start that can be mapped at a virtual address less than the given
2114 * virtual address end.  Not every virtual page between start and end
2115 * is mapped; only those for which a resident page exists with the
2116 * corresponding offset from m_start are mapped.
2117 */
2118void
2119pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2120    vm_page_t m_start, vm_prot_t prot)
2121{
2122	vm_page_t m, mpte;
2123	vm_pindex_t diff, psize;
2124
2125	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
2126	psize = atop(end - start);
2127	mpte = NULL;
2128	m = m_start;
2129	PMAP_LOCK(pmap);
2130	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2131		mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
2132		    prot, mpte);
2133		m = TAILQ_NEXT(m, listq);
2134	}
2135	PMAP_UNLOCK(pmap);
2136}
2137
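/*
 * Illustrative sketch (not compiled in): a hypothetical caller pre-mapping
 * the resident pages of an object into the range [start, end) with read-only
 * protection, honoring the object locking requirement asserted above.  The
 * helper name and parameters are invented for the example.
 */
#if 0
static void
example_premap_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m_start;

	VM_OBJECT_LOCK(object);
	m_start = vm_page_lookup(object, pindex);
	if (m_start != NULL)
		pmap_enter_object(pmap, start, end, m_start, VM_PROT_READ);
	VM_OBJECT_UNLOCK(object);
}
#endif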
2138/*
2139 * pmap_object_init_pt preloads the ptes for a given object
2140 * into the specified pmap.  This eliminates the blast of soft
2141 * faults on process startup and immediately after an mmap.
2142 */
2143void
2144pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2145    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2146{
2147	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2148	KASSERT(object->type == OBJT_DEVICE,
2149	    ("pmap_object_init_pt: non-device object"));
2150}
2151
2152/*
2153 *	Routine:	pmap_change_wiring
2154 *	Function:	Change the wiring attribute for a map/virtual-address
2155 *			pair.
2156 *	In/out conditions:
2157 *			The mapping must already exist in the pmap.
2158 */
2159void
2160pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
2161{
2162	register pt_entry_t *pte;
2163
2164	if (pmap == NULL)
2165		return;
2166
2167	PMAP_LOCK(pmap);
2168	pte = pmap_pte(pmap, va);
2169
2170	if (wired && !pmap_pte_w(pte))
2171		pmap->pm_stats.wired_count++;
2172	else if (!wired && pmap_pte_w(pte))
2173		pmap->pm_stats.wired_count--;
2174
2175	/*
2176	 * Wiring is not a hardware characteristic so there is no need to
2177	 * invalidate TLB.
2178	 */
2179	pmap_pte_set_w(pte, wired);
2180	PMAP_UNLOCK(pmap);
2181}
2182
2183/*
2184 *	Copy the range specified by src_addr/len
2185 *	from the source map to the range dst_addr/len
2186 *	in the destination map.
2187 *
2188 *	This routine is only advisory and need not do anything.
2189 */
2190
2191void
2192pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2193    vm_size_t len, vm_offset_t src_addr)
2194{
2195}
2196
2197/*
2198 *	pmap_zero_page zeros the specified hardware page by mapping
2199 *	the page into KVM and using bzero to clear its contents.
2200 */
2201void
2202pmap_zero_page(vm_page_t m)
2203{
2204	vm_offset_t va;
2205	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2206
2207#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2208	if (need_wired_tlb_page_pool) {
2209		struct fpage *fp1;
2210		struct sysmaps *sysmaps;
2211
2212		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
2213		mtx_lock(&sysmaps->lock);
2214		sched_pin();
2215
2216		fp1 = &sysmaps->fp[PMAP_FPAGE1];
2217		va = pmap_map_fpage(phys, fp1, FALSE);
2218		bzero((caddr_t)va, PAGE_SIZE);
2219		pmap_unmap_fpage(phys, fp1);
2220		sched_unpin();
2221		mtx_unlock(&sysmaps->lock);
2222		/*
2223		 * XXX: should the cache be flushed here?
2224		 */
2225	} else
2226#endif
2227	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
2228
2229		va = MIPS_PHYS_TO_UNCACHED(phys);
2230
2231		bzero((caddr_t)va, PAGE_SIZE);
2232		mips_dcache_wbinv_range(va, PAGE_SIZE);
2233	} else {
2234		int cpu;
2235		struct local_sysmaps *sysm;
2236
2237		cpu = PCPU_GET(cpuid);
2238		sysm = &sysmap_lmem[cpu];
2239		PMAP_LGMEM_LOCK(sysm);
2240		sched_pin();
2241		sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2242		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2243		sysm->valid1 = 1;
2244		bzero(sysm->CADDR1, PAGE_SIZE);
2245		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2246		sysm->CMAP1 = 0;
2247		sysm->valid1 = 0;
2248		sched_unpin();
2249		PMAP_LGMEM_UNLOCK(sysm);
2250	}
2251
2252}
2253
2254/*
2255 *	pmap_zero_page_area zeros the specified hardware page by mapping
2256 *	the page into KVM and using bzero to clear its contents.
2257 *
2258 *	off and size may not cover an area beyond a single hardware page.
2259 */
2260void
2261pmap_zero_page_area(vm_page_t m, int off, int size)
2262{
2263	vm_offset_t va;
2264	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2265
2266#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2267	if (need_wired_tlb_page_pool) {
2268		struct fpage *fp1;
2269		struct sysmaps *sysmaps;
2270
2271		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
2272		mtx_lock(&sysmaps->lock);
2273		sched_pin();
2274
2275		fp1 = &sysmaps->fp[PMAP_FPAGE1];
2276		va = pmap_map_fpage(phys, fp1, FALSE);
2277		bzero((caddr_t)va + off, size);
2278		pmap_unmap_fpage(phys, fp1);
2279
2280		sched_unpin();
2281		mtx_unlock(&sysmaps->lock);
2282	} else
2283#endif
2284	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
2285		va = MIPS_PHYS_TO_UNCACHED(phys);
2286		bzero((char *)(caddr_t)va + off, size);
2287		mips_dcache_wbinv_range(va + off, size);
2288	} else {
2289		int cpu;
2290		struct local_sysmaps *sysm;
2291
2292		cpu = PCPU_GET(cpuid);
2293		sysm = &sysmap_lmem[cpu];
2294		PMAP_LGMEM_LOCK(sysm);
2295		sched_pin();
2296		sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2297		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2298		sysm->valid1 = 1;
2299		bzero((char *)sysm->CADDR1 + off, size);
2300		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2301		sysm->CMAP1 = 0;
2302		sysm->valid1 = 0;
2303		sched_unpin();
2304		PMAP_LGMEM_UNLOCK(sysm);
2305	}
2306}
2307
2308void
2309pmap_zero_page_idle(vm_page_t m)
2310{
2311	vm_offset_t va;
2312	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2313
2314#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2315	if (need_wired_tlb_page_pool) {
2316		sched_pin();
2317		va = pmap_map_fpage(phys, &fpages_shared[PMAP_FPAGE3], FALSE);
2318		bzero((caddr_t)va, PAGE_SIZE);
2319		pmap_unmap_fpage(phys, &fpages_shared[PMAP_FPAGE3]);
2320		sched_unpin();
2321	} else
2322#endif
2323	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
2324		va = MIPS_PHYS_TO_UNCACHED(phys);
2325		bzero((caddr_t)va, PAGE_SIZE);
2326		mips_dcache_wbinv_range(va, PAGE_SIZE);
2327	} else {
2328		int cpu;
2329		struct local_sysmaps *sysm;
2330
2331		cpu = PCPU_GET(cpuid);
2332		sysm = &sysmap_lmem[cpu];
2333		PMAP_LGMEM_LOCK(sysm);
2334		sched_pin();
2335		sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2336		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2337		sysm->valid1 = 1;
2338		bzero(sysm->CADDR1, PAGE_SIZE);
2339		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2340		sysm->CMAP1 = 0;
2341		sysm->valid1 = 0;
2342		sched_unpin();
2343		PMAP_LGMEM_UNLOCK(sysm);
2344	}
2345
2346}
2347
2348/*
2349 *	pmap_copy_page copies the specified (machine independent)
2350 *	page by mapping the page into virtual memory and using
2351 *	bcopy to copy the page, one machine dependent page at a
2352 *	time.
2353 */
2354void
2355pmap_copy_page(vm_page_t src, vm_page_t dst)
2356{
2357	vm_offset_t va_src, va_dst;
2358	vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src);
2359	vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst);
2360
2361
2362#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2363	if (need_wired_tlb_page_pool) {
2364		struct fpage *fp1, *fp2;
2365		struct sysmaps *sysmaps;
2366
2367		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
2368		mtx_lock(&sysmaps->lock);
2369		sched_pin();
2370
2371		fp1 = &sysmaps->fp[PMAP_FPAGE1];
2372		fp2 = &sysmaps->fp[PMAP_FPAGE2];
2373
2374		va_src = pmap_map_fpage(phy_src, fp1, FALSE);
2375		va_dst = pmap_map_fpage(phy_dst, fp2, FALSE);
2376
2377		bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2378
2379		pmap_unmap_fpage(phy_src, fp1);
2380		pmap_unmap_fpage(phy_dst, fp2);
2381		sched_unpin();
2382		mtx_unlock(&sysmaps->lock);
2383
2384		/*
2385		 * XXX: should the cache be flushed here?
2386		 */
2387	} else
2388#endif
2389	{
2390		if ((phy_src < MIPS_KSEG0_LARGEST_PHYS) && (phy_dst < MIPS_KSEG0_LARGEST_PHYS)) {
2391			/* easy case, all can be accessed via KSEG0 */
2392			va_src = MIPS_PHYS_TO_CACHED(phy_src);
2393			va_dst = MIPS_PHYS_TO_CACHED(phy_dst);
2394			bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2395		} else {
2396			int cpu;
2397			struct local_sysmaps *sysm;
2398
2399			cpu = PCPU_GET(cpuid);
2400			sysm = &sysmap_lmem[cpu];
2401			PMAP_LGMEM_LOCK(sysm);
2402			sched_pin();
2403			if (phy_src < MIPS_KSEG0_LARGEST_PHYS) {
2404				/* one side needs mapping - dest */
2405				va_src = MIPS_PHYS_TO_CACHED(phy_src);
2406				sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2407				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2);
2408				sysm->valid2 = 1;
2409				va_dst = (vm_offset_t)sysm->CADDR2;
2410			} else if (phy_dst < MIPS_KSEG0_LARGEST_PHYS) {
2411				/* one side needs mapping - src */
2412				va_dst = MIPS_PHYS_TO_CACHED(phy_dst);
2413				sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2414				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2415				va_src = (vm_offset_t)sysm->CADDR1;
2416				sysm->valid1 = 1;
2417			} else {
2418				/* all need mapping */
2419				sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2420				sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2421				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2422				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2);
2423				sysm->valid1 = sysm->valid2 = 1;
2424				va_src = (vm_offset_t)sysm->CADDR1;
2425				va_dst = (vm_offset_t)sysm->CADDR2;
2426			}
2427			bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2428			if (sysm->valid1) {
2429				pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2430				sysm->CMAP1 = 0;
2431				sysm->valid1 = 0;
2432			}
2433			if (sysm->valid2) {
2434				pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR2);
2435				sysm->CMAP2 = 0;
2436				sysm->valid2 = 0;
2437			}
2438			sched_unpin();
2439			PMAP_LGMEM_UNLOCK(sysm);
2440		}
2441	}
2442}
2443
2444/*
2445 * Returns true if the pmap's pv is one of the first
2446 * 16 pvs linked to from this page.  This count may
2447 * be changed upwards or downwards in the future; it
2448 * is only necessary that true be returned for a small
2449 * subset of pmaps for proper page aging.
2450 */
2451boolean_t
2452pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2453{
2454	pv_entry_t pv;
2455	int loops = 0;
2456
2457	if (m->flags & PG_FICTITIOUS)
2458		return FALSE;
2459
2460	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2461	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2462		if (pv->pv_pmap == pmap) {
2463			return TRUE;
2464		}
2465		loops++;
2466		if (loops >= 16)
2467			break;
2468	}
2469	return (FALSE);
2470}
2471
2472/*
2473 * Remove all pages from specified address space
2474 * this aids process exit speeds.  Also, this code
2475 * is special cased for current process only, but
2476 * can have the more generic (and slightly slower)
2477 * mode enabled.  This is much faster than pmap_remove
2478 * in the case of running down an entire address space.
2479 */
2480void
2481pmap_remove_pages(pmap_t pmap)
2482{
2483	pt_entry_t *pte, tpte;
2484	pv_entry_t pv, npv;
2485	vm_page_t m;
2486
2487	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2488		printf("warning: pmap_remove_pages called with non-current pmap\n");
2489		return;
2490	}
2491	vm_page_lock_queues();
2492	PMAP_LOCK(pmap);
2493	sched_pin();
2494	/* XXX: should this use TAILQ_FOREACH_SAFE? */
2495	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2496	    pv;
2497	    pv = npv) {
2498
2499		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2500		if (!pmap_pte_v(pte))
2501			panic("pmap_remove_pages: page on pm_pvlist has no pte\n");
2502		tpte = *pte;
2503
2504		/*
2505		 * We cannot remove wired pages from a process' mapping at this time.
2506		 */
2507		if (tpte & PTE_W) {
2508			npv = TAILQ_NEXT(pv, pv_plist);
2509			continue;
2510		}
2511		*pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2512
2513		m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(tpte));
2514
2515		KASSERT(m < &vm_page_array[vm_page_array_size],
2516		    ("pmap_remove_pages: bad tpte %lx", tpte));
2517
2518		pv->pv_pmap->pm_stats.resident_count--;
2519
2520		/*
2521		 * Update the vm_page_t clean and reference bits.
2522		 */
2523		if (tpte & PTE_M) {
2524			vm_page_dirty(m);
2525		}
2526		npv = TAILQ_NEXT(pv, pv_plist);
2527		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2528
2529		m->md.pv_list_count--;
2530		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2531		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
2532			vm_page_flag_clear(m, PG_WRITEABLE);
2533		}
2534		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2535		free_pv_entry(pv);
2536	}
2537	sched_unpin();
2538	pmap_invalidate_all(pmap);
2539	PMAP_UNLOCK(pmap);
2540	vm_page_unlock_queues();
2541}
2542
2543/*
2544 * pmap_testbit tests bits in pte's
2545 * note that the testbit/changebit routines are inline,
2546 * and a lot of things compile-time evaluate.
2547 */
2548static boolean_t
2549pmap_testbit(vm_page_t m, int bit)
2550{
2551	pv_entry_t pv;
2552	pt_entry_t *pte;
2553	boolean_t rv = FALSE;
2554
2555	if (m->flags & PG_FICTITIOUS)
2556		return rv;
2557
2558	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2559		return rv;
2560
2561	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2562	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2563#if defined(PMAP_DIAGNOSTIC)
2564		if (!pv->pv_pmap) {
2565			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2566			continue;
2567		}
2568#endif
2569		PMAP_LOCK(pv->pv_pmap);
2570		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2571		rv = (*pte & bit) != 0;
2572		PMAP_UNLOCK(pv->pv_pmap);
2573		if (rv)
2574			break;
2575	}
2576	return (rv);
2577}
2578
2579/*
2580 * this routine is used to modify bits in ptes
2581 */
2582static __inline void
2583pmap_changebit(vm_page_t m, int bit, boolean_t setem)
2584{
2585	register pv_entry_t pv;
2586	register pt_entry_t *pte;
2587
2588	if (m->flags & PG_FICTITIOUS)
2589		return;
2590
2591	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2592	/*
2593	 * Loop over all current mappings, setting/clearing as appropriate.
2594	 * If setting RO, do we need to clear the VAC?
2595	 */
2596	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2597#if defined(PMAP_DIAGNOSTIC)
2598		if (!pv->pv_pmap) {
2599			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2600			continue;
2601		}
2602#endif
2603
2604		PMAP_LOCK(pv->pv_pmap);
2605		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2606
2607		if (setem) {
2608			*(int *)pte |= bit;
2609			pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2610		} else {
2611			vm_offset_t pbits = *(vm_offset_t *)pte;
2612
2613			if (pbits & bit) {
2614				if (bit == PTE_RW) {
2615					if (pbits & PTE_M) {
2616						vm_page_dirty(m);
2617					}
2618					*(int *)pte = (pbits & ~(PTE_M | PTE_RW)) |
2619					    PTE_RO;
2620				} else {
2621					*(int *)pte = pbits & ~bit;
2622				}
2623				pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2624			}
2625		}
2626		PMAP_UNLOCK(pv->pv_pmap);
2627	}
2628	if (!setem && bit == PTE_RW)
2629		vm_page_flag_clear(m, PG_WRITEABLE);
2630}
2631
2632/*
2633 *	pmap_page_wired_mappings:
2634 *
2635 *	Return the number of managed mappings to the given physical page
2636 *	that are wired.
2637 */
2638int
2639pmap_page_wired_mappings(vm_page_t m)
2640{
2641	pv_entry_t pv;
2642	int count;
2643
2644	count = 0;
2645	if ((m->flags & PG_FICTITIOUS) != 0)
2646		return (count);
2647	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2648	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
2649	    if (pv->pv_wired)
2650		count++;
2651	return (count);
2652}
2653
2654/*
2655 * Clear the write and modified bits in each of the given page's mappings.
2656 */
2657void
2658pmap_remove_write(vm_page_t m)
2659{
2660	pv_entry_t pv, npv;
2661	vm_offset_t va;
2662	pt_entry_t *pte;
2663
2664	if ((m->flags & PG_WRITEABLE) == 0)
2665		return;
2666
2667	/*
2668	 * Loop over all current mappings, setting/clearing as appropriate.
2669	 */
2670	for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) {
2671		npv = TAILQ_NEXT(pv, pv_plist);
2672		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2673
2674		if ((pte == NULL) || !mips_pg_v(*pte))
2675			panic("page on pm_pvlist has no pte\n");
2676
2677		va = pv->pv_va;
2678		pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE,
2679		    VM_PROT_READ | VM_PROT_EXECUTE);
2680	}
2681	vm_page_flag_clear(m, PG_WRITEABLE);
2682}
2683
2684/*
2685 *	pmap_ts_referenced:
2686 *
2687 *	Return the count of reference bits for a page, clearing all of them.
2688 */
2689int
2690pmap_ts_referenced(vm_page_t m)
2691{
2692	if (m->flags & PG_FICTITIOUS)
2693		return (0);
2694
2695	if (m->md.pv_flags & PV_TABLE_REF) {
2696		m->md.pv_flags &= ~PV_TABLE_REF;
2697		return 1;
2698	}
2699	return 0;
2700}
2701
2702/*
2703 *	pmap_is_modified:
2704 *
2705 *	Return whether or not the specified physical page was modified
2706 *	in any physical maps.
2707 */
2708boolean_t
2709pmap_is_modified(vm_page_t m)
2710{
2711	if (m->flags & PG_FICTITIOUS)
2712		return FALSE;
2713
2714	if (m->md.pv_flags & PV_TABLE_MOD)
2715		return TRUE;
2716	else
2717		return pmap_testbit(m, PTE_M);
2718}
2719
2720/* N/C */
2721
2722/*
2723 *	pmap_is_prefaultable:
2724 *
2725 *	Return whether or not the specified virtual address is elgible
2726 *	Return whether or not the specified virtual address is eligible
2727 */
2728boolean_t
2729pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2730{
2731	pt_entry_t *pte;
2732	boolean_t rv;
2733
2734	rv = FALSE;
2735	PMAP_LOCK(pmap);
2736	if (*pmap_pde(pmap, addr)) {
2737		pte = pmap_pte(pmap, addr);
2738		rv = (*pte == 0);
2739	}
2740	PMAP_UNLOCK(pmap);
2741	return (rv);
2742}
2743
2744/*
2745 *	Clear the modify bits on the specified physical page.
2746 */
2747void
2748pmap_clear_modify(vm_page_t m)
2749{
2750	if (m->flags & PG_FICTITIOUS)
2751		return;
2752	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2753	if (m->md.pv_flags & PV_TABLE_MOD) {
2754		pmap_changebit(m, PTE_M, FALSE);
2755		m->md.pv_flags &= ~PV_TABLE_MOD;
2756	}
2757}
2758
2759/*
2760 *	pmap_clear_reference:
2761 *
2762 *	Clear the reference bit on the specified physical page.
2763 */
2764void
2765pmap_clear_reference(vm_page_t m)
2766{
2767	if (m->flags & PG_FICTITIOUS)
2768		return;
2769
2770	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2771	if (m->md.pv_flags & PV_TABLE_REF) {
2772		m->md.pv_flags &= ~PV_TABLE_REF;
2773	}
2774}
2775
2776/*
2777 * Miscellaneous support routines follow
2778 */
2779
2787/*
2788 * Map a set of physical memory pages into the kernel virtual
2789 * address space. Return a pointer to where it is mapped. This
2790 * routine is intended to be used for mapping device memory,
2791 * NOT real memory.
2792 */
2793void *
2794pmap_mapdev(vm_offset_t pa, vm_size_t size)
2795{
2796	vm_offset_t va, tmpva, offset;
2797
2798	/*
2799	 * KSEG1 maps only the first 512MB of the physical address space.
2800	 * For pa > 0x20000000 we must create a proper mapping using pmap_kenter().
2801	 */
2802	if (pa + size < MIPS_KSEG0_LARGEST_PHYS)
2803		return (void *)MIPS_PHYS_TO_KSEG1(pa);
2804	else {
2805		offset = pa & PAGE_MASK;
2806		size = roundup(size, PAGE_SIZE);
2807
2808		va = kmem_alloc_nofault(kernel_map, size);
2809		if (!va)
2810			panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2811		for (tmpva = va; size > 0;) {
2812			pmap_kenter(tmpva, pa);
2813			size -= PAGE_SIZE;
2814			tmpva += PAGE_SIZE;
2815			pa += PAGE_SIZE;
2816		}
2817	}
2818
2819	return ((void *)(va + offset));
2820}
2821
2822void
2823pmap_unmapdev(vm_offset_t va, vm_size_t size)
2824{
2825}
2826
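/*
 * Illustrative sketch (not compiled in): a hypothetical driver fragment
 * showing the intended use of pmap_mapdev() and pmap_unmapdev() for a device
 * register window.  The physical address is invented for the example;
 * addresses below MIPS_KSEG0_LARGEST_PHYS come back as KSEG1 pointers, while
 * larger ones receive a pmap_kenter()-backed KVA range.
 */
#if 0
static void
example_map_device_regs(void)
{
	void *regs;

	/* Hypothetical register window: one page at physical 0x1fd00000. */
	regs = pmap_mapdev(0x1fd00000, PAGE_SIZE);
	/* ... device register accesses through 'regs' would go here ... */
	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
}
#endif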
2827/*
2828 * perform the pmap work for mincore
2829 */
2830int
2831pmap_mincore(pmap_t pmap, vm_offset_t addr)
2832{
2833
2834	pt_entry_t *ptep, pte;
2835	vm_page_t m;
2836	int val = 0;
2837
2838	PMAP_LOCK(pmap);
2839	ptep = pmap_pte(pmap, addr);
2840	pte = (ptep != NULL) ? *ptep : 0;
2841	PMAP_UNLOCK(pmap);
2842
2843	if (mips_pg_v(pte)) {
2844		vm_offset_t pa;
2845
2846		val = MINCORE_INCORE;
2847		pa = mips_tlbpfn_to_paddr(pte);
2848		if (!page_is_managed(pa))
2849			return val;
2850
2851		m = PHYS_TO_VM_PAGE(pa);
2852
2853		/*
2854		 * Modified by us
2855		 */
2856		if (pte & PTE_M)
2857			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2858		/*
2859		 * Modified by someone
2860		 */
2861		else {
2862			vm_page_lock_queues();
2863			if (m->dirty || pmap_is_modified(m))
2864				val |= MINCORE_MODIFIED_OTHER;
2865			vm_page_unlock_queues();
2866		}
2867		/*
2868		 * Referenced by us or someone
2869		 */
2870		vm_page_lock_queues();
2871		if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
2872			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2873			vm_page_flag_set(m, PG_REFERENCED);
2874		}
2875		vm_page_unlock_queues();
2876	}
2877	return val;
2878}
2879
2880void
2881pmap_activate(struct thread *td)
2882{
2883	pmap_t pmap, oldpmap;
2884	struct proc *p = td->td_proc;
2885
2886	critical_enter();
2887
2888	pmap = vmspace_pmap(p->p_vmspace);
2889	oldpmap = PCPU_GET(curpmap);
2890
2891	if (oldpmap)
2892		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2893	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2894	pmap_asid_alloc(pmap);
2895	if (td == curthread) {
2896		PCPU_SET(segbase, pmap->pm_segtab);
2897		MachSetPID(pmap->pm_asid[PCPU_GET(cpuid)].asid);
2898	}
2899	PCPU_SET(curpmap, pmap);
2900	critical_exit();
2901}
2902
2903/*
2904 *	Increase the starting virtual address of the given mapping if a
2905 *	different alignment might result in more superpage mappings.
2906 */
2907void
2908pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2909    vm_offset_t *addr, vm_size_t size)
2910{
2911	vm_offset_t superpage_offset;
2912
2913	if (size < NBSEG)
2914		return;
2915	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
2916		offset += ptoa(object->pg_color);
2917	superpage_offset = offset & SEGOFSET;
2918	if (size - ((NBSEG - superpage_offset) & SEGOFSET) < NBSEG ||
2919	    (*addr & SEGOFSET) == superpage_offset)
2920		return;
2921	if ((*addr & SEGOFSET) < superpage_offset)
2922		*addr = (*addr & ~SEGOFSET) + superpage_offset;
2923	else
2924		*addr = ((*addr + SEGOFSET) & ~SEGOFSET) + superpage_offset;
2925}
2926
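/*
 * Worked example (constants assumed for illustration): with 4KB pages and
 * NBSEG = 4MB (SEGOFSET = 0x3fffff), a 16MB mapping of an object whose
 * relevant offset bits are 0x123000 gives superpage_offset = 0x123000.  A
 * proposed *addr of 0x20400000 has (*addr & SEGOFSET) == 0, which is less
 * than superpage_offset, so *addr is advanced to 0x20523000; the address and
 * the object offset then agree modulo NBSEG, the precondition for creating
 * superpage mappings.
 */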
2927int pmap_pid_dump(int pid);
2928
2929int
2930pmap_pid_dump(int pid)
2931{
2932	pmap_t pmap;
2933	struct proc *p;
2934	int npte = 0;
2935	int index;
2936
2937	sx_slock(&allproc_lock);
2938	LIST_FOREACH(p, &allproc, p_list) {
2939		if (p->p_pid != pid)
2940			continue;
2941
2942		if (p->p_vmspace) {
2943			int i, j;
2944
2945			printf("vmspace is %p\n",
2946			       p->p_vmspace);
2947			index = 0;
2948			pmap = vmspace_pmap(p->p_vmspace);
2949			printf("pmap asid:%x generation:%x\n",
2950			       pmap->pm_asid[0].asid,
2951			       pmap->pm_asid[0].gen);
2952			for (i = 0; i < NUSERPGTBLS; i++) {
2953				pd_entry_t *pde;
2954				pt_entry_t *pte;
2955				unsigned base = i << SEGSHIFT;
2956
2957				pde = &pmap->pm_segtab[i];
2958				if (pde && pmap_pde_v(pde)) {
2959					for (j = 0; j < 1024; j++) {
2960						unsigned va = base +
2961						(j << PAGE_SHIFT);
2962
2963						pte = pmap_pte(pmap, va);
2964						if (pte && pmap_pte_v(pte)) {
2965							vm_offset_t pa;
2966							vm_page_t m;
2967
2968							pa = mips_tlbpfn_to_paddr(*pte);
2969							m = PHYS_TO_VM_PAGE(pa);
2970							printf("va: 0x%x, pa: 0x%x, h: %d, w: %d, f: 0x%x",
2971							    va, pa,
2972							    m->hold_count,
2973							    m->wire_count,
2974							    m->flags);
2975							npte++;
2976							index++;
2977							if (index >= 2) {
2978								index = 0;
2979								printf("\n");
2980							} else {
2981								printf(" ");
2982							}
2983						}
2984					}
2985				}
2986			}
2987		} else {
2988			printf("Process pid:%d has no vmspace\n", pid);
2989		}
2990		break;
2991	}
2992	sx_sunlock(&allproc_lock);
2993	return npte;
2994}
2995
2996
2997#if defined(DEBUG)
2998
2999static void pads(pmap_t pm);
3000void pmap_pvdump(vm_offset_t pa);
3001
3002/* print address space of pmap*/
3003static void
3004pads(pmap_t pm)
3005{
3006	unsigned va, i, j;
3007	pt_entry_t *ptep;
3008
3009	if (pm == kernel_pmap)
3010		return;
3011	for (i = 0; i < NPTEPG; i++)
3012		if (pm->pm_segtab[i])
3013			for (j = 0; j < NPTEPG; j++) {
3014				va = (i << SEGSHIFT) + (j << PAGE_SHIFT);
3015				if (pm == kernel_pmap && va < KERNBASE)
3016					continue;
3017				if (pm != kernel_pmap &&
3018				    va >= VM_MAXUSER_ADDRESS)
3019					continue;
3020				ptep = pmap_pte(pm, va);
3021				if (pmap_pte_v(ptep))
3022					printf("%x:%x ", va, *(int *)ptep);
3023			}
3024
3025}
3026
3027void
3028pmap_pvdump(vm_offset_t pa)
3029{
3030	register pv_entry_t pv;
3031	vm_page_t m;
3032
3033	printf("pa %x", pa);
3034	m = PHYS_TO_VM_PAGE(pa);
3035	for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3036	    pv = TAILQ_NEXT(pv, pv_list)) {
3037		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3038		pads(pv->pv_pmap);
3039	}
3040	printf(" ");
3041}
3042
3043/* N/C */
3044#endif
3045
3046
3047/*
3048 * Allocate TLB address space tag (called ASID or TLBPID) and return it.
3049 * It takes almost as much or more time to search the TLB for a
3050 * specific ASID and flush those entries as it does to flush the entire TLB.
3051 * Therefore, when we allocate a new ASID, we just take the next number. When
3052 * we run out of numbers, we flush the TLB, increment the generation count
3053 * and start over. ASID zero is reserved for kernel use.
3054 */
3055static void
3056pmap_asid_alloc(pmap_t pmap)
3058{
3059	if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED ||
3060	    pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
3062		if (PCPU_GET(next_asid) == pmap_max_asid) {
3063			MIPS_TBIAP();
3064			PCPU_SET(asid_generation,
3065			    (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
3066			if (PCPU_GET(asid_generation) == 0) {
3067				PCPU_SET(asid_generation, 1);
3068			}
3069			PCPU_SET(next_asid, 1);	/* 0 means invalid */
3070		}
3071		pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3072		pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3073		PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3074	}
3075
3076#ifdef DEBUG
3077	if (pmapdebug & (PDB_FOLLOW | PDB_TLBPID)) {
3078		if (curproc)
3079			printf("pmap_asid_alloc: curproc %d '%s' ",
3080			    curproc->p_pid, curproc->p_comm);
3081		else
3082			printf("pmap_asid_alloc: curproc <none> ");
3083		printf("segtab %p asid %d\n", pmap->pm_segtab,
3084		    pmap->pm_asid[PCPU_GET(cpuid)].asid);
3085	}
3086#endif
3087}
3088
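/*
 * Worked example (constants assumed for illustration): on a CPU with 256
 * ASIDs, pmap_max_asid would be 256 and next_asid is handed out in order
 * from 1 to 255.  When next_asid reaches pmap_max_asid, the per-CPU TLB is
 * flushed with MIPS_TBIAP(), asid_generation is bumped (skipping 0), and
 * numbering restarts at 1; any pmap still recording the old generation is
 * forced back through the allocation path above on its next activation.
 */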
3089int
3090page_is_managed(vm_offset_t pa)
3091{
3092	vm_offset_t pgnum = mips_btop(pa);
3093
3094	if (pgnum >= first_page && (pgnum < (first_page + vm_page_array_size))) {
3095		vm_page_t m;
3096
3097		m = PHYS_TO_VM_PAGE(pa);
3098		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
3099			return 1;
3100	}
3101	return 0;
3102}
3103
3104static int
3105init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot)
3106{
3107	int rw = 0;
3108
3109	if (!(prot & VM_PROT_WRITE))
3110		rw = PTE_ROPAGE;
3111	else {
3112		if (va >= VM_MIN_KERNEL_ADDRESS) {
3113			/*
3114			 * Don't bother to trap on kernel writes, just
3115			 * record page as dirty.
3116			 */
3117			rw = PTE_RWPAGE;
3118			vm_page_dirty(m);
3119		} else if ((m->md.pv_flags & PV_TABLE_MOD) || m->dirty)
3120			rw = PTE_RWPAGE;
3121		else
3122			rw = PTE_CWPAGE;
3123	}
3124	return rw;
3125}
3126
3127/*
3128 *	pmap_page_is_free:
3129 *
3130 *	Called when a page is freed to allow pmap to clean up
3131 *	any extra state associated with the page.  In this case
3132 *	clear modified/referenced bits.
3133 */
3134void
3135pmap_page_is_free(vm_page_t m)
3136{
3137
3138	m->md.pv_flags = 0;
3139}
3140
3141/*
3142 *	pmap_set_modified:
3143 *
3144 *	Sets the page modified and reference bits for the specified page.
3145 */
3146void
3147pmap_set_modified(vm_offset_t pa)
3148{
3149
3150	PHYS_TO_VM_PAGE(pa)->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD);
3151}
3152
3153#include <machine/db_machdep.h>
3154
3155/*
3156 *  Dump the translation buffer (TLB) in readable form.
3157 */
3158
3159void
3160db_dump_tlb(int first, int last)
3161{
3162	struct tlb tlb;
3163	int tlbno;
3164
3165	tlbno = first;
3166
3167	while (tlbno <= last) {
3168		MachTLBRead(tlbno, &tlb);
3169		if (tlb.tlb_lo0 & PTE_V || tlb.tlb_lo1 & PTE_V) {
3170			printf("TLB %2d vad 0x%08x ", tlbno, (tlb.tlb_hi & 0xffffff00));
3171		} else {
3172			printf("TLB*%2d vad 0x%08x ", tlbno, (tlb.tlb_hi & 0xffffff00));
3173		}
3174		printf("0=0x%08x ", pfn_to_vad(tlb.tlb_lo0));
3175		printf("%c", tlb.tlb_lo0 & PTE_M ? 'M' : ' ');
3176		printf("%c", tlb.tlb_lo0 & PTE_G ? 'G' : ' ');
3177		printf(" atr %x ", (tlb.tlb_lo0 >> 3) & 7);
3178		printf("1=0x%08x ", pfn_to_vad(tlb.tlb_lo1));
3179		printf("%c", tlb.tlb_lo1 & PTE_M ? 'M' : ' ');
3180		printf("%c", tlb.tlb_lo1 & PTE_G ? 'G' : ' ');
3181		printf(" atr %x ", (tlb.tlb_lo1 >> 3) & 7);
3182		printf(" sz=%x pid=%x\n", tlb.tlb_mask,
3183		       (tlb.tlb_hi & 0x000000ff)
3184		       );
3185		tlbno++;
3186	}
3187}
3188
3189#ifdef DDB
3190#include <sys/kernel.h>
3191#include <ddb/ddb.h>
3192
3193DB_SHOW_COMMAND(tlb, ddb_dump_tlb)
3194{
3195	db_dump_tlb(0, num_tlbentries - 1);
3196}
3197
3198#endif
3199
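/*
 * Usage note: with DDB compiled in, the entire TLB can be inspected from the
 * debugger prompt with "show tlb", which runs db_dump_tlb() over all
 * num_tlbentries entries; db_dump_tlb(first, last) may also be called
 * directly to dump a subrange.
 */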
3200/*
3201 *	Routine:	pmap_kextract
3202 *	Function:
3203 *		Extract the physical page address associated
3204 *		Extract the physical page address associated with the
3205 *		given virtual address.
3206 /* PMAP_INLINE */ vm_offset_t
3207pmap_kextract(vm_offset_t va)
3208{
3209	vm_offset_t pa = 0;
3210
3211	if (va < MIPS_CACHED_MEMORY_ADDR) {
3212		/* user virtual address */
3213		pt_entry_t *ptep;
3214
3215		if (curproc && curproc->p_vmspace) {
3216			ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3217			if (ptep)
3218				pa = mips_tlbpfn_to_paddr(*ptep) |
3219				    (va & PAGE_MASK);
3220		}
3221	} else if (va >= MIPS_CACHED_MEMORY_ADDR &&
3222	    va < MIPS_UNCACHED_MEMORY_ADDR)
3223		pa = MIPS_CACHED_TO_PHYS(va);
3224	else if (va >= MIPS_UNCACHED_MEMORY_ADDR &&
3225	    va < MIPS_KSEG2_START)
3226		pa = MIPS_UNCACHED_TO_PHYS(va);
3227#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
3228	else if (need_wired_tlb_page_pool && ((va >= VM_MIN_KERNEL_ADDRESS) &&
3229	    (va < (VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET))))
3230		pa = MIPS_CACHED_TO_PHYS(va);
3231#endif
3232	else if (va >= MIPS_KSEG2_START && va < VM_MAX_KERNEL_ADDRESS) {
3233		pt_entry_t *ptep;
3234
3235		/* Is the kernel pmap initialized? */
3236		if (kernel_pmap->pm_active) {
3237			if (va >= (vm_offset_t)virtual_sys_start) {
3238				/* It's inside the virtual address range */
3239				ptep = pmap_pte(kernel_pmap, va);
3240				if (ptep)
3241					pa = mips_tlbpfn_to_paddr(*ptep) |
3242					    (va & PAGE_MASK);
3243			} else {
3244				int i;
3245
3246				/*
3247				 * It's inside the special mapping area.  I
3248				 * don't think this should happen, but if it
3249				 * does I want it to work correctly.  Note
3250				 * that if it does happen, we assume the
3251				 * caller holds the lock?  FIXME: this needs
3252				 * to be checked. - RRS
3253				 */
3254				for (i = 0; i < MAXCPU; i++) {
3255					if ((sysmap_lmem[i].valid1) && ((vm_offset_t)sysmap_lmem[i].CADDR1 == va)) {
3256						pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP1);
3257						break;
3258					}
3259					if ((sysmap_lmem[i].valid2) && ((vm_offset_t)sysmap_lmem[i].CADDR2 == va)) {
3260						pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP2);
3261						break;
3262					}
3263				}
3264			}
3265		}
3266	}
3267	return pa;
3268}
3269
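/*
 * Usage note: for direct-mapped addresses the translation above is purely
 * arithmetic, e.g. pmap_kextract(MIPS_PHYS_TO_CACHED(pa)) simply yields pa
 * for physical addresses within the KSEG0 window; only user addresses and
 * KSEG2 addresses require the page-table walk.
 */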