pmap.c revision 183510
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
38 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
39 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
40 */
41
42/*
43 *	Manages physical address maps.
44 *
45 *	In addition to hardware address maps, this
46 *	module is called upon to provide software-use-only
47 *	maps which may or may not be stored in the same
48 *	form as hardware maps.	These pseudo-maps are
49 *	used to store intermediate results from copy
50 *	operations to and from address spaces.
51 *
52 *	Since the information managed by this module is
53 *	also stored by the logical address mapping module,
54 *	this module may throw away valid virtual-to-physical
55 *	mappings at almost any time.  However, invalidations
56 *	of virtual-to-physical mappings must be done as
57 *	requested.
58 *
59 *	In order to cope with hardware architectures which
60 *	make virtual-to-physical map invalidates expensive,
61 *	this module may delay invalidate or reduced protection
62 *	operations until such time as they are actually
63 *	necessary.  This module is given full information as
64 *	to which processors are currently using which maps,
65 *	and to when physical maps must be made correct.
66 */
67
68#include <sys/cdefs.h>
69__FBSDID("$FreeBSD: head/sys/mips/mips/pmap.c 183510 2008-10-01 05:47:17Z imp $");
70
71#include "opt_ddb.h"
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/proc.h>
75#include <sys/msgbuf.h>
76#include <sys/vmmeter.h>
77#include <sys/mman.h>
78
79#include <vm/vm.h>
80#include <vm/vm_param.h>
81#include <sys/lock.h>
82#include <sys/mutex.h>
83#include <vm/vm_kern.h>
84#include <vm/vm_page.h>
85#include <vm/vm_map.h>
86#include <vm/vm_object.h>
87#include <vm/vm_extern.h>
88#include <vm/vm_pageout.h>
89#include <vm/vm_pager.h>
90#include <vm/uma.h>
91#include <sys/pcpu.h>
92#include <sys/sched.h>
93#ifdef SMP
94#include <sys/smp.h>
95#endif
96
97#include <machine/cache.h>
98#include <machine/pltfm.h>
99#include <machine/md_var.h>
100
101#if defined(DIAGNOSTIC)
102#define	PMAP_DIAGNOSTIC
103#endif
104
105#ifndef PMAP_SHPGPERPROC
106#define	PMAP_SHPGPERPROC 200
107#endif
108
109#if !defined(PMAP_DIAGNOSTIC)
110#define	PMAP_INLINE __inline
111#else
112#define	PMAP_INLINE
113#endif
114
115/*
116 * Get PDEs and PTEs for user/kernel address space
117 */
118#define	pmap_pde(m, v)	       (&((m)->pm_segtab[(vm_offset_t)(v) >> SEGSHIFT]))
119#define	segtab_pde(m, v)	(m[(vm_offset_t)(v) >> SEGSHIFT])
120
121#define	pmap_pte_w(pte)		((*(int *)pte & PTE_W) != 0)
122#define	pmap_pde_v(pte)		((*(int *)pte) != 0)
123#define	pmap_pte_m(pte)		((*(int *)pte & PTE_M) != 0)
124#define	pmap_pte_v(pte)		((*(int *)pte & PTE_V) != 0)
125
126#define	pmap_pte_set_w(pte, v)	((v)?(*(int *)pte |= PTE_W):(*(int *)pte &= ~PTE_W))
127#define	pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
128
129#define	MIPS_SEGSIZE		(1L << SEGSHIFT)
130#define	mips_segtrunc(va)	((va) & ~(MIPS_SEGSIZE-1))
131#define	pmap_TLB_invalidate_all() MIPS_TBIAP()
132#define	pmap_va_asid(pmap, va)	((va) | ((pmap)->pm_asid[PCPU_GET(cpuid)].asid << VMTLB_PID_SHIFT))
133#define	is_kernel_pmap(x)	((x) == kernel_pmap)
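
/*
 * Illustrative sketch (comment only): translating a user VA with these
 * macros is a two-level walk.  pmap_pde() indexes pm_segtab by
 * (va >> SEGSHIFT) to get the KVA of a page table page, and the PTE then
 * sits at vad_to_pte_offset(va) within that page, roughly:
 *
 *	pte = pmap_pte(pmap, va);	(NULL if no segment is mapped)
 *	if (pte != NULL && pmap_pte_v(pte))
 *		pa = mips_tlbpfn_to_paddr(*pte) | (va & PAGE_MASK);
 *
 * which is what pmap_extract() below does.
 */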
134
135static struct pmap kernel_pmap_store;
136pmap_t kernel_pmap;
137pd_entry_t *kernel_segmap;
138
139vm_offset_t avail_start;	/* PA of first available physical page */
140vm_offset_t avail_end;		/* PA of last available physical page */
141vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
142vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
143
144static int nkpt;
145unsigned pmap_max_asid;		/* max ASID supported by the system */
146
147
148#define	PMAP_ASID_RESERVED	0
149
150
151vm_offset_t kernel_vm_end;
152
153static void pmap_asid_alloc(pmap_t pmap);
154
155/*
156 * Data for the pv entry allocation mechanism
157 */
158static uma_zone_t pvzone;
159static struct vm_object pvzone_obj;
160static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
161int pmap_pagedaemon_waken = 0;
162
163struct fpage fpages_shared[FPAGES_SHARED];
164
165struct sysmaps sysmaps_pcpu[MAXCPU];
166
167static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
168static pv_entry_t get_pv_entry(void);
169static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);
170
171static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va);
172static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
173static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
174static boolean_t pmap_testbit(vm_page_t m, int bit);
175static void
176pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte,
177    vm_page_t m, boolean_t wired);
178
179static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
180
181static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
182static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
183static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
184static void pmap_TLB_invalidate_kernel(vm_offset_t);
185static void pmap_TLB_update_kernel(vm_offset_t, pt_entry_t);
186static void pmap_init_fpage(void);
187
188#ifdef SMP
189static void pmap_invalidate_page_action(void *arg);
190static void pmap_invalidate_all_action(void *arg);
191static void pmap_update_page_action(void *arg);
192
193#endif
194
195struct local_sysmaps {
196	struct mtx lock;
197	pt_entry_t CMAP1;
198	pt_entry_t CMAP2;
199	caddr_t CADDR1;
200	caddr_t CADDR2;
201	uint16_t valid1, valid2;
202};
203
204/* This structure is for large memory
205 * above 512Meg. We can't (in 32-bit mode)
206 * just use the direct-mapped MIPS_CACHED_TO_PHYS()
207 * macro since we can't see the memory and must
208 * map it in when we need to access it. In 64-bit
209 * mode this goes away.
210 */
211static struct local_sysmaps sysmap_lmem[MAXCPU];
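/*
 * Each CPU gets a private pair of KVA pages (CADDR1/CADDR2) whose PTEs
 * (CMAP1/CMAP2) are rewritten on demand to reach physical pages above
 * MIPS_KSEG0_LARGEST_PHYS; see pmap_kenter_temporary() for one user.
 */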
212caddr_t virtual_sys_start = (caddr_t)0;
213
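/*
 * Return the segment table (first level) entry for va: the KVA of the
 * page table page covering va, or 0 if this pmap has no segment table
 * or no page table page is installed for that segment.
 */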
214pd_entry_t
215pmap_segmap(pmap_t pmap, vm_offset_t va)
216{
217	if (pmap->pm_segtab)
218		return (pmap->pm_segtab[((vm_offset_t)(va) >> SEGSHIFT)]);
219	else
220		return ((pd_entry_t)0);
221}
222
223/*
224 *	Routine:	pmap_pte
225 *	Function:
226 *		Extract the page table entry associated
227 *		with the given map/virtual_address pair.
228 */
229pt_entry_t *
230pmap_pte(pmap_t pmap, vm_offset_t va)
231{
232	pt_entry_t *pdeaddr;
233
234	if (pmap) {
235		pdeaddr = (pt_entry_t *)pmap_segmap(pmap, va);
236		if (pdeaddr) {
237			return pdeaddr + vad_to_pte_offset(va);
238		}
239	}
240	return ((pt_entry_t *)0);
241}
242
243
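/*
 * Bootstrap allocator: carve 'size' bytes (rounded up to whole pages) out
 * of the first phys_avail[] bank large enough to hold it, before the VM
 * system is running.  The stolen memory must lie below
 * MIPS_KSEG0_LARGEST_PHYS so it can be reached through KSEG0; it is
 * returned zeroed, as a cached (KSEG0) virtual address.
 */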
244vm_offset_t
245pmap_steal_memory(vm_size_t size)
246{
247	vm_size_t bank_size;
248	vm_offset_t pa, va;
249
250	size = round_page(size);
251
252	bank_size = phys_avail[1] - phys_avail[0];
253	while (size > bank_size) {
254		int i;
255
256		for (i = 0; phys_avail[i + 2]; i += 2) {
257			phys_avail[i] = phys_avail[i + 2];
258			phys_avail[i + 1] = phys_avail[i + 3];
259		}
260		phys_avail[i] = 0;
261		phys_avail[i + 1] = 0;
262		if (!phys_avail[0])
263			panic("pmap_steal_memory: out of memory");
264		bank_size = phys_avail[1] - phys_avail[0];
265	}
266
267	pa = phys_avail[0];
268	phys_avail[0] += size;
269	if (pa >= MIPS_KSEG0_LARGEST_PHYS) {
270		panic("Out of memory below 512Meg?");
271	}
272	va = MIPS_PHYS_TO_CACHED(pa);
273	bzero((caddr_t)va, size);
274	return va;
275}
276
277/*
278 *	Bootstrap the system enough to run with virtual memory.  This
279 * assumes that the phys_avail array has been initialized.
280 */
281void
282pmap_bootstrap(void)
283{
284	pt_entry_t *pgtab;
285	pt_entry_t *pte;
286	int i, j;
287	int memory_larger_than_512meg = 0;
288
289	/* Sort. */
290again:
291	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
292		if (phys_avail[i + 1] >= MIPS_KSEG0_LARGEST_PHYS) {
293			memory_larger_than_512meg++;
294		}
295		if (i < 2)
296			continue;
297		if (phys_avail[i - 2] > phys_avail[i]) {
298			vm_paddr_t ptemp[2];
299
300
301			ptemp[0] = phys_avail[i + 0];
302			ptemp[1] = phys_avail[i + 1];
303
304			phys_avail[i + 0] = phys_avail[i - 2];
305			phys_avail[i + 1] = phys_avail[i - 1];
306
307			phys_avail[i - 2] = ptemp[0];
308			phys_avail[i - 1] = ptemp[1];
309			goto again;
310		}
311	}
312
313	if (bootverbose) {
314		printf("Physical memory chunk(s):\n");
315		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
316			vm_paddr_t size;
317
318			size = phys_avail[i + 1] - phys_avail[i];
319			printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
320			    (uintmax_t) phys_avail[i],
321			    (uintmax_t) phys_avail[i + 1] - 1,
322			    (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
323		}
324	}
325	/*
326	 * Steal the message buffer from the beginning of memory.
327	 */
328	msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE);
329	msgbufinit(msgbufp, MSGBUF_SIZE);
330
331	/*
332	 * Steal thread0 kstack.
333	 */
334	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
335
336
337	virtual_avail = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET;
338	virtual_end = VM_MAX_KERNEL_ADDRESS;
339
340	/*
341	 * Steal some virtual space that will not be in kernel_segmap. This
342	 * va memory space will be used to map in kernel pages that are
343	 * outside the 512Meg region. Note that we only do this steal when
344	 * we do have memory in this region; that way, on systems with
345	 * smaller memory we don't "steal" any va ranges. :-)
346	 */
347	if (memory_larger_than_512meg) {
348		for (i = 0; i < MAXCPU; i++) {
349			sysmap_lmem[i].CMAP1 = PTE_G;
350			sysmap_lmem[i].CMAP2 = PTE_G;
351			sysmap_lmem[i].CADDR1 = (caddr_t)virtual_avail;
352			virtual_avail += PAGE_SIZE;
353			sysmap_lmem[i].CADDR2 = (caddr_t)virtual_avail;
354			virtual_avail += PAGE_SIZE;
355			sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
356			PMAP_LGMEM_LOCK_INIT(&sysmap_lmem[i]);
357		}
358	}
359	virtual_sys_start = (caddr_t)virtual_avail;
360	/*
361	 * Allocate segment table for the kernel
362	 */
363	kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);
364
365	/*
366	 * Allocate second level page tables for the kernel
367	 */
368	nkpt = NKPT;
369	if (memory_larger_than_512meg) {
370		/*
371		 * If we have a large memory system we CANNOT afford to hit
372		 * pmap_growkernel() and allocate memory there, since we MAY
373		 * end up with a page that is NOT mappable. For that reason
374		 * we grab more up front. Normally NKPT is 120 (YMMV, see
375		 * pmap.h); this gives us 480Meg of kernel virtual addresses
376		 * at the cost of 120 pages (each page maps 4 Meg). Since the
377		 * kernel starts at virtual_avail, we can use this to
378		 * calculate how many entries are left from there to the end
379		 * of the segmap; we want to allocate all of them, which for
380		 * the range 0xC0000000 - 0xFFFFFFFF works out to about 256
381		 * entries instead of the 120.
382		 */
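		/*
		 * Worked example (assuming 4K pages and 4-byte segment table
		 * entries): the table holds 4096 / 4 = 1024 entries, each
		 * mapping 4 Meg of KVA.  With virtual_avail around 0xC0000000,
		 * (virtual_avail >> SEGSHIFT) is roughly 768, leaving
		 * nkpt = 1024 - 768 = 256 page table pages to preallocate.
		 */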
383		nkpt = (PAGE_SIZE / sizeof(pd_entry_t)) - (virtual_avail >> SEGSHIFT);
384	}
385	pgtab = (pt_entry_t *)pmap_steal_memory(PAGE_SIZE * nkpt);
386
387	/*
388	 * The R[4-7]?00 stores only one copy of the Global bit in the
389 * translation lookaside buffer for each 2-page entry. Thus invalid
390 * entries must have the Global bit set so that when the EntryLo and
391 * EntryHi G bits are ANDed together they produce a global bit to store
392	 * in the tlb.
393	 */
394	for (i = 0, pte = pgtab; i < (nkpt * NPTEPG); i++, pte++)
395		*pte = PTE_G;
396
397	printf("Va=0x%x Ve=0x%x\n", virtual_avail, virtual_end);
398	/*
399	 * The segment table contains the KVA of the pages in the second
400	 * level page table.
401	 */
402	printf("init kernel_segmap va >> = %d nkpt:%d\n",
403	    (virtual_avail >> SEGSHIFT),
404	    nkpt);
405	for (i = 0, j = (virtual_avail >> SEGSHIFT); i < nkpt; i++, j++)
406		kernel_segmap[j] = (pd_entry_t)(pgtab + (i * NPTEPG));
407
408	avail_start = phys_avail[0];
409	for (i = 0; phys_avail[i + 2]; i += 2);
410	avail_end = phys_avail[i + 1];
411
412	/*
413	 * The kernel's pmap is statically allocated so we don't have to use
414	 * pmap_create, which is unlikely to work correctly at this part of
415	 * the boot sequence (XXX and which no longer exists).
416	 */
417	kernel_pmap = &kernel_pmap_store;
418
419	PMAP_LOCK_INIT(kernel_pmap);
420	kernel_pmap->pm_segtab = kernel_segmap;
421	kernel_pmap->pm_active = ~0;
422	TAILQ_INIT(&kernel_pmap->pm_pvlist);
423	printf("avail_start:0x%x avail_end:0x%x\n",
424	    avail_start, avail_end);
425
426	kernel_pmap->pm_asid[PCPU_GET(cpuid)].asid = PMAP_ASID_RESERVED;
427	kernel_pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
428	pmap_max_asid = VMNUM_PIDS;
429	MachSetPID(0);
430}
431
432/*
433 * Initialize a vm_page's machine-dependent fields.
434 */
435void
436pmap_page_init(vm_page_t m)
437{
438
439	TAILQ_INIT(&m->md.pv_list);
440	m->md.pv_list_count = 0;
441	m->md.pv_flags = 0;
442}
443
444/*
445 *	Initialize the pmap module.
446 *	Called by vm_init, to initialize any structures that the pmap
447 *	system needs to map virtual memory.
448 *	pmap_init has been enhanced to support, in a fairly consistent
449 *	way, discontiguous physical memory.
450 */
451void
452pmap_init(void)
453{
454
455	if (need_wired_tlb_page_pool)
456		pmap_init_fpage();
457	/*
458	 * Initialize the address space (zone) for the pv entries.  Set a
459	 * high water mark so that the system can recover from excessive
460	 * numbers of pv entries.
461	 */
462	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
463	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
464	pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
465	pv_entry_high_water = 9 * (pv_entry_max / 10);
466	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
467}
468
469/***************************************************
470 * Low level helper routines.....
471 ***************************************************/
472
473#if defined(PMAP_DIAGNOSTIC)
474
475/*
476 * This code checks for non-writeable/modified pages.
477 * This should be an invalid condition.
478 */
479static int
480pmap_nw_modified(pt_entry_t pte)
481{
482	if ((pte & (PTE_M | PTE_RO)) == (PTE_M | PTE_RO))
483		return (1);
484	else
485		return (0);
486}
487
488#endif
489
490
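/*
 * Invalidate all TLB entries belonging to the given pmap.  On CPUs where
 * the pmap is active this flushes the TLB; elsewhere it just marks the
 * pmap's ASID generation stale so a fresh ASID is allocated the next time
 * the pmap is activated there.
 */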
491static void
492pmap_invalidate_all(pmap_t pmap)
493{
494#ifdef SMP
495	smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *)pmap);
496}
497
498static void
499pmap_invalidate_all_action(void *arg)
500{
501	pmap_t pmap = (pmap_t)arg;
502
503#endif
504
505	if (pmap->pm_active & PCPU_GET(cpumask)) {
506		pmap_TLB_invalidate_all();
507	} else
508		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
509}
510
511struct pmap_invalidate_page_arg {
512	pmap_t pmap;
513	vm_offset_t va;
514};
515
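/*
 * Invalidate the TLB entry for a single page.  Kernel mappings are
 * flushed directly; user mappings are flushed only when the pmap is
 * active on this CPU with a current ASID, and otherwise the pmap's ASID
 * generation is marked (or already is) stale, so no flush is needed.
 */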
516static __inline void
517pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
518{
519#ifdef SMP
520	struct pmap_invalidate_page_arg arg;
521
522	arg.pmap = pmap;
523	arg.va = va;
524
525	smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *)&arg);
526}
527
528static void
529pmap_invalidate_page_action(void *arg)
530{
531	pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap;
532	vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va;
533
534#endif
535
536	if (is_kernel_pmap(pmap)) {
537		pmap_TLB_invalidate_kernel(va);
538		return;
539	}
540	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
541		return;
542	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
543		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
544		return;
545	}
546	va = pmap_va_asid(pmap, (va & ~PGOFSET));
547	mips_TBIS(va);
548}
549
550static void
551pmap_TLB_invalidate_kernel(vm_offset_t va)
552{
553	u_int32_t pid;
554
555	MachTLBGetPID(pid);
556	va = va | (pid << VMTLB_PID_SHIFT);
557	mips_TBIS(va);
558}
559
560struct pmap_update_page_arg {
561	pmap_t pmap;
562	vm_offset_t va;
563	pt_entry_t pte;
564};
565
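/*
 * Write an updated PTE into the TLB for the given pmap/va (broadcast to
 * all CPUs on SMP).  As with invalidation, user mappings are updated only
 * on CPUs where the pmap is active with a current ASID.
 */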
566void
567pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
568{
569#ifdef SMP
570	struct pmap_update_page_arg arg;
571
572	arg.pmap = pmap;
573	arg.va = va;
574	arg.pte = pte;
575
576	smp_rendezvous(0, pmap_update_page_action, 0, (void *)&arg);
577}
578
579static void
580pmap_update_page_action(void *arg)
581{
582	pmap_t pmap = ((struct pmap_update_page_arg *)arg)->pmap;
583	vm_offset_t va = ((struct pmap_update_page_arg *)arg)->va;
584	pt_entry_t pte = ((struct pmap_update_page_arg *)arg)->pte;
585
586#endif
587	if (is_kernel_pmap(pmap)) {
588		pmap_TLB_update_kernel(va, pte);
589		return;
590	}
591	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
592		return;
593	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
594		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
595		return;
596	}
597	va = pmap_va_asid(pmap, va);
598	MachTLBUpdate(va, pte);
599}
600
601static void
602pmap_TLB_update_kernel(vm_offset_t va, pt_entry_t pte)
603{
604	u_int32_t pid;
605
606	MachTLBGetPID(pid);
607	va = va | (pid << VMTLB_PID_SHIFT);
608
609	MachTLBUpdate(va, pte);
610}
611
612/*
613 *	Routine:	pmap_extract
614 *	Function:
615 *		Extract the physical page address associated
616 *		with the given map/virtual_address pair.
617 */
618vm_paddr_t
619pmap_extract(pmap_t pmap, vm_offset_t va)
620{
621	pt_entry_t *pte;
622	vm_offset_t retval = 0;
623
624	PMAP_LOCK(pmap);
625	pte = pmap_pte(pmap, va);
626	if (pte) {
627		retval = mips_tlbpfn_to_paddr(*pte) | (va & PAGE_MASK);
628	}
629	PMAP_UNLOCK(pmap);
630	return retval;
631}
632
633/*
634 *	Routine:	pmap_extract_and_hold
635 *	Function:
636 *		Atomically extract and hold the physical page
637 *		with the given pmap and virtual address pair
638 *		if that mapping permits the given protection.
639 */
640vm_page_t
641pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
642{
643	pt_entry_t pte;
644	vm_page_t m;
645
646	m = NULL;
647	vm_page_lock_queues();
648	PMAP_LOCK(pmap);
649
650	pte = *pmap_pte(pmap, va);
651	if (pte != 0 && pmap_pte_v(&pte) &&
652	    ((pte & PTE_RW) || (prot & VM_PROT_WRITE) == 0)) {
653		m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pte));
654		vm_page_hold(m);
655	}
656	vm_page_unlock_queues();
657	PMAP_UNLOCK(pmap);
658	return (m);
659}
660
661/***************************************************
662 * Low level mapping routines.....
663 ***************************************************/
664
665/*
666 * add a wired page to the kva
667 */
668 /* PMAP_INLINE */ void
669pmap_kenter(vm_offset_t va, vm_paddr_t pa)
670{
671	register pt_entry_t *pte;
672	pt_entry_t npte, opte;
673
674	npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W;
675
676	if (is_cacheable_mem(pa))
677		npte |= PTE_CACHE;
678	else
679		npte |= PTE_UNCACHED;
680
681	pte = pmap_pte(kernel_pmap, va);
682	opte = *pte;
683	*pte = npte;
684
685	pmap_update_page(kernel_pmap, va, npte);
686}
687
688/*
689 * remove a page from the kernel pagetables
690 */
691 /* PMAP_INLINE */ void
692pmap_kremove(vm_offset_t va)
693{
694	register pt_entry_t *pte;
695
696	pte = pmap_pte(kernel_pmap, va);
697	*pte = PTE_G;
698	pmap_invalidate_page(kernel_pmap, va);
699}
700
701/*
702 *	Used to map a range of physical addresses into kernel
703 *	virtual address space.
704 *
705 *	The value passed in '*virt' is a suggested virtual address for
706 *	the mapping. Architectures which can support a direct-mapped
707 *	physical to virtual region can return the appropriate address
708 *	within that region, leaving '*virt' unchanged. Other
709 *	architectures should map the pages starting at '*virt' and
710 *	update '*virt' with the first usable address after the mapped
711 *	region.
712 */
713vm_offset_t
714pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
715{
716	vm_offset_t va, sva;
717
718	va = sva = *virt;
719	while (start < end) {
720		pmap_kenter(va, start);
721		va += PAGE_SIZE;
722		start += PAGE_SIZE;
723	}
724	*virt = va;
725	return (sva);
726}
727
728/*
729 * Add a list of wired pages to the kva
730 * this routine is only used for temporary
731 * kernel mappings that do not need to have
732 * page modification or references recorded.
733 * Note that old mappings are simply written
734 * over.  The page *must* be wired.
735 */
736void
737pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
738{
739	int i;
740
741	for (i = 0; i < count; i++) {
742		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
743		va += PAGE_SIZE;
744	}
745}
746
747/*
748 * this routine jerks page mappings from the
749 * kernel -- it is meant only for temporary mappings.
750 */
751void
752pmap_qremove(vm_offset_t va, int count)
753{
754	while (count-- > 0) {
755		pmap_kremove(va);
756		va += PAGE_SIZE;
757	}
758}
759
760/***************************************************
761 * Page table page management routines.....
762 ***************************************************/
763
764/*
765 * floating pages (FPAGES) management routines
766 *
767 * FPAGES are the reserved virtual memory areas which can be
768 * mapped to any physical memory. This gets used typically
769 * in the following functions:
770 *
771 * pmap_zero_page
772 * pmap_copy_page
773 */
774
775/*
776 * Create the floating pages, aka FPAGES!
777 */
778static void
779pmap_init_fpage()
780{
781	vm_offset_t kva;
782	int i, j;
783	struct sysmaps *sysmaps;
784
785	/*
786	 * We allocate a total of (FPAGES*MAXCPU + FPAGES_SHARED + 1) pages
787	 * at first. FPAGES & FPAGES_SHARED should be EVEN. Then we'll adjust
788	 * 'kva' to be even-page aligned so that the fpage area can be wired
789	 * in the TLB with a single TLB entry.
790	 */
791	kva = kmem_alloc_nofault(kernel_map,
792	    (FPAGES * MAXCPU + 1 + FPAGES_SHARED) * PAGE_SIZE);
793	if ((void *)kva == NULL)
794		panic("pmap_init_fpage: fpage allocation failed");
795
796	/*
797	 * Make sure we start at an even page number so we can wire down the
798	 * fpage area in the tlb with a single tlb entry.
799	 */
800	if ((((vm_offset_t)kva) >> PGSHIFT) & 1) {
801		/*
802		 * 'kva' is not even-page aligned. Adjust it and free the
803		 * first page which is unused.
804		 */
805		kmem_free(kernel_map, (vm_offset_t)kva, NBPG);
806		kva = ((vm_offset_t)kva) + NBPG;
807	} else {
808		/*
809		 * 'kva' is even page aligned. We don't need the last page,
810		 * free it.
811		 */
812		kmem_free(kernel_map, ((vm_offset_t)kva) + FSPACE, NBPG);
813	}
814
815	for (i = 0; i < MAXCPU; i++) {
816		sysmaps = &sysmaps_pcpu[i];
817		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
818
819		/* Assign FPAGES pages to the CPU */
820		for (j = 0; j < FPAGES; j++)
821			sysmaps->fp[j].kva = kva + (j) * PAGE_SIZE;
822		kva = ((vm_offset_t)kva) + (FPAGES * PAGE_SIZE);
823	}
824
825	/*
826	 * An additional 2 pages are needed, one for pmap_zero_page_idle()
827	 * and one for coredump. These pages are shared by all CPUs.
828	 */
829	fpages_shared[PMAP_FPAGE3].kva = kva;
830	fpages_shared[PMAP_FPAGE_KENTER_TEMP].kva = kva + PAGE_SIZE;
831}
832
833/*
834 * Map the page to the fpage virtual address specified through the fpage id.
835 */
836vm_offset_t
837pmap_map_fpage(vm_paddr_t pa, struct fpage *fp, boolean_t check_unmaped)
838{
839	vm_offset_t kva;
840	register pt_entry_t *pte;
841	pt_entry_t npte;
842
843	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
844	/*
845	 * Check if the fpage is free
846	 */
847	if (fp->state) {
848		if (check_unmaped == TRUE)
849			pmap_unmap_fpage(pa, fp);
850		else
851			panic("pmap_map_fpage: fpage is busy");
852	}
853	fp->state = TRUE;
854	kva = fp->kva;
855
856	npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
857	pte = pmap_pte(kernel_pmap, kva);
858	*pte = npte;
859
860	pmap_TLB_update_kernel(kva, npte);
861
862	return (kva);
863}
864
865/*
866 * Unmap the page from the fpage virtual address specified through the fpage id.
867 */
868void
869pmap_unmap_fpage(vm_paddr_t pa, struct fpage *fp)
870{
871	vm_offset_t kva;
872	register pt_entry_t *pte;
873
874	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
875	/*
876	 * Check if the fpage is busy
877	 */
878	if (!(fp->state)) {
879		panic("pmap_unmap_fpage: fpage is free");
880	}
881	kva = fp->kva;
882
883	pte = pmap_pte(kernel_pmap, kva);
884	*pte = PTE_G;
885	pmap_TLB_invalidate_kernel(kva);
886
887	fp->state = FALSE;
888
889	/*
890	 * Should there be any flush operation at the end?
891	 */
892}
893
894/*  Revision 1.507
895 *
896 * Simplify the reference counting of page table pages.	 Specifically, use
897 * the page table page's wired count rather than its hold count to contain
898 * the reference count.
899 */
900
901/*
902 * This routine is the slow path of pmap_unwire_pte_hold(): once a page
903 * table page's wire count drops to zero, the page is unmapped and freed.
904 */
905static int
906_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
907{
908
909	/*
910	 * unmap the page table page
911	 */
912	pmap->pm_segtab[m->pindex] = 0;
913	--pmap->pm_stats.resident_count;
914
915	if (pmap->pm_ptphint == m)
916		pmap->pm_ptphint = NULL;
917
918	/*
919	 * If the page is finally unwired, simply free it.
920	 */
921	vm_page_free_zero(m);
922	atomic_subtract_int(&cnt.v_wire_count, 1);
923	return (1);
924}
925
926static PMAP_INLINE int
927pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
928{
929	--m->wire_count;
930	if (m->wire_count == 0)
931		return (_pmap_unwire_pte_hold(pmap, m));
932	else
933		return (0);
934}
935
936/*
937 * After removing a page table entry, this routine is used to
938 * conditionally free the page, and manage the hold/wire counts.
939 */
940static int
941pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
942{
943	unsigned ptepindex;
944	pd_entry_t pteva;
945
946	if (va >= VM_MAXUSER_ADDRESS)
947		return (0);
948
949	if (mpte == NULL) {
950		ptepindex = (va >> SEGSHIFT);
951		if (pmap->pm_ptphint &&
952		    (pmap->pm_ptphint->pindex == ptepindex)) {
953			mpte = pmap->pm_ptphint;
954		} else {
955			pteva = *pmap_pde(pmap, va);
956			mpte = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva));
957			pmap->pm_ptphint = mpte;
958		}
959	}
960	return pmap_unwire_pte_hold(pmap, mpte);
961}
962
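/*
 * Initialize the pmap for process 0 (the boot-time pmap).  It shares the
 * kernel segment table instead of allocating its own, so no page
 * directory page is allocated here.
 */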
963void
964pmap_pinit0(pmap_t pmap)
965{
966	int i;
967
968	PMAP_LOCK_INIT(pmap);
969	pmap->pm_segtab = kernel_segmap;
970	pmap->pm_active = 0;
971	pmap->pm_ptphint = NULL;
972	for (i = 0; i < MAXCPU; i++) {
973		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
974		pmap->pm_asid[i].gen = 0;
975	}
976	PCPU_SET(curpmap, pmap);
977	TAILQ_INIT(&pmap->pm_pvlist);
978	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
979}
980
981/*
982 * Initialize a preallocated and zeroed pmap structure,
983 * such as one in a vmspace structure.
984 */
985int
986pmap_pinit(pmap_t pmap)
987{
988	vm_page_t ptdpg;
989	int i;
990	int req;
991
992	PMAP_LOCK_INIT(pmap);
993
994	req = VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
995	    VM_ALLOC_ZERO;
996
997#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
998	if (need_wired_tlb_page_pool)
999		req |= VM_ALLOC_WIRED_TLB_PG_POOL;
1000#endif
1001	/*
1002	 * allocate the page directory page
1003	 */
1004	ptdpg = vm_page_alloc(NULL, NUSERPGTBLS, req);
1005
1006#if 0
1007	/* I think we can just delete these, now that PG_BUSY is gone */
1008	vm_page_lock_queues();
1009	vm_page_flag_clear(ptdpg, PTE_BUSY);	/* not usually mapped */
1010#endif
1011	ptdpg->valid = VM_PAGE_BITS_ALL;
1012
1013#if 0
1014	vm_page_unlock_queues();
1015#endif
1016
1017	pmap->pm_segtab = (pd_entry_t *)
1018	    MIPS_PHYS_TO_CACHED(VM_PAGE_TO_PHYS(ptdpg));
1019	if ((ptdpg->flags & PG_ZERO) == 0)
1020		bzero(pmap->pm_segtab, PAGE_SIZE);
1021
1022	pmap->pm_active = 0;
1023	pmap->pm_ptphint = NULL;
1024	for (i = 0; i < MAXCPU; i++) {
1025		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1026		pmap->pm_asid[i].gen = 0;
1027	}
1028	TAILQ_INIT(&pmap->pm_pvlist);
1029	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1030
1031	return (1);
1032}
1033
1034/*
1035 * this routine is called if the page table page is not
1036 * mapped correctly.
1037 */
1038static vm_page_t
1039_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
1040{
1041	vm_offset_t pteva, ptepa;
1042	vm_page_t m;
1043	int req;
1044
1045	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1046	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1047	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1048
1049	req = VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ;
1050#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
1051	if (need_wired_tlb_page_pool)
1052		req |= VM_ALLOC_WIRED_TLB_PG_POOL;
1053#endif
1054	/*
1055	 * Find or fabricate a new pagetable page
1056	 */
1057	if ((m = vm_page_alloc(NULL, ptepindex, req)) == NULL) {
1058		if (flags & M_WAITOK) {
1059			PMAP_UNLOCK(pmap);
1060			vm_page_unlock_queues();
1061			VM_WAIT;
1062			vm_page_lock_queues();
1063			PMAP_LOCK(pmap);
1064		}
1065		/*
1066		 * Indicate the need to retry.	While waiting, the page
1067		 * table page may have been allocated.
1068		 */
1069		return (NULL);
1070	}
1071	if ((m->flags & PG_ZERO) == 0)
1072		pmap_zero_page(m);
1073
1074	KASSERT(m->queue == PQ_NONE,
1075	    ("_pmap_allocpte: %p->queue != PQ_NONE", m));
1076
1077	/*
1078	 * Map the pagetable page into the process address space, if it
1079	 * isn't already there.
1080	 */
1081
1082	pmap->pm_stats.resident_count++;
1083
1084	ptepa = VM_PAGE_TO_PHYS(m);
1085	pteva = MIPS_PHYS_TO_CACHED(ptepa);
1086	pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva;
1087
1088	/*
1089	 * Set the page table hint
1090	 */
1091	pmap->pm_ptphint = m;
1092
1093	/*
1094	 * Kernel page tables are allocated in pmap_bootstrap() or
1095	 * pmap_growkernel().
1096	 */
1097	if (is_kernel_pmap(pmap))
1098		panic("_pmap_allocpte() called for kernel pmap\n");
1099
1100	m->valid = VM_PAGE_BITS_ALL;
1101	vm_page_flag_clear(m, PG_ZERO);
1102
1103	return (m);
1104}
1105
1106static vm_page_t
1107pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
1108{
1109	unsigned ptepindex;
1110	vm_offset_t pteva;
1111	vm_page_t m;
1112
1113	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1114	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1115	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1116
1117	/*
1118	 * Calculate pagetable page index
1119	 */
1120	ptepindex = va >> SEGSHIFT;
1121retry:
1122	/*
1123	 * Get the page directory entry
1124	 */
1125	pteva = (vm_offset_t)pmap->pm_segtab[ptepindex];
1126
1127	/*
1128	 * If the page table page is mapped, we just increment the hold
1129	 * count, and activate it.
1130	 */
1131	if (pteva) {
1132		/*
1133		 * In order to get the page table page, try the hint first.
1134		 */
1135		if (pmap->pm_ptphint &&
1136		    (pmap->pm_ptphint->pindex == ptepindex)) {
1137			m = pmap->pm_ptphint;
1138		} else {
1139			m = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva));
1140			pmap->pm_ptphint = m;
1141		}
1142		m->wire_count++;
1143	} else {
1144		/*
1145		 * Here if the pte page isn't mapped, or if it has been
1146		 * deallocated.
1147		 */
1148		m = _pmap_allocpte(pmap, ptepindex, flags);
1149		if (m == NULL && (flags & M_WAITOK))
1150			goto retry;
1151	}
1152	return m;
1153}
1154
1155
1156/***************************************************
1157* Pmap allocation/deallocation routines.
1158 ***************************************************/
1159/*
1160 *  Revision 1.397
1161 *  - Merged pmap_release and pmap_release_free_page.  When pmap_release is
1162 *    called only the page directory page(s) can be left in the pmap pte
1163 *    object, since all page table pages will have been freed by
1164 *    pmap_remove_pages and pmap_remove.  In addition, there can only be one
1165 *    reference to the pmap and the page directory is wired, so the page(s)
1166 *    can never be busy.  So all there is to do is clear the magic mappings
1167 *    from the page directory and free the page(s).
1168 */
1169
1170
1171/*
1172 * Release any resources held by the given physical map.
1173 * Called when a pmap initialized by pmap_pinit is being released.
1174 * Should only be called if the map contains no valid mappings.
1175 */
1176void
1177pmap_release(pmap_t pmap)
1178{
1179	vm_page_t ptdpg;
1180
1181	KASSERT(pmap->pm_stats.resident_count == 0,
1182	    ("pmap_release: pmap resident count %ld != 0",
1183	    pmap->pm_stats.resident_count));
1184
1185	ptdpg = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pmap->pm_segtab));
1186
1187	vm_page_lock_queues();
1188	ptdpg->wire_count--;
1189	atomic_subtract_int(&cnt.v_wire_count, 1);
1190	vm_page_free_zero(ptdpg);
1191	vm_page_unlock_queues();
1192}
1193
1194/*
1195 * grow the number of kernel page table entries, if needed
1196 */
1197void
1198pmap_growkernel(vm_offset_t addr)
1199{
1200	vm_offset_t ptppaddr;
1201	vm_page_t nkpg;
1202	pt_entry_t *pte;
1203	int i, req;
1204
1205	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1206	if (kernel_vm_end == 0) {
1207		kernel_vm_end = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET;
1208		nkpt = 0;
1209		while (segtab_pde(kernel_segmap, kernel_vm_end)) {
1210			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1211			    ~(PAGE_SIZE * NPTEPG - 1);
1212			nkpt++;
1213			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1214				kernel_vm_end = kernel_map->max_offset;
1215				break;
1216			}
1217		}
1218	}
1219	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1220	if (addr - 1 >= kernel_map->max_offset)
1221		addr = kernel_map->max_offset;
1222	while (kernel_vm_end < addr) {
1223		if (segtab_pde(kernel_segmap, kernel_vm_end)) {
1224			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1225			    ~(PAGE_SIZE * NPTEPG - 1);
1226			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1227				kernel_vm_end = kernel_map->max_offset;
1228				break;
1229			}
1230			continue;
1231		}
1232		/*
1233		 * This index is bogus, but out of the way
1234		 */
1235		req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
1236#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
1237		if (need_wired_tlb_page_pool)
1238			req |= VM_ALLOC_WIRED_TLB_PG_POOL;
1239#endif
1240		nkpg = vm_page_alloc(NULL, nkpt, req);
1241		if (!nkpg)
1242			panic("pmap_growkernel: no memory to grow kernel");
1243
1244		nkpt++;
1245
1246		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1247		if (ptppaddr >= MIPS_KSEG0_LARGEST_PHYS) {
1248			/*
1249			 * We need to do something here, but I am not sure
1250			 * what. We can access anything in the 0 - 512Meg
1251			 * region, but if we get a page to go in the kernel
1252			 * segmap that is outside of that, we really need
1253			 * to have another mapping beyond the temporary ones
1254			 * I have. Not sure how to do this yet. FIXME FIXME.
1255			 */
1256			panic("Gak, can't handle a k-page table outside of lower 512Meg");
1257		}
1258		pte = (pt_entry_t *)MIPS_PHYS_TO_CACHED(ptppaddr);
1259		segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte;
1260
1261		/*
1262		 * The R[4-7]?00 stores only one copy of the Global bit in
1263		 * the translation lookaside buffer for each 2-page entry.
1264		 * Thus invalid entries must have the Global bit set so that
1265		 * when the EntryLo and EntryHi G bits are ANDed together they
1266		 * produce a global bit to store in the tlb.
1267		 */
1268		for (i = 0; i < NPTEPG; i++, pte++)
1269			*pte = PTE_G;
1270
1271		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
1272		    ~(PAGE_SIZE * NPTEPG - 1);
1273		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1274			kernel_vm_end = kernel_map->max_offset;
1275			break;
1276		}
1277	}
1278}
1279
1280/***************************************************
1281* page management routines.
1282 ***************************************************/
1283
1284/*
1285 * free the pv_entry back to the free list
1286 */
1287static PMAP_INLINE void
1288free_pv_entry(pv_entry_t pv)
1289{
1290
1291	pv_entry_count--;
1292	uma_zfree(pvzone, pv);
1293}
1294
1295/*
1296 * get a new pv_entry, allocating a block from the system
1297 * when needed.
1298 * the memory allocation is performed bypassing the malloc code
1299 * because of the possibility of allocations at interrupt time.
1300 */
1301static pv_entry_t
1302get_pv_entry(void)
1303{
1304
1305	pv_entry_count++;
1306	if ((pv_entry_count > pv_entry_high_water) &&
1307	    (pmap_pagedaemon_waken == 0)) {
1308		pmap_pagedaemon_waken = 1;
1309		wakeup(&vm_pages_needed);
1310	}
1311	return uma_zalloc(pvzone, M_NOWAIT);
1312}
1313
1314/*
1315 *  Revision 1.370
1316 *
1317 *  Move pmap_collect() out of the machine-dependent code, rename it
1318 *  to reflect its new location, and add page queue and flag locking.
1319 *
1320 *  Notes: (1) alpha, i386, and ia64 had identical implementations
1321 *  of pmap_collect() in terms of machine-independent interfaces;
1322 *  (2) sparc64 doesn't require it; (3) powerpc had it as a TODO.
1323 *
1324 *  MIPS implementation was identical to alpha [Junos 8.2]
1325 */
1326
1327/*
1328 * If it is the first entry on the list, it is actually
1329 * in the header and we must copy the following entry up
1330 * to the header.  Otherwise we must search the list for
1331 * the entry.  In either case we free the now unused entry.
1332 */
1333
1334static void
1335pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
1336{
1337	pv_entry_t pv;
1338
1339	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1340	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1341	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1342		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1343			if (pmap == pv->pv_pmap && va == pv->pv_va)
1344				break;
1345		}
1346	} else {
1347		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1348			if (va == pv->pv_va)
1349				break;
1350		}
1351	}
1352
1353	KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
1354	TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1355	m->md.pv_list_count--;
1356	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1357		vm_page_flag_clear(m, PG_WRITEABLE);
1358
1359	TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1360	free_pv_entry(pv);
1361}
1362
1363/*
1364 * Create a pv entry for page at pa for
1365 * (pmap, va).
1366 */
1367static void
1368pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m,
1369    boolean_t wired)
1370{
1371
1372	pv_entry_t pv;
1373
1374	pv = get_pv_entry();
1375	if (pv == NULL)
1376		panic("no pv entries: increase vm.pmap.shpgperproc");
1377	pv->pv_va = va;
1378	pv->pv_pmap = pmap;
1379	pv->pv_ptem = mpte;
1380	pv->pv_wired = wired;
1381
1382	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1383	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1384	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1385	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1386	m->md.pv_list_count++;
1387
1388}
1389
1390/*
1391 * pmap_remove_pte: do the things to unmap a page in a process
1392 */
1393static int
1394pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
1395{
1396	pt_entry_t oldpte;
1397	vm_page_t m;
1398	vm_offset_t pa;
1399
1400	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1401	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1402
1403	oldpte = loadandclear((u_int *)ptq);
1404	if (is_kernel_pmap(pmap))
1405		*ptq = PTE_G;
1406
1407	if (oldpte & PTE_W)
1408		pmap->pm_stats.wired_count -= 1;
1409
1410	pmap->pm_stats.resident_count -= 1;
1411	pa = mips_tlbpfn_to_paddr(oldpte);
1412
1413	if (page_is_managed(pa)) {
1414		m = PHYS_TO_VM_PAGE(pa);
1415		if (oldpte & PTE_M) {
1416#if defined(PMAP_DIAGNOSTIC)
1417			if (pmap_nw_modified(oldpte)) {
1418				printf(
1419				    "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1420				    va, oldpte);
1421			}
1422#endif
1423			vm_page_dirty(m);
1424		}
1425		if (m->md.pv_flags & PV_TABLE_REF)
1426			vm_page_flag_set(m, PG_REFERENCED);
1427		m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1428
1429		pmap_remove_entry(pmap, m, va);
1430	}
1431	return pmap_unuse_pt(pmap, va, NULL);
1432
1433}
1434
1435/*
1436 * Remove a single page from a process address space
1437 */
1438static void
1439pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1440{
1441	register pt_entry_t *ptq;
1442
1443	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1444	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1445	ptq = pmap_pte(pmap, va);
1446
1447	/*
1448	 * if there is no pte for this address, just skip it!!!
1449	 */
1450	if (!ptq || !pmap_pte_v(ptq)) {
1451		return;
1452	}
1453	/*
1454	 * get a local va for mappings for this pmap.
1455	 */
1456	(void)pmap_remove_pte(pmap, ptq, va);
1457	pmap_invalidate_page(pmap, va);
1458
1459	return;
1460}
1461
1462/*
1463 *	Remove the given range of addresses from the specified map.
1464 *
1465 *	It is assumed that the start and end are properly
1466 *	rounded to the page size.
1467 */
1468void
1469pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
1470{
1471	vm_offset_t va, nva;
1472
1473	if (pmap == NULL)
1474		return;
1475
1476	if (pmap->pm_stats.resident_count == 0)
1477		return;
1478
1479	vm_page_lock_queues();
1480	PMAP_LOCK(pmap);
1481
1482	/*
1483	 * special handling of removing one page.  a very common operation
1484	 * and easy to short circuit some code.
1485	 */
1486	if ((sva + PAGE_SIZE) == eva) {
1487		pmap_remove_page(pmap, sva);
1488		goto out;
1489	}
1490	for (va = sva; va < eva; va = nva) {
1491		if (!*pmap_pde(pmap, va)) {
1492			nva = mips_segtrunc(va + MIPS_SEGSIZE);
1493			continue;
1494		}
1495		pmap_remove_page(pmap, va);
1496		nva = va + PAGE_SIZE;
1497	}
1498
1499out:
1500	vm_page_unlock_queues();
1501	PMAP_UNLOCK(pmap);
1502}
1503
1504/*
1505 *	Routine:	pmap_remove_all
1506 *	Function:
1507 *		Removes this physical page from
1508 *		all physical maps in which it resides.
1509 *		Reflects back modify bits to the pager.
1510 *
1511 *	Notes:
1512 *		Original versions of this routine were very
1513 *		inefficient because they iteratively called
1514 *		pmap_remove (slow...)
1515 */
1516
1517void
1518pmap_remove_all(vm_page_t m)
1519{
1520	register pv_entry_t pv;
1521	register pt_entry_t *pte, tpte;
1522
1523#if defined(PMAP_DEBUG)
1524	/*
1525	 * XXX This makes pmap_remove_all() illegal for non-managed pages!
1526	 */
1527	if (m->flags & PG_FICTITIOUS) {
1528		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m));
1529	}
1530#endif
1531	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1532
1533	if (m->md.pv_flags & PV_TABLE_REF)
1534		vm_page_flag_set(m, PG_REFERENCED);
1535
1536	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1537		PMAP_LOCK(pv->pv_pmap);
1538		pv->pv_pmap->pm_stats.resident_count--;
1539
1540		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
1541
1542		tpte = loadandclear((u_int *)pte);
1543		if (is_kernel_pmap(pv->pv_pmap))
1544			*pte = PTE_G;
1545
1546		if (tpte & PTE_W)
1547			pv->pv_pmap->pm_stats.wired_count--;
1548
1549		/*
1550		 * Update the vm_page_t clean and reference bits.
1551		 */
1552		if (tpte & PTE_M) {
1553#if defined(PMAP_DIAGNOSTIC)
1554			if (pmap_nw_modified(tpte)) {
1555				printf(
1556				    "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1557				    pv->pv_va, tpte);
1558			}
1559#endif
1560			vm_page_dirty(m);
1561		}
1562		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1563
1564		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1565		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1566		m->md.pv_list_count--;
1567		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1568		PMAP_UNLOCK(pv->pv_pmap);
1569		free_pv_entry(pv);
1570	}
1571
1572	vm_page_flag_clear(m, PG_WRITEABLE);
1573	m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1574}
1575
1576/*
1577 *	Set the physical protection on the
1578 *	specified range of this map as requested.
1579 */
1580void
1581pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1582{
1583	pt_entry_t *pte;
1584
1585	if (pmap == NULL)
1586		return;
1587
1588	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1589		pmap_remove(pmap, sva, eva);
1590		return;
1591	}
1592	if (prot & VM_PROT_WRITE)
1593		return;
1594
1595	vm_page_lock_queues();
1596	PMAP_LOCK(pmap);
1597	while (sva < eva) {
1598		pt_entry_t pbits, obits;
1599		vm_page_t m;
1600		vm_offset_t pa;
1601
1602		/*
1603		 * If segment table entry is empty, skip this segment.
1604		 */
1605		if (!*pmap_pde(pmap, sva)) {
1606			sva = mips_segtrunc(sva + MIPS_SEGSIZE);
1607			continue;
1608		}
1609		/*
1610		 * If pte is invalid, skip this page
1611		 */
1612		pte = pmap_pte(pmap, sva);
1613		if (!pmap_pte_v(pte)) {
1614			sva += PAGE_SIZE;
1615			continue;
1616		}
1617retry:
1618		obits = pbits = *pte;
1619		pa = mips_tlbpfn_to_paddr(pbits);
1620
1621		if (page_is_managed(pa)) {
1622			m = PHYS_TO_VM_PAGE(pa);
1623			if (m->md.pv_flags & PV_TABLE_REF) {
1624				vm_page_flag_set(m, PG_REFERENCED);
1625				m->md.pv_flags &= ~PV_TABLE_REF;
1626			}
1627			if (pbits & PTE_M) {
1628				vm_page_dirty(m);
1629				m->md.pv_flags &= ~PV_TABLE_MOD;
1630			}
1631		}
1632		pbits = (pbits & ~PTE_M) | PTE_RO;
1633
1634		if (pbits != *pte) {
1635			if (!atomic_cmpset_int((u_int *)pte, obits, pbits))
1636				goto retry;
1637			pmap_update_page(pmap, sva, pbits);
1638		}
1639		sva += PAGE_SIZE;
1640	}
1641	vm_page_unlock_queues();
1642	PMAP_UNLOCK(pmap);
1643}
1644
1645/*
1646 *	Insert the given physical page (p) at
1647 *	the specified virtual address (v) in the
1648 *	target physical map with the protection requested.
1649 *
1650 *	If specified, the page will be wired down, meaning
1651 *	that the related pte can not be reclaimed.
1652 *
1653 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1654 *	or lose information.  That is, this routine must actually
1655 *	insert this page into the given map NOW.
1656 */
1657void
1658pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t fault_type, vm_page_t m, vm_prot_t prot,
1659    boolean_t wired)
1660{
1661	vm_offset_t pa, opa;
1662	register pt_entry_t *pte;
1663	pt_entry_t origpte, newpte;
1664	vm_page_t mpte, om;
1665	int rw = 0;
1666
1667	if (pmap == NULL)
1668		return;
1669
1670	va &= ~PAGE_MASK;
1671#ifdef PMAP_DIAGNOSTIC
1672	if (va > VM_MAX_KERNEL_ADDRESS)
1673		panic("pmap_enter: toobig");
1674#endif
1675
1676	mpte = NULL;
1677
1678	vm_page_lock_queues();
1679	PMAP_LOCK(pmap);
1680
1681	/*
1682	 * In the case that a page table page is not resident, we are
1683	 * creating it here.
1684	 */
1685	if (va < VM_MAXUSER_ADDRESS) {
1686		mpte = pmap_allocpte(pmap, va, M_WAITOK);
1687	}
1688	pte = pmap_pte(pmap, va);
1689
1690	/*
1691	 * Page Directory table entry not valid, we need a new PT page
1692	 */
1693	if (pte == NULL) {
1694		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n",
1695		    (void *)pmap->pm_segtab, va);
1696	}
1697	pa = VM_PAGE_TO_PHYS(m);
1698	om = NULL;
1699	origpte = *pte;
1700	opa = mips_tlbpfn_to_paddr(origpte);
1701
1702	/*
1703	 * Mapping has not changed, must be protection or wiring change.
1704	 */
1705	if ((origpte & PTE_V) && (opa == pa)) {
1706		/*
1707		 * Wiring change, just update stats. We don't worry about
1708		 * wiring PT pages as they remain resident as long as there
1709		 * are valid mappings in them. Hence, if a user page is
1710		 * wired, the PT page will be also.
1711		 */
1712		if (wired && ((origpte & PTE_W) == 0))
1713			pmap->pm_stats.wired_count++;
1714		else if (!wired && (origpte & PTE_W))
1715			pmap->pm_stats.wired_count--;
1716
1717#if defined(PMAP_DIAGNOSTIC)
1718		if (pmap_nw_modified(origpte)) {
1719			printf(
1720			    "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
1721			    va, origpte);
1722		}
1723#endif
1724
1725		/*
1726		 * Remove extra pte reference
1727		 */
1728		if (mpte)
1729			mpte->wire_count--;
1730
1731		/*
1732		 * We might be turning off write access to the page, so we
1733		 * go ahead and sense modify status.
1734		 */
1735		if (page_is_managed(opa)) {
1736			om = m;
1737		}
1738		goto validate;
1739	}
1740	/*
1741	 * Mapping has changed, invalidate old range and fall through to
1742	 * handle validating new mapping.
1743	 */
1744	if (opa) {
1745		if (origpte & PTE_W)
1746			pmap->pm_stats.wired_count--;
1747
1748		if (page_is_managed(opa)) {
1749			om = PHYS_TO_VM_PAGE(opa);
1750			pmap_remove_entry(pmap, om, va);
1751		}
1752		if (mpte != NULL) {
1753			mpte->wire_count--;
1754			KASSERT(mpte->wire_count > 0,
1755			    ("pmap_enter: missing reference to page table page,"
1756			    " va: 0x%x", va));
1757		}
1758	} else
1759		pmap->pm_stats.resident_count++;
1760
1761	/*
1762	 * Enter on the PV list if part of our managed memory. Note that we
1763	 * raise IPL while manipulating pv_table since pmap_enter can be
1764	 * called at interrupt time.
1765	 */
1766	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1767		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1768		    ("pmap_enter: managed mapping within the clean submap"));
1769		pmap_insert_entry(pmap, va, mpte, m, wired);
1770	}
1771	/*
1772	 * Increment counters
1773	 */
1774	if (wired)
1775		pmap->pm_stats.wired_count++;
1776
1777validate:
1778	rw = init_pte_prot(va, m, prot);
1779
1780	/*
1781	 * Now validate mapping with desired protection/wiring.
1782	 */
1783	newpte = mips_paddr_to_tlbpfn(pa) | rw | PTE_V;
1784
1785	if (is_cacheable_mem(pa))
1786		newpte |= PTE_CACHE;
1787	else
1788		newpte |= PTE_UNCACHED;
1789
1790	if (wired)
1791		newpte |= PTE_W;
1792
1793	if (is_kernel_pmap(pmap)) {
1794		newpte |= PTE_G;
1795	}
1796
1797	/*
1798	 * if the mapping or permission bits are different, we need to
1799	 * update the pte.
1800	 */
1801	if (origpte != newpte) {
1802		if (origpte & PTE_V) {
1803			*pte = newpte;
1804			if (page_is_managed(opa) && (opa != pa)) {
1805				if (om->md.pv_flags & PV_TABLE_REF)
1806					vm_page_flag_set(om, PG_REFERENCED);
1807				om->md.pv_flags &=
1808				    ~(PV_TABLE_REF | PV_TABLE_MOD);
1809			}
1810			if (origpte & PTE_M) {
1811				KASSERT((origpte & PTE_RW),
1812				    ("pmap_enter: modified page not writable:"
1813				    " va: 0x%x, pte: 0x%lx", va, origpte));
1814				if (page_is_managed(opa))
1815					vm_page_dirty(om);
1816			}
1817		} else {
1818			*pte = newpte;
1819		}
1820	}
1821	pmap_update_page(pmap, va, newpte);
1822
1823	/*
1824	 * Sync I & D caches for executable pages.  Do this only if the
1825	 * target pmap belongs to the current process.  Otherwise, an
1826	 * unresolvable TLB miss may occur.
1827	 */
1828	if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
1829	    (prot & VM_PROT_EXECUTE)) {
1830		mips_icache_sync_range(va, NBPG);
1831		mips_dcache_wbinv_range(va, NBPG);
1832	}
1833	vm_page_unlock_queues();
1834	PMAP_UNLOCK(pmap);
1835}
1836
1837/*
1838 * this code makes some *MAJOR* assumptions:
1839 * 1. Current pmap & pmap exists.
1840 * 2. Not wired.
1841 * 3. Read access.
1842 * 4. No page table pages.
1843 * but is *MUCH* faster than pmap_enter...
1844 */
1845
1846
1847void
1848pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1849{
1850	pt_entry_t *pte;
1851	vm_offset_t pa;
1852	vm_page_t mpte = NULL;
1853
1854	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1855	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1856	    ("pmap_enter_quick: managed mapping within the clean submap"));
1857	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1858	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1859	PMAP_LOCK(pmap);
1860	/*
1861	 * In the case that a page table page is not resident, we are
1862	 * creating it here.
1863	 */
1864	if (va < VM_MAXUSER_ADDRESS) {
1865		unsigned ptepindex;
1866		vm_offset_t pteva;
1867
1868		/*
1869		 * Calculate pagetable page index
1870		 */
1871		ptepindex = va >> SEGSHIFT;
1872		if (mpte && (mpte->pindex == ptepindex)) {
1873			mpte->wire_count++;
1874		} else {
1875	retry:
1876			/*
1877			 * Get the page directory entry
1878			 */
1879			pteva = (vm_offset_t)pmap->pm_segtab[ptepindex];
1880
1881			/*
1882			 * If the page table page is mapped, we just
1883			 * increment the hold count, and activate it.
1884			 */
1885			if (pteva) {
1886				if (pmap->pm_ptphint &&
1887				    (pmap->pm_ptphint->pindex == ptepindex)) {
1888					mpte = pmap->pm_ptphint;
1889				} else {
1890					mpte = PHYS_TO_VM_PAGE(MIPS_CACHED_TO_PHYS(pteva));
1891					pmap->pm_ptphint = mpte;
1892				}
1893				mpte->wire_count++;
1894			} else {
1895				mpte = _pmap_allocpte(pmap, ptepindex, M_NOWAIT);
1896				if (mpte == NULL) {
1897					PMAP_UNLOCK(pmap);
1898					vm_page_busy(m);
1899					vm_page_unlock_queues();
1900					VM_OBJECT_UNLOCK(m->object);
1901					VM_WAIT;
1902					VM_OBJECT_LOCK(m->object);
1903					vm_page_lock_queues();
1904					vm_page_wakeup(m);
1905					PMAP_LOCK(pmap);
1906					goto retry;
1907				}
1908			}
1909		}
1910	} else {
1911		mpte = NULL;
1912	}
1913
1914	pte = pmap_pte(pmap, va);
1915	if (pmap_pte_v(pte)) {
1916		if (mpte)
1917			pmap_unwire_pte_hold(pmap, mpte);
1918		PMAP_UNLOCK(pmap);
1919		return;
1920	}
1921	/*
1922	 * Enter on the PV list if part of our managed memory. Note that we
1923	 * raise IPL while manipulating pv_table since pmap_enter can be
1924	 * called at interrupt time.
1925	 */
1926	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
1927		pmap_insert_entry(pmap, va, mpte, m, FALSE);
1928
1929	/*
1930	 * Increment counters
1931	 */
1932	pmap->pm_stats.resident_count++;
1933
1934	pa = VM_PAGE_TO_PHYS(m);
1935
1936	/*
1937	 * Now validate mapping with RO protection
1938	 */
1939	*pte = mips_paddr_to_tlbpfn(pa) | PTE_V;
1940
1941	if (is_cacheable_mem(pa))
1942		*pte |= PTE_CACHE;
1943	else
1944		*pte |= PTE_UNCACHED;
1945
1946	if (is_kernel_pmap(pmap))
1947		*pte |= PTE_G;
1948	else {
1949		*pte |= PTE_RO;
1950		/*
1951		 * Sync I & D caches.  Do this only if the target pmap
1952		 * belongs to the current process.  Otherwise, an
1953		 * unresolvable TLB miss may occur. */
1954		if (pmap == &curproc->p_vmspace->vm_pmap) {
1955			va &= ~PAGE_MASK;
1956			mips_icache_sync_range(va, NBPG);
1957			mips_dcache_wbinv_range(va, NBPG);
1958		}
1959	}
1960
1961	PMAP_UNLOCK(pmap);
1962	return;
1963}
1964
1965/*
1966 * Make a temporary mapping for a physical address.  This is only intended
1967 * to be used for panic dumps.
1968 */
1969void *
1970pmap_kenter_temporary(vm_paddr_t pa, int i)
1971{
1972	vm_offset_t va;
1973
1974	if (i != 0)
1975		printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
1976		    __func__);
1977
1978#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
1979	if (need_wired_tlb_page_pool) {
1980		va = pmap_map_fpage(pa, &fpages_shared[PMAP_FPAGE_KENTER_TEMP],
1981		    TRUE);
1982	} else
1983#endif
1984	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
1985		va = MIPS_PHYS_TO_CACHED(pa);
1986	} else {
1987		int cpu;
1988		struct local_sysmaps *sysm;
1989
1990		cpu = PCPU_GET(cpuid);
1991		sysm = &sysmap_lmem[cpu];
1992		/* Since this is for the debugger, no locks or any other fun */
1993		sysm->CMAP1 = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
1994		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
1995		sysm->valid1 = 1;
1996		va = (vm_offset_t)sysm->CADDR1;
1997	}
1998	return ((void *)va);
1999}
2000
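/*
 * Undo a pmap_kenter_temporary() mapping.  Addresses below
 * MIPS_KSEG0_LARGEST_PHYS were handed out straight from KSEG0 and need no
 * teardown; otherwise the per-CPU CMAP1 slot is invalidated.
 */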
2001void
2002pmap_kenter_temporary_free(vm_paddr_t pa)
2003{
2004	int cpu;
2005	struct local_sysmaps *sysm;
2006
2007	if (pa < MIPS_KSEG0_LARGEST_PHYS) {
2008		/* nothing to do for this case */
2009		return;
2010	}
2011	cpu = PCPU_GET(cpuid);
2012	sysm = &sysmap_lmem[cpu];
2013	if (sysm->valid1) {
2014		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2015		sysm->CMAP1 = 0;
2016		sysm->valid1 = 0;
2017	}
2018}
2019
2020/*
 * The code formerly here was moved to the machine-independent
 * vm_map_pmap_enter().
2023 */
2024
2025/*
2026 * Maps a sequence of resident pages belonging to the same object.
2027 * The sequence begins with the given page m_start.  This page is
2028 * mapped at the given virtual address start.  Each subsequent page is
2029 * mapped at a virtual address that is offset from start by the same
2030 * amount as the page is offset from m_start within the object.  The
2031 * last page in the sequence is the page with the largest offset from
2032 * m_start that can be mapped at a virtual address less than the given
2033 * virtual address end.  Not every virtual page between start and end
2034 * is mapped; only those for which a resident page exists with the
2035 * corresponding offset from m_start are mapped.
2036 */
2037void
2038pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2039    vm_page_t m_start, vm_prot_t prot)
2040{
2041	vm_page_t m;
2042	vm_pindex_t diff, psize;
2043
2044	psize = atop(end - start);
2045	m = m_start;
2046	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		/*
		 * FIXME: prot is passed both in the normal (m, prot)
		 * arguments and as the fault_type, which we don't use.
		 * If pmap_enter() ever uses the fault_type, this will
		 * have to be fixed.
		 */
2052		pmap_enter(pmap, start + ptoa(diff), prot, m, prot &
2053		    (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
2054		m = TAILQ_NEXT(m, listq);
2055	}
2056}
2057
2058/*
2059 * pmap_object_init_pt preloads the ptes for a given object
2060 * into the specified pmap.  This eliminates the blast of soft
2061 * faults on process startup and immediately after an mmap.
2062 */
2063void
2064pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2065    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2066{
2067	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2068	KASSERT(object->type == OBJT_DEVICE,
2069	    ("pmap_object_init_pt: non-device object"));
2070}
2071
2072/*
2073 *	Routine:	pmap_change_wiring
2074 *	Function:	Change the wiring attribute for a map/virtual-address
2075 *			pair.
2076 *	In/out conditions:
2077 *			The mapping must already exist in the pmap.
2078 */
2079void
2080pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
2081{
2082	register pt_entry_t *pte;
2083
2084	if (pmap == NULL)
2085		return;
2086
2087	PMAP_LOCK(pmap);
2088	pte = pmap_pte(pmap, va);
2089
2090	if (wired && !pmap_pte_w(pte))
2091		pmap->pm_stats.wired_count++;
2092	else if (!wired && pmap_pte_w(pte))
2093		pmap->pm_stats.wired_count--;
2094
2095	/*
2096	 * Wiring is not a hardware characteristic so there is no need to
2097	 * invalidate TLB.
2098	 */
2099	pmap_pte_set_w(pte, wired);
2100	PMAP_UNLOCK(pmap);
2101}
2102
2103/*
2104 *	Copy the range specified by src_addr/len
2105 *	from the source map to the range dst_addr/len
2106 *	in the destination map.
2107 *
2108 *	This routine is only advisory and need not do anything.
2109 */
2110
2111void
2112pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2113    vm_size_t len, vm_offset_t src_addr)
2114{
2115}
2116
2117/*
2118 *	pmap_zero_page zeros the specified hardware page by mapping
2119 *	the page into KVM and using bzero to clear its contents.
2120 */
2121void
2122pmap_zero_page(vm_page_t m)
2123{
2124	vm_offset_t va;
2125	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2126
2127#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2128	if (need_wired_tlb_page_pool) {
2129		struct fpage *fp1;
2130		struct sysmaps *sysmaps;
2131
2132		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
2133		mtx_lock(&sysmaps->lock);
2134		sched_pin();
2135
2136		fp1 = &sysmaps->fp[PMAP_FPAGE1];
2137		va = pmap_map_fpage(phys, fp1, FALSE);
2138		bzero((caddr_t)va, PAGE_SIZE);
2139		pmap_unmap_fpage(phys, fp1);
2140		sched_unpin();
2141		mtx_unlock(&sysmaps->lock);
2142		/*
		 * Should we flush the cache here?
2144		 */
2145	} else
2146#endif
	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
		va = MIPS_PHYS_TO_CACHED(phys);
		bzero((caddr_t)va, PAGE_SIZE);
2152	} else {
2153		int cpu;
2154		struct local_sysmaps *sysm;
2155
2156		cpu = PCPU_GET(cpuid);
2157		sysm = &sysmap_lmem[cpu];
2158		PMAP_LGMEM_LOCK(sysm);
2159		sched_pin();
2160		sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2161		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2162		sysm->valid1 = 1;
2163		bzero(sysm->CADDR1, PAGE_SIZE);
2164		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2165		sysm->CMAP1 = 0;
2166		sysm->valid1 = 0;
2167		sched_unpin();
2168		PMAP_LGMEM_UNLOCK(sysm);
2169	}
2170
2171}
2172
2173/*
2174 *	pmap_zero_page_area zeros the specified hardware page by mapping
2175 *	the page into KVM and using bzero to clear its contents.
2176 *
2177 *	off and size may not cover an area beyond a single hardware page.
2178 */
2179void
2180pmap_zero_page_area(vm_page_t m, int off, int size)
2181{
2182	vm_offset_t va;
2183	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2184
2185#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2186	if (need_wired_tlb_page_pool) {
2187		struct fpage *fp1;
2188		struct sysmaps *sysmaps;
2189
2190		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
2191		mtx_lock(&sysmaps->lock);
2192		sched_pin();
2193
2194		fp1 = &sysmaps->fp[PMAP_FPAGE1];
2195		va = pmap_map_fpage(phys, fp1, FALSE);
2196		bzero((caddr_t)va + off, size);
2197		pmap_unmap_fpage(phys, fp1);
2198
2199		sched_unpin();
2200		mtx_unlock(&sysmaps->lock);
2201	} else
2202#endif
2203	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
2204		va = MIPS_PHYS_TO_CACHED(phys);
		bzero((char *)va + off, size);
2206	} else {
2207		int cpu;
2208		struct local_sysmaps *sysm;
2209
2210		cpu = PCPU_GET(cpuid);
2211		sysm = &sysmap_lmem[cpu];
2212		PMAP_LGMEM_LOCK(sysm);
2213		sched_pin();
2214		sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2215		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2216		sysm->valid1 = 1;
2217		bzero((char *)sysm->CADDR1 + off, size);
2218		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2219		sysm->CMAP1 = 0;
2220		sysm->valid1 = 0;
2221		sched_unpin();
2222		PMAP_LGMEM_UNLOCK(sysm);
2223	}
2224}
2225
2226void
2227pmap_zero_page_idle(vm_page_t m)
2228{
2229	vm_offset_t va;
2230	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2231
2232#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2233	if (need_wired_tlb_page_pool) {
2234		sched_pin();
2235		va = pmap_map_fpage(phys, &fpages_shared[PMAP_FPAGE3], FALSE);
2236		bzero((caddr_t)va, PAGE_SIZE);
2237		pmap_unmap_fpage(phys, &fpages_shared[PMAP_FPAGE3]);
2238		sched_unpin();
2239	} else
2240#endif
2241	if (phys < MIPS_KSEG0_LARGEST_PHYS) {
2242		va = MIPS_PHYS_TO_CACHED(phys);
2243		bzero((caddr_t)va, PAGE_SIZE);
2244	} else {
2245		int cpu;
2246		struct local_sysmaps *sysm;
2247
2248		cpu = PCPU_GET(cpuid);
2249		sysm = &sysmap_lmem[cpu];
2250		PMAP_LGMEM_LOCK(sysm);
2251		sched_pin();
2252		sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2253		pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2254		sysm->valid1 = 1;
2255		bzero(sysm->CADDR1, PAGE_SIZE);
2256		pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2257		sysm->CMAP1 = 0;
2258		sysm->valid1 = 0;
2259		sched_unpin();
2260		PMAP_LGMEM_UNLOCK(sysm);
2261	}
2262
2263}
2264
2265/*
2266 *	pmap_copy_page copies the specified (machine independent)
2267 *	page by mapping the page into virtual memory and using
2268 *	bcopy to copy the page, one machine dependent page at a
2269 *	time.
2270 */
2271void
2272pmap_copy_page(vm_page_t src, vm_page_t dst)
2273{
2274	vm_offset_t va_src, va_dst;
2275	vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src);
	vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst);

2279#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
2280	if (need_wired_tlb_page_pool) {
2281		struct fpage *fp1, *fp2;
2282		struct sysmaps *sysmaps;
2283
2284		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
2285		mtx_lock(&sysmaps->lock);
2286		sched_pin();
2287
2288		fp1 = &sysmaps->fp[PMAP_FPAGE1];
2289		fp2 = &sysmaps->fp[PMAP_FPAGE2];
2290
2291		va_src = pmap_map_fpage(phy_src, fp1, FALSE);
2292		va_dst = pmap_map_fpage(phy_dst, fp2, FALSE);
2293
2294		bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2295
2296		pmap_unmap_fpage(phy_src, fp1);
2297		pmap_unmap_fpage(phy_dst, fp2);
2298		sched_unpin();
2299		mtx_unlock(&sysmaps->lock);
2300
2301		/*
		 * Should we flush the cache here?
2303		 */
2304	} else
2305#endif
2306	{
2307		if ((phy_src < MIPS_KSEG0_LARGEST_PHYS) && (phy_dst < MIPS_KSEG0_LARGEST_PHYS)) {
2308			/* easy case, all can be accessed via KSEG0 */
2309			va_src = MIPS_PHYS_TO_CACHED(phy_src);
2310			va_dst = MIPS_PHYS_TO_CACHED(phy_dst);
2311			bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2312		} else {
2313			int cpu;
2314			struct local_sysmaps *sysm;
2315
2316			cpu = PCPU_GET(cpuid);
2317			sysm = &sysmap_lmem[cpu];
2318			PMAP_LGMEM_LOCK(sysm);
2319			sched_pin();
2320			if (phy_src < MIPS_KSEG0_LARGEST_PHYS) {
2321				/* one side needs mapping - dest */
2322				va_src = MIPS_PHYS_TO_CACHED(phy_src);
2323				sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2324				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2);
				sysm->valid2 = 1;
2326				va_dst = (vm_offset_t)sysm->CADDR2;
2327			} else if (phy_dst < MIPS_KSEG0_LARGEST_PHYS) {
2328				/* one side needs mapping - src */
2329				va_dst = MIPS_PHYS_TO_CACHED(phy_dst);
2330				sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2331				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2332				va_src = (vm_offset_t)sysm->CADDR1;
2333				sysm->valid1 = 1;
2334			} else {
2335				/* all need mapping */
2336				sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2337				sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE;
2338				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1);
2339				pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2);
2340				sysm->valid1 = sysm->valid2 = 1;
2341				va_src = (vm_offset_t)sysm->CADDR1;
2342				va_dst = (vm_offset_t)sysm->CADDR2;
2343			}
2344			bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2345			if (sysm->valid1) {
2346				pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1);
2347				sysm->CMAP1 = 0;
2348				sysm->valid1 = 0;
2349			}
2350			if (sysm->valid2) {
2351				pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR2);
2352				sysm->CMAP2 = 0;
2353				sysm->valid2 = 0;
2354			}
2355			sched_unpin();
2356			PMAP_LGMEM_UNLOCK(sysm);
2357		}
2358	}
2359}
2360
2361/*
2362 * Returns true if the pmap's pv is one of the first
2363 * 16 pvs linked to from this page.  This count may
2364 * be changed upwards or downwards in the future; it
2365 * is only necessary that true be returned for a small
2366 * subset of pmaps for proper page aging.
2367 */
2368boolean_t
2369pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2370{
2371	pv_entry_t pv;
2372	int loops = 0;
2373
2374	if (m->flags & PG_FICTITIOUS)
2375		return FALSE;
2376
2377	vm_page_lock_queues();
2378	PMAP_LOCK(pmap);
2379
2380	/*
2381	 * Not found, check current mappings returning immediately if found.
2382	 */
2383	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2384		if (pv->pv_pmap == pmap) {
2385			PMAP_UNLOCK(pmap);
2386			vm_page_unlock_queues();
2387			return TRUE;
2388		}
2389		loops++;
2390		if (loops >= 16)
2391			break;
2392	}
2393	PMAP_UNLOCK(pmap);
2394	vm_page_unlock_queues();
2395	return (FALSE);
2396}
2397
2398#define	PMAP_REMOVE_PAGES_CURPROC_ONLY
2399/*
2400 * Remove all pages from specified address space
2401 * this aids process exit speeds.  Also, this code
2402 * is special cased for current process only, but
2403 * can have the more generic (and slightly slower)
2404 * mode enabled.  This is much faster than pmap_remove
2405 * in the case of running down an entire address space.
2406 */
2407void
2408pmap_remove_pages(pmap_t pmap)
2409{
2410	pt_entry_t *pte, tpte;
2411	pv_entry_t pv, npv;
2412	vm_page_t m;
2413
2414#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2415	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2416		printf("warning: pmap_remove_pages called with non-current pmap\n");
2417		return;
2418	}
2419#endif
2420
2421	vm_page_lock_queues();
2422	PMAP_LOCK(pmap);
2423	sched_pin();
	/* XXX: should this use TAILQ_FOREACH_SAFE? */
	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2428
2429		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2430		if (!pmap_pte_v(pte))
2431			panic("pmap_remove_pages: page on pm_pvlist has no pte\n");
2432		tpte = *pte;
2433
2434/*
2435 * We cannot remove wired pages from a process' mapping at this time
2436 */
2437		if (tpte & PTE_W) {
2438			npv = TAILQ_NEXT(pv, pv_plist);
2439			continue;
2440		}
2441		*pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2442
2443		m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(tpte));
2444
2445		KASSERT(m < &vm_page_array[vm_page_array_size],
2446		    ("pmap_remove_pages: bad tpte %lx", tpte));
2447
2448		pv->pv_pmap->pm_stats.resident_count--;
2449
2450		/*
2451		 * Update the vm_page_t clean and reference bits.
2452		 */
2453		if (tpte & PTE_M) {
2454			vm_page_dirty(m);
2455		}
2456		npv = TAILQ_NEXT(pv, pv_plist);
2457		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2458
2459		m->md.pv_list_count--;
2460		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2461		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
2462			vm_page_flag_clear(m, PG_WRITEABLE);
2463		}
2464		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2465		free_pv_entry(pv);
2466	}
2467	sched_unpin();
2468	pmap_invalidate_all(pmap);
2469	PMAP_UNLOCK(pmap);
2470	vm_page_unlock_queues();
2471}
2472
2473/*
2474 * pmap_testbit tests bits in pte's
2475 * note that the testbit/changebit routines are inline,
2476 * and a lot of things compile-time evaluate.
2477 */
2478static boolean_t
2479pmap_testbit(vm_page_t m, int bit)
2480{
2481	pv_entry_t pv;
2482	pt_entry_t *pte;
2483	boolean_t rv = FALSE;
2484
2485	if (m->flags & PG_FICTITIOUS)
2486		return rv;
2487
2488	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2489		return rv;
2490
2491	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2492	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2493#if defined(PMAP_DIAGNOSTIC)
2494		if (!pv->pv_pmap) {
2495			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2496			continue;
2497		}
2498#endif
2499		PMAP_LOCK(pv->pv_pmap);
2500		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2501		rv = (*pte & bit) != 0;
2502		PMAP_UNLOCK(pv->pv_pmap);
2503		if (rv)
2504			break;
2505	}
2506	return (rv);
2507}
2508
2509/*
2510 * this routine is used to modify bits in ptes
2511 */
2512static __inline void
2513pmap_changebit(vm_page_t m, int bit, boolean_t setem)
2514{
2515	register pv_entry_t pv;
2516	register pt_entry_t *pte;
2517
2518	if (m->flags & PG_FICTITIOUS)
2519		return;
2520
2521	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2522	/*
	 * Loop over all current mappings, setting or clearing as appropriate.
	 * If setting read-only, do we need to clear the VAC?
2525	 */
2526	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2527#if defined(PMAP_DIAGNOSTIC)
2528		if (!pv->pv_pmap) {
2529			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2530			continue;
2531		}
2532#endif
2533
2534		PMAP_LOCK(pv->pv_pmap);
2535		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2536
2537		if (setem) {
2538			*(int *)pte |= bit;
2539			pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2540		} else {
2541			vm_offset_t pbits = *(vm_offset_t *)pte;
2542
2543			if (pbits & bit) {
2544				if (bit == PTE_RW) {
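					/*
					 * Clearing write access: fold any
					 * accumulated modify bit into the
					 * vm_page before downgrading the PTE
					 * to read-only.
					 */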
2545					if (pbits & PTE_M) {
2546						vm_page_dirty(m);
2547					}
2548					*(int *)pte = (pbits & ~(PTE_M | PTE_RW)) |
2549					    PTE_RO;
2550				} else {
2551					*(int *)pte = pbits & ~bit;
2552				}
2553				pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2554			}
2555		}
2556		PMAP_UNLOCK(pv->pv_pmap);
2557	}
2558	if (!setem && bit == PTE_RW)
2559		vm_page_flag_clear(m, PG_WRITEABLE);
2560}
2561
2562/*
2563 *	pmap_page_wired_mappings:
2564 *
2565 *	Return the number of managed mappings to the given physical page
2566 *	that are wired.
2567 */
2568int
2569pmap_page_wired_mappings(vm_page_t m)
2570{
2571	pv_entry_t pv;
2572	int count;
2573
2574	count = 0;
2575	if ((m->flags & PG_FICTITIOUS) != 0)
2576		return (count);
2577	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2578	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
2579	    if (pv->pv_wired)
2580		count++;
2581	return (count);
2582}
2583
2584/*
2585 * Clear the write and modified bits in each of the given page's mappings.
2586 */
2587void
2588pmap_remove_write(vm_page_t m)
2589{
2590	pv_entry_t pv, npv;
2591	vm_offset_t va;
2592	pt_entry_t *pte;
2593
2594	if ((m->flags & PG_WRITEABLE) == 0)
2595		return;
2596
2597	/*
	 * Loop over all current mappings, removing write access from each.
2599	 */
2600	for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) {
		npv = TAILQ_NEXT(pv, pv_list);
2602		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2603
		if ((pte == NULL) || !mips_pg_v(*pte))
			panic("pmap_remove_write: page on pv_list has no pte");
2606
2607		va = pv->pv_va;
2608		pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE,
2609		    VM_PROT_READ | VM_PROT_EXECUTE);
2610	}
2611	vm_page_flag_clear(m, PG_WRITEABLE);
2612}
2613
2614/*
2615 *	pmap_ts_referenced:
2616 *
2617 *	Return the count of reference bits for a page, clearing all of them.
2618 */
2619int
2620pmap_ts_referenced(vm_page_t m)
2621{
2622	if (m->flags & PG_FICTITIOUS)
2623		return (0);
2624
2625	if (m->md.pv_flags & PV_TABLE_REF) {
2626		m->md.pv_flags &= ~PV_TABLE_REF;
2627		return 1;
2628	}
2629	return 0;
2630}
2631
2632/*
2633 *	pmap_is_modified:
2634 *
2635 *	Return whether or not the specified physical page was modified
2636 *	in any physical maps.
2637 */
2638boolean_t
2639pmap_is_modified(vm_page_t m)
2640{
2641	if (m->flags & PG_FICTITIOUS)
2642		return FALSE;
2643
2644	if (m->md.pv_flags & PV_TABLE_MOD)
2645		return TRUE;
2646	else
2647		return pmap_testbit(m, PTE_M);
2648}
2649
2650/* N/C */
2651
2652/*
2653 *	pmap_is_prefaultable:
2654 *
2655 *	Return whether or not the specified virtual address is elgible
2656 *	for prefault.
2657 */
2658boolean_t
2659pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2660{
2661	pt_entry_t *pte;
2662	boolean_t rv;
2663
2664	rv = FALSE;
2665	PMAP_LOCK(pmap);
2666	if (*pmap_pde(pmap, addr)) {
2667		pte = pmap_pte(pmap, addr);
2668		rv = (*pte == 0);
2669	}
2670	PMAP_UNLOCK(pmap);
2671	return (rv);
2672}
2673
2674/*
2675 *	Clear the modify bits on the specified physical page.
2676 */
2677void
2678pmap_clear_modify(vm_page_t m)
2679{
2680	if (m->flags & PG_FICTITIOUS)
2681		return;
2682	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2683	if (m->md.pv_flags & PV_TABLE_MOD) {
2684		pmap_changebit(m, PTE_M, FALSE);
2685		m->md.pv_flags &= ~PV_TABLE_MOD;
2686	}
2687}
2688
2689/*
2690 *	pmap_clear_reference:
2691 *
2692 *	Clear the reference bit on the specified physical page.
2693 */
2694void
2695pmap_clear_reference(vm_page_t m)
2696{
2697	if (m->flags & PG_FICTITIOUS)
2698		return;
2699
2700	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2701	if (m->md.pv_flags & PV_TABLE_REF) {
2702		m->md.pv_flags &= ~PV_TABLE_REF;
2703	}
2704}
2705
2706/*
2707 * Miscellaneous support routines follow
2708 */
2709
2710/*
2711 * Map a set of physical memory pages into the kernel virtual
2712 * address space. Return a pointer to where it is mapped. This
2713 * routine is intended to be used for mapping device memory,
2714 * NOT real memory.
2715 */
2716
2717/*
2718 * Map a set of physical memory pages into the kernel virtual
2719 * address space. Return a pointer to where it is mapped. This
2720 * routine is intended to be used for mapping device memory,
2721 * NOT real memory.
2722 */
2723void *
2724pmap_mapdev(vm_offset_t pa, vm_size_t size)
2725{
	vm_offset_t va, tmpva, offset;
2727
2728	/*
	 * KSEG1 maps only the first 512MB of the physical address space.
	 * For pa > 0x20000000 we must create a proper mapping using
	 * pmap_kenter().
2731	 */
2732	if (pa + size < MIPS_KSEG0_LARGEST_PHYS)
2733		return (void *)MIPS_PHYS_TO_KSEG1(pa);
2734	else {
2735		offset = pa & PAGE_MASK;
2736		size = roundup(size, PAGE_SIZE);
2737
2738		va = kmem_alloc_nofault(kernel_map, size);
2739		if (!va)
2740			panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2741		for (tmpva = va; size > 0;) {
2742			pmap_kenter(tmpva, pa);
2743			size -= PAGE_SIZE;
2744			tmpva += PAGE_SIZE;
2745			pa += PAGE_SIZE;
2746		}
2747	}
2748
2749	return ((void *)(va + offset));
2750}
2751
2752void
2753pmap_unmapdev(vm_offset_t va, vm_size_t size)
2754{
2755}
2756
2757/*
2758 * perform the pmap work for mincore
2759 */
2760int
2761pmap_mincore(pmap_t pmap, vm_offset_t addr)
2762{
2763
2764	pt_entry_t *ptep, pte;
2765	vm_page_t m;
2766	int val = 0;
2767
2768	PMAP_LOCK(pmap);
2769	ptep = pmap_pte(pmap, addr);
2770	pte = (ptep != NULL) ? *ptep : 0;
2771	PMAP_UNLOCK(pmap);
2772
2773	if (mips_pg_v(pte)) {
2774		vm_offset_t pa;
2775
2776		val = MINCORE_INCORE;
2777		pa = mips_tlbpfn_to_paddr(pte);
2778		if (!page_is_managed(pa))
2779			return val;
2780
2781		m = PHYS_TO_VM_PAGE(pa);
2782
2783		/*
2784		 * Modified by us
2785		 */
2786		if (pte & PTE_M)
2787			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2788		/*
2789		 * Modified by someone
2790		 */
2791		else {
2792			vm_page_lock_queues();
2793			if (m->dirty || pmap_is_modified(m))
2794				val |= MINCORE_MODIFIED_OTHER;
2795			vm_page_unlock_queues();
2796		}
2797		/*
2798		 * Referenced by us or someone
2799		 */
2800		vm_page_lock_queues();
2801		if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
2802			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2803			vm_page_flag_set(m, PG_REFERENCED);
2804		}
2805		vm_page_unlock_queues();
2806	}
2807	return val;
2808}
2809
2810void
2811pmap_activate(struct thread *td)
2812{
2813	pmap_t pmap, oldpmap;
2814	struct proc *p = td->td_proc;
2815
2816	critical_enter();
2817
2818	pmap = vmspace_pmap(p->p_vmspace);
2819	oldpmap = PCPU_GET(curpmap);
2820
2821	if (oldpmap)
2822		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2823	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2824	pmap_asid_alloc(pmap);
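	/*
	 * Only reload the segment table base and the hardware ASID when we
	 * are switching the address space of the currently running thread.
	 */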
2825	if (td == curthread) {
2826		PCPU_SET(segbase, pmap->pm_segtab);
2827		MachSetPID(pmap->pm_asid[PCPU_GET(cpuid)].asid);
2828	}
2829	PCPU_SET(curpmap, pmap);
2830	critical_exit();
2831}
2832
2833/*
2834 *	Increase the starting virtual address of the given mapping if a
2835 *	different alignment might result in more superpage mappings.
2836 */
2837void
2838pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2839    vm_offset_t *addr, vm_size_t size)
2840{
2841	vm_offset_t superpage_offset;
2842
2843	if (size < NBSEG)
2844		return;
2845	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
2846		offset += ptoa(object->pg_color);
2847	superpage_offset = offset & SEGOFSET;
2848	if (size - ((NBSEG - superpage_offset) & SEGOFSET) < NBSEG ||
2849	    (*addr & SEGOFSET) == superpage_offset)
2850		return;
2851	if ((*addr & SEGOFSET) < superpage_offset)
2852		*addr = (*addr & ~SEGOFSET) + superpage_offset;
2853	else
2854		*addr = ((*addr + SEGOFSET) & ~SEGOFSET) + superpage_offset;
2855}
2856
2857int pmap_pid_dump(int pid);
2858
2859int
2860pmap_pid_dump(int pid)
2861{
2862	pmap_t pmap;
2863	struct proc *p;
2864	int npte = 0;
2865	int index;
2866
2867	sx_slock(&allproc_lock);
2868	LIST_FOREACH(p, &allproc, p_list) {
2869		if (p->p_pid != pid)
2870			continue;
2871
2872		if (p->p_vmspace) {
2873			int i, j;
2874
2875			printf("vmspace is %p\n",
2876			       p->p_vmspace);
2877			index = 0;
2878			pmap = vmspace_pmap(p->p_vmspace);
2879			printf("pmap asid:%x generation:%x\n",
2880			       pmap->pm_asid[0].asid,
2881			       pmap->pm_asid[0].gen);
2882			for (i = 0; i < NUSERPGTBLS; i++) {
2883				pd_entry_t *pde;
2884				pt_entry_t *pte;
2885				unsigned base = i << SEGSHIFT;
2886
2887				pde = &pmap->pm_segtab[i];
2888				if (pde && pmap_pde_v(pde)) {
2889					for (j = 0; j < 1024; j++) {
2890						unsigned va = base +
2891						(j << PAGE_SHIFT);
2892
2893						pte = pmap_pte(pmap, va);
2894						if (pte && pmap_pte_v(pte)) {
2895							vm_offset_t pa;
2896							vm_page_t m;
2897
2898							pa = mips_tlbpfn_to_paddr(*pte);
2899							m = PHYS_TO_VM_PAGE(pa);
2900							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
2901							    va, pa,
2902							    m->hold_count,
2903							    m->wire_count,
2904							    m->flags);
2905							npte++;
2906							index++;
2907							if (index >= 2) {
2908								index = 0;
2909								printf("\n");
2910							} else {
2911								printf(" ");
2912							}
2913						}
2914					}
2915				}
2916			}
2917		} else {
2918		  printf("Process pid:%d has no vm_space\n", pid);
2919		}
2920		break;
2921	}
2922	sx_sunlock(&allproc_lock);
2923	return npte;
2924}
2925
2926
2927#if defined(DEBUG)
2928
2929static void pads(pmap_t pm);
2930void pmap_pvdump(vm_offset_t pa);
2931
2932/* print address space of pmap*/
2933static void
2934pads(pmap_t pm)
2935{
2936	unsigned va, i, j;
2937	pt_entry_t *ptep;
2938
2939	if (pm == kernel_pmap)
2940		return;
2941	for (i = 0; i < NPTEPG; i++)
2942		if (pm->pm_segtab[i])
2943			for (j = 0; j < NPTEPG; j++) {
2944				va = (i << SEGSHIFT) + (j << PAGE_SHIFT);
2945				if (pm == kernel_pmap && va < KERNBASE)
2946					continue;
2947				if (pm != kernel_pmap &&
2948				    va >= VM_MAXUSER_ADDRESS)
2949					continue;
2950				ptep = pmap_pte(pm, va);
2951				if (pmap_pte_v(ptep))
2952					printf("%x:%x ", va, *(int *)ptep);
2953			}
2954
2955}
2956
2957void
2958pmap_pvdump(vm_offset_t pa)
2959{
2960	register pv_entry_t pv;
2961	vm_page_t m;
2962
2963	printf("pa %x", pa);
2964	m = PHYS_TO_VM_PAGE(pa);
2965	for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
2966	    pv = TAILQ_NEXT(pv, pv_list)) {
2967		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
2968		pads(pv->pv_pmap);
2969	}
2970	printf(" ");
2971}
2972
2973/* N/C */
2974#endif
2975
2976
2977/*
2978 * Allocate TLB address space tag (called ASID or TLBPID) and return it.
2979 * It takes almost as much or more time to search the TLB for a
2980 * specific ASID and flush those entries as it does to flush the entire TLB.
2981 * Therefore, when we allocate a new ASID, we just take the next number. When
2982 * we run out of numbers, we flush the TLB, increment the generation count
2983 * and start over. ASID zero is reserved for kernel use.
2984 */
2985static void
pmap_asid_alloc(pmap_t pmap)
2988{
	if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED ||
	    pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
2992		if (PCPU_GET(next_asid) == pmap_max_asid) {
2993			MIPS_TBIAP();
2994			PCPU_SET(asid_generation,
2995			    (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
2996			if (PCPU_GET(asid_generation) == 0) {
2997				PCPU_SET(asid_generation, 1);
2998			}
2999			PCPU_SET(next_asid, 1);	/* 0 means invalid */
3000		}
3001		pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3002		pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3003		PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3004	}
3005
3006#ifdef DEBUG
3007	if (pmapdebug & (PDB_FOLLOW | PDB_TLBPID)) {
3008		if (curproc)
3009			printf("pmap_asid_alloc: curproc %d '%s' ",
3010			    curproc->p_pid, curproc->p_comm);
3011		else
3012			printf("pmap_asid_alloc: curproc <none> ");
3013		printf("segtab %p asid %d\n", pmap->pm_segtab,
3014		    pmap->pm_asid[PCPU_GET(cpuid)].asid);
3015	}
3016#endif
3017}
3018
3019int
3020page_is_managed(vm_offset_t pa)
3021{
3022	vm_offset_t pgnum = mips_btop(pa);
3023
3024	if (pgnum >= first_page && (pgnum < (first_page + vm_page_array_size))) {
3025		vm_page_t m;
3026
3027		m = PHYS_TO_VM_PAGE(pa);
3028		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
3029			return 1;
3030	}
3031	return 0;
3032}
3033
3034static int
3035init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot)
3036{
3037	int rw = 0;
3038
3039	if (!(prot & VM_PROT_WRITE))
3040		rw = PTE_ROPAGE;
3041	else {
3042		if (va >= VM_MIN_KERNEL_ADDRESS) {
3043			/*
3044			 * Don't bother to trap on kernel writes, just
3045			 * record page as dirty.
3046			 */
3047			rw = PTE_RWPAGE;
3048			vm_page_dirty(m);
3049		} else if ((m->md.pv_flags & PV_TABLE_MOD) || m->dirty)
3050			rw = PTE_RWPAGE;
3051		else
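			/*
			 * Writable but not yet dirty: leave the modify bit
			 * clear so the first write faults and the page can
			 * be marked dirty then.
			 */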
3052			rw = PTE_CWPAGE;
3053	}
3054	return rw;
3055}
3056
3057/*
3058 *	pmap_page_is_free:
3059 *
3060 *	Called when a page is freed to allow pmap to clean up
3061 *	any extra state associated with the page.  In this case
3062 *	clear modified/referenced bits.
3063 */
3064void
3065pmap_page_is_free(vm_page_t m)
3066{
3067
3068	m->md.pv_flags = 0;
3069}
3070
3071/*
3072 *	pmap_set_modified:
3073 *
3074 *	Sets the page modified and reference bits for the specified page.
3075 */
3076void
3077pmap_set_modified(vm_offset_t pa)
3078{
3079
3080	PHYS_TO_VM_PAGE(pa)->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD);
3081}
3082
3083#include <machine/db_machdep.h>
3084
3085/*
3086 *  Dump the translation buffer (TLB) in readable form.
3087 */
3088
3089void
3090db_dump_tlb(int first, int last)
3091{
3092	struct tlb tlb;
3093	int tlbno;
3094
3095	tlbno = first;
3096
3097	while (tlbno <= last) {
3098		MachTLBRead(tlbno, &tlb);
3099		if (tlb.tlb_lo0 & PTE_V || tlb.tlb_lo1 & PTE_V) {
3100			printf("TLB %2d vad 0x%08x ", tlbno, (tlb.tlb_hi & 0xffffff00));
3101		} else {
3102			printf("TLB*%2d vad 0x%08x ", tlbno, (tlb.tlb_hi & 0xffffff00));
3103		}
3104		printf("0=0x%08x ", pfn_to_vad(tlb.tlb_lo0));
3105		printf("%c", tlb.tlb_lo0 & PTE_M ? 'M' : ' ');
3106		printf("%c", tlb.tlb_lo0 & PTE_G ? 'G' : ' ');
3107		printf(" atr %x ", (tlb.tlb_lo0 >> 3) & 7);
3108		printf("1=0x%08x ", pfn_to_vad(tlb.tlb_lo1));
3109		printf("%c", tlb.tlb_lo1 & PTE_M ? 'M' : ' ');
3110		printf("%c", tlb.tlb_lo1 & PTE_G ? 'G' : ' ');
3111		printf(" atr %x ", (tlb.tlb_lo1 >> 3) & 7);
3112		printf(" sz=%x pid=%x\n", tlb.tlb_mask,
3113		       (tlb.tlb_hi & 0x000000ff)
3114		       );
3115		tlbno++;
3116	}
3117}
3118
3119#ifdef DDB
3120#include <sys/kernel.h>
3121#include <ddb/ddb.h>
3122
3123DB_SHOW_COMMAND(tlb, ddb_dump_tlb)
3124{
3125	db_dump_tlb(0, num_tlbentries - 1);
3126}
3127
3128#endif
3129
3130/*
3131 *	Routine:	pmap_kextract
3132 *	Function:
3133 *		Extract the physical page address associated
3134 *		virtual address.
3135 */
3136 /* PMAP_INLINE */ vm_offset_t
3137pmap_kextract(vm_offset_t va)
3138{
3139	vm_offset_t pa = 0;
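	/*
	 * Resolve the physical address by classifying the virtual address:
	 * user addresses are looked up in the current process's page tables,
	 * KSEG0/KSEG1 addresses are direct-mapped, and kernel (KSEG2)
	 * addresses are looked up in the kernel page tables or in the
	 * per-CPU CMAP slots used by the temporary-mapping helpers.
	 */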
3140
3141	if (va < MIPS_CACHED_MEMORY_ADDR) {
3142		/* user virtual address */
3143		pt_entry_t *ptep;
3144
3145		if (curproc && curproc->p_vmspace) {
3146			ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3147			if (ptep)
3148				pa = mips_tlbpfn_to_paddr(*ptep) |
3149				    (va & PAGE_MASK);
3150		}
3151	} else if (va >= MIPS_CACHED_MEMORY_ADDR &&
3152	    va < MIPS_UNCACHED_MEMORY_ADDR)
3153		pa = MIPS_CACHED_TO_PHYS(va);
3154	else if (va >= MIPS_UNCACHED_MEMORY_ADDR &&
3155	    va < MIPS_KSEG2_START)
3156		pa = MIPS_UNCACHED_TO_PHYS(va);
3157#ifdef VM_ALLOC_WIRED_TLB_PG_POOL
3158	else if (need_wired_tlb_page_pool && ((va >= VM_MIN_KERNEL_ADDRESS) &&
3159	    (va < (VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET))))
3160		pa = MIPS_CACHED_TO_PHYS(va);
3161#endif
3162	else if (va >= MIPS_KSEG2_START && va < VM_MAX_KERNEL_ADDRESS) {
3163		pt_entry_t *ptep;
3164
3165		if (kernel_pmap) {
3166			if (va >= (vm_offset_t)virtual_sys_start) {
				/* It's inside the kernel virtual address range. */
3168				ptep = pmap_pte(kernel_pmap, va);
3169				if (ptep)
3170					pa = mips_tlbpfn_to_paddr(*ptep) |
3171					    (va & PAGE_MASK);
3172			} else {
3173				int i;
3174
3175				/*
				 * It's inside the special mapping area.  This
				 * shouldn't happen, but if it does we want it
				 * to work correctly.  Note that if it does
				 * happen, we assume the caller holds the
				 * lock?  FIXME: this needs to be checked.
				 * - RRS
3182				 */
3183				for (i = 0; i < MAXCPU; i++) {
3184					if ((sysmap_lmem[i].valid1) && ((vm_offset_t)sysmap_lmem[i].CADDR1 == va)) {
3185						pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP1);
3186						break;
3187					}
3188					if ((sysmap_lmem[i].valid2) && ((vm_offset_t)sysmap_lmem[i].CADDR2 == va)) {
3189						pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP2);
3190						break;
3191					}
3192				}
3193			}
3194		}
3195	}
3196	return pa;
3197}
3198