/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	$Id: pmap.c,v 1.231 1999/04/19 18:45:21 alc Exp $
 */

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

#include "opt_disable_pse.h"
#include "opt_pmap.h"
#include "opt_msgbuf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_zone.h>

#include <sys/user.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#if defined(SMP) || defined(APIC_IO)
#include <machine/smp.h>
#include <machine/apic.h>
#endif /* SMP || APIC_IO */

#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)

#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
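
/*
 * Illustrative note (not in the original source): on the i386 a linear
 * address splits into a 10-bit page directory index, a 10-bit page table
 * index and a 12-bit offset.  With PDRSHIFT == 22, pmap_pde(pmap, va)
 * simply selects pm_pdir[va >> 22], so each PDE covers NBPDR (4MB) of
 * address space.  For an example va of 0xfe012345:
 *
 *	va >> PDRSHIFT			== 0x3f8  (page directory index)
 *	i386_btop(va) & (NPTEPG - 1)	== 0x12   (page table index)
 *	va & PAGE_MASK			== 0x345  (offset within the page)
 */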

/*
 * Given a map and a machine independent protection code,
 * convert to a vax protection code.
 */
#define pte_prot(m, p)	(protection_codes[p])
static int protection_codes[8];

#define	pa_index(pa)		atop((pa) - vm_first_phys)
#define	pa_to_pvh(pa)		(&pv_table[pa_index(pa)])

static struct pmap kernel_pmap_store;
pmap_t kernel_pmap;
extern pd_entry_t my_idlePTD;

vm_offset_t avail_start;	/* PA of first available physical page */
vm_offset_t avail_end;		/* PA of last available physical page */
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
static vm_offset_t vm_first_phys;
static int pgeflag;		/* PG_G or-in */
static int pseflag;		/* PG_PS or-in */
static int pv_npg;

static vm_object_t kptobj;

static int nkpt;
vm_offset_t kernel_vm_end;

/*
 * Data for the pv entry allocation mechanism
 */
static vm_zone_t pvzone;
static struct vm_zone pvzone_store;
static struct vm_object pvzone_obj;
static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
static int pmap_pagedaemon_waken = 0;
static struct pv_entry *pvinit;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = 0;
static pt_entry_t *CMAP2, *ptmmap;
static pv_table_t *pv_table;
caddr_t CADDR1 = 0, ptvmmap = 0;
static caddr_t CADDR2;
static pt_entry_t *msgbufmap;
struct msgbuf *msgbufp=0;

/* AIO support */
extern struct vmspace *aiovmspace;

#ifdef SMP
extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[];
extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3;
extern pd_entry_t *IdlePTDS[];
extern pt_entry_t SMP_prvpt[];
#endif

#ifdef SMP
extern unsigned int prv_PPAGE1[];
extern pt_entry_t *prv_PMAP1;
#else
static pt_entry_t *PMAP1 = 0;
static unsigned *PADDR1 = 0;
#endif

static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
static unsigned * get_ptbase __P((pmap_t pmap));
static pv_entry_t get_pv_entry __P((void));
static void	i386_protection_init __P((void));
static __inline void	pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem));
static void	pmap_clearbit __P((vm_offset_t pa, int bit));

static PMAP_INLINE int	pmap_is_managed __P((vm_offset_t pa));
static void	pmap_remove_all __P((vm_offset_t pa));
static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
				      vm_offset_t pa, vm_page_t mpte));
static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
					vm_offset_t sva));
static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv,
					vm_offset_t va));
static boolean_t pmap_testbit __P((vm_offset_t pa, int bit));
static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
		vm_page_t mpte, vm_offset_t pa));

static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));

static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);

static unsigned pdir4mb;

/*
 *	Routine:	pmap_pte
 *	Function:
 *		Extract the page table entry associated
 *		with the given map/virtual_address pair.
 */

PMAP_INLINE unsigned *
pmap_pte(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	unsigned *pdeaddr;

	if (pmap) {
		pdeaddr = (unsigned *) pmap_pde(pmap, va);
		if (*pdeaddr & PG_PS)
			return pdeaddr;
		if (*pdeaddr) {
			return get_ptbase(pmap) + i386_btop(va);
		}
	}
	return (0);
}

/*
 * Move the kernel virtual free pointer to the next
 * 4MB.  This is used to help improve performance
 * by using a large (4MB) page for much of the kernel
 * (.text, .data, .bss)
 */
static vm_offset_t
pmap_kmem_choose(vm_offset_t addr) {
	vm_offset_t newaddr = addr;
#ifndef DISABLE_PSE
	if (cpu_feature & CPUID_PSE) {
		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
	}
#endif
	return newaddr;
}
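
/*
 * Illustrative example (not in the original source): NBPDR is the number
 * of bytes mapped by one page directory entry, 4MB (0x400000) on the i386,
 * so the expression above rounds up to the next 4MB boundary, e.g.
 *
 *	pmap_kmem_choose(0xc02f3000) == 0xc0400000	(PSE available)
 *	pmap_kmem_choose(0xc0400000) == 0xc0400000	(already aligned)
 *
 * Without PSE support the address is returned unchanged.
 */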

/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	On the i386 this is called after mapping has already been enabled
 *	and just syncs the pmap module with what has already been done.
 *	[We can't call it easily with mapping off since the kernel is not
 *	mapped with PA == VA, hence we would have to relocate every address
 *	from the linked base (virtual) address "KERNBASE" to the actual
 *	(physical) address starting relative to 0]
 */
void
pmap_bootstrap(firstaddr, loadaddr)
	vm_offset_t firstaddr;
	vm_offset_t loadaddr;
{
	vm_offset_t va;
	pt_entry_t *pte;
#ifdef SMP
	int i, j;
#endif

	avail_start = firstaddr;

	/*
	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
	 * large. It should instead be correctly calculated in locore.s and
	 * not based on 'first' (which is a physical address, not a virtual
	 * address, for the start of unused physical memory). The kernel
	 * page tables are NOT double mapped and thus should not be included
	 * in this calculation.
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
	virtual_avail = pmap_kmem_choose(virtual_avail);

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence (XXX and which no longer exists).
	 */
	kernel_pmap = &kernel_pmap_store;

	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
	kernel_pmap->pm_count = 1;
	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
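
/*
 * Illustrative expansion (not in the original source): SYSMAP hands out the
 * next n pages of kernel VA together with the matching PTE pointers.  For
 * instance, SYSMAP(caddr_t, CMAP1, CADDR1, 1) below behaves roughly like
 *
 *	CADDR1 = (caddr_t)va;	va += 1 * PAGE_SIZE;
 *	CMAP1 = pte;		pte += 1;
 *
 * so later code can map an arbitrary physical page at CADDR1 simply by
 * storing a pte value into *CMAP1.
 */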

	va = virtual_avail;
	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 */
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
	SYSMAP(caddr_t, CMAP2, CADDR2, 1)

	/*
	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
	 * XXX ptmmap is not used.
	 */
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 * XXX msgbufmap is not used.
	 */
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
	       atop(round_page(MSGBUF_SIZE)))

#if !defined(SMP)
	/*
	 * ptemap is used for pmap_pte_quick
	 */
	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
#endif

	virtual_avail = va;

	*(int *) CMAP1 = *(int *) CMAP2 = 0;
	*(int *) PTD = 0;


	pgeflag = 0;
#if !defined(SMP)
	if (cpu_feature & CPUID_PGE) {
		pgeflag = PG_G;
	}
#endif

/*
 * Initialize the 4MB page size flag
 */
	pseflag = 0;
/*
 * The 4MB page version of the initial
 * kernel page mapping.
 */
	pdir4mb = 0;

#if !defined(DISABLE_PSE)
	if (cpu_feature & CPUID_PSE) {
		unsigned ptditmp;
		/*
		 * Enable the PSE mode
		 */
		load_cr4(rcr4() | CR4_PSE);

		/*
		 * Note that we have enabled PSE mode
		 */
		pseflag = PG_PS;
		ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
		ptditmp &= ~(NBPDR - 1);
		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
		pdir4mb = ptditmp;
		/*
		 * We can do the mapping here for the single processor
		 * case.  We simply ignore the old page table page from
		 * now on.
		 */
#if !defined(SMP)
		PTD[KPTDI] = (pd_entry_t) ptditmp;
		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
		invltlb();
#endif
	}
#endif

#ifdef SMP
	if (cpu_apic_address == 0)
		panic("pmap_bootstrap: no local apic!");

	/* 0 = private page */
	/* 1 = page table page */
	/* 2 = local apic */
	/* 16-31 = io apics */
	SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag |
	    (cpu_apic_address & PG_FRAME));

	for (i = 0; i < mp_napics; i++) {
		for (j = 0; j < 16; j++) {
			/* same page frame as a previous IO apic? */
			if (((vm_offset_t)SMP_prvpt[j + 16] & PG_FRAME) ==
			    (io_apic_address[0] & PG_FRAME)) {
				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
				break;
			}
			/* use this slot if available */
			if (((vm_offset_t)SMP_prvpt[j + 16] & PG_FRAME) == 0) {
				SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW |
				    pgeflag | (io_apic_address[i] & PG_FRAME));
				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
				break;
			}
		}
		if (j == 16)
			panic("no space to map IO apic %d!", i);
	}

	/* BSP does this itself, AP's get it pre-set */
	prv_CMAP1 = &SMP_prvpt[3 + UPAGES];
	prv_CMAP2 = &SMP_prvpt[4 + UPAGES];
	prv_CMAP3 = &SMP_prvpt[5 + UPAGES];
	prv_PMAP1 = &SMP_prvpt[6 + UPAGES];
#endif

	invltlb();

}

/*
 * Set 4mb pdir for mp startup, and global flags
 */
void
pmap_set_opt(unsigned *pdir) {
	int i;

	if (pseflag && (cpu_feature & CPUID_PSE)) {
		load_cr4(rcr4() | CR4_PSE);
		if (pdir4mb) {
			pdir[KPTDI] = pdir4mb;
		}
	}

	if (pgeflag && (cpu_feature & CPUID_PGE)) {
		load_cr4(rcr4() | CR4_PGE);
		for(i = KPTDI; i < KPTDI + nkpt; i++) {
			if (pdir[i]) {
				pdir[i] |= PG_G;
			}
		}
	}
}

/*
 * Setup the PTD for the boot processor
 */
void
pmap_set_opt_bsp(void)
{
	pmap_set_opt((unsigned *)kernel_pmap->pm_pdir);
	pmap_set_opt((unsigned *)PTD);
	invltlb();
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 *	pmap_init has been enhanced to support, in a fairly consistent
 *	way, discontiguous physical memory.
 */
void
pmap_init(phys_start, phys_end)
	vm_offset_t phys_start, phys_end;
{
	vm_offset_t addr;
	vm_size_t s;
	int i;
	int initial_pvs;

	/*
	 * object for kernel page table pages
	 */
	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);

	/*
	 * calculate the number of pv_entries needed
	 */
	vm_first_phys = phys_avail[0];
	for (i = 0; phys_avail[i + 1]; i += 2);
	pv_npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE;

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */
	s = (vm_size_t) (sizeof(pv_table_t) * pv_npg);
	s = round_page(s);

	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
	pv_table = (pv_table_t *) addr;
	for(i = 0; i < pv_npg; i++) {
		vm_offset_t pa;
		TAILQ_INIT(&pv_table[i].pv_list);
		pv_table[i].pv_list_count = 0;
		pa = vm_first_phys + i * PAGE_SIZE;
		pv_table[i].pv_vm_page = PHYS_TO_VM_PAGE(pa);
	}

	/*
	 * init the pv free list
	 */
	initial_pvs = pv_npg;
	if (initial_pvs < MINPV)
		initial_pvs = MINPV;
	pvzone = &pvzone_store;
	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
		initial_pvs * sizeof (struct pv_entry));
	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, pv_npg);

	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}

/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2() {
	pv_entry_max = PMAP_SHPGPERPROC * maxproc + pv_npg;
	pv_entry_high_water = 9 * (pv_entry_max / 10);
	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
}
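
/*
 * Illustrative numbers (not in the original source): with the default
 * PMAP_SHPGPERPROC of 200, and assuming for the sake of example a maxproc
 * of 512 and 16384 managed pages (64MB of RAM), this would give
 *
 *	pv_entry_max	    = 200 * 512 + 16384 = 118784
 *	pv_entry_high_water = 9 * (118784 / 10) = 106902
 *
 * i.e. roughly 90% of the maximum; crossing the high-water mark makes
 * get_pv_entry() wake the pagedaemon so pmap_collect() can reclaim entries.
 */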

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(virt, start, end, prot)
	vm_offset_t virt;
	vm_offset_t start;
	vm_offset_t end;
	int prot;
{
	while (start < end) {
		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
		virt += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	return (virt);
}
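
/*
 * Illustrative use (not in the original source): a caller that wants a
 * physical range visible in kernel VA might do something like
 *
 *	va = pmap_map(va, pa_start, pa_end, VM_PROT_READ | VM_PROT_WRITE);
 *
 * where va, pa_start and pa_end are hypothetical names.  The return value
 * is the first unused kernel VA after the new mappings, so the same
 * variable can be threaded through successive calls.
 */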


/***************************************************
 * Low level helper routines.....
 ***************************************************/

#if defined(PMAP_DIAGNOSTIC)

/*
 * This code checks for non-writeable/modified pages.
 * This should be an invalid condition.
 */
static int
pmap_nw_modified(pt_entry_t ptea) {
	int pte;

	pte = (int) ptea;

	if ((pte & (PG_M|PG_RW)) == PG_M)
		return 1;
	else
		return 0;
}
#endif


/*
 * this routine defines the region(s) of memory that should
 * not be tested for the modified bit.
 */
static PMAP_INLINE int
pmap_track_modified( vm_offset_t va) {
	if ((va < clean_sva) || (va >= clean_eva))
		return 1;
	else
		return 0;
}

static PMAP_INLINE void
invltlb_1pg( vm_offset_t va) {
#if defined(I386_CPU)
	if (cpu_class == CPUCLASS_386) {
		invltlb();
	} else
#endif
	{
		invlpg(va);
	}
}

static __inline void
pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va)
{
#if defined(SMP)
	if (pmap->pm_active & (1 << cpuid))
		cpu_invlpg((void *)va);
	if (pmap->pm_active & other_cpus)
		smp_invltlb();
#else
	if (pmap->pm_active)
		invltlb_1pg(va);
#endif
}

static __inline void
pmap_TLB_invalidate_all(pmap_t pmap)
{
#if defined(SMP)
	if (pmap->pm_active & (1 << cpuid))
		cpu_invltlb();
	if (pmap->pm_active & other_cpus)
		smp_invltlb();
#else
	if (pmap->pm_active)
		invltlb();
#endif
}

static unsigned *
get_ptbase(pmap)
	pmap_t pmap;
{
	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;

	/* are we current address space or kernel? */
	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
		return (unsigned *) PTmap;
	}
	/* otherwise, we are alternate address space */
	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
#if defined(SMP)
		/* The page directory is not shared between CPUs */
		cpu_invltlb();
#else
		invltlb();
#endif
	}
	return (unsigned *) APTmap;
}
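
/*
 * Background note (not in the original source): this works because each
 * page directory contains a self-referential entry at index PTDPTDI (see
 * pmap_pinit() below), which makes all of the current address space's page
 * tables appear as one linear array at PTmap.  The APTDpde slot plays the
 * same trick for one "alternate" pmap at a time, exposing a foreign address
 * space's page tables at APTmap without switching %cr3.
 */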

/*
 * Super fast pmap_pte routine best used when scanning
 * the pv lists.  This eliminates many coarse-grained
 * invltlb calls.  Note that many of the pv list
 * scans are across different pmaps.  It is very wasteful
 * to do an entire invltlb for checking a single mapping.
 */

static unsigned *
pmap_pte_quick(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	unsigned pde, newpf;
	if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
		unsigned index = i386_btop(va);
		/* are we current address space or kernel? */
		if ((pmap == kernel_pmap) ||
			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
			return (unsigned *) PTmap + index;
		}
		newpf = pde & PG_FRAME;
#ifdef SMP
		if ( ((* (unsigned *) prv_PMAP1) & PG_FRAME) != newpf) {
			* (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V;
			cpu_invlpg(&prv_PPAGE1);
		}
		return prv_PPAGE1 + ((unsigned) index & (NPTEPG - 1));
#else
		if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
			* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
			invltlb_1pg((vm_offset_t) PADDR1);
		}
		return PADDR1 + ((unsigned) index & (NPTEPG - 1));
#endif
	}
	return (0);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	vm_offset_t rtval;
	vm_offset_t pdirindex;
	pdirindex = va >> PDRSHIFT;
	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
		unsigned *pte;
		if ((rtval & PG_PS) != 0) {
			rtval &= ~(NBPDR - 1);
			rtval |= va & (NBPDR - 1);
			return rtval;
		}
		pte = get_ptbase(pmap) + i386_btop(va);
		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
		return rtval;
	}
	return 0;

}
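
/*
 * Worked example (not in the original source), assuming a 4MB mapping:
 * if pm_pdir[va >> PDRSHIFT] holds 0x008001e3 (frame 0x00800000 with PG_PS
 * set), then for va == 0xc0123456 the result is
 *
 *	(0x008001e3 & ~(NBPDR - 1)) | (0xc0123456 & (NBPDR - 1))
 *	    == 0x00800000 | 0x00123456 == 0x00923456
 *
 * For a normal 4K mapping the PTE's frame is combined with the low 12 bits
 * of va instead.
 */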

/*
 * determine if a page is managed (memory vs. device)
 */
static PMAP_INLINE int
pmap_is_managed(pa)
	vm_offset_t pa;
{
	int i;

	if (!pmap_initialized)
		return 0;

	for (i = 0; phys_avail[i + 1]; i += 2) {
		if (pa < phys_avail[i + 1] && pa >= phys_avail[i])
			return 1;
	}
	return 0;
}


/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * add a wired page to the kva
 * note that in order for the mapping to take effect -- you
 * should do a invltlb after doing the pmap_kenter...
 */
PMAP_INLINE void
pmap_kenter(va, pa)
	vm_offset_t va;
	register vm_offset_t pa;
{
	register unsigned *pte;
	unsigned npte, opte;

	npte = pa | PG_RW | PG_V | pgeflag;
	pte = (unsigned *)vtopte(va);
	opte = *pte;
	*pte = npte;
	if (opte)
		invltlb_1pg(va);
}

/*
 * remove a page from the kernel pagetables
 */
PMAP_INLINE void
pmap_kremove(va)
	vm_offset_t va;
{
	register unsigned *pte;

	pte = (unsigned *)vtopte(va);
	*pte = 0;
	invltlb_1pg(va);
}

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 */
void
pmap_qenter(va, m, count)
	vm_offset_t va;
	vm_page_t *m;
	int count;
{
	int i;

	for (i = 0; i < count; i++) {
		vm_offset_t tva = va + i * PAGE_SIZE;
		pmap_kenter(tva, VM_PAGE_TO_PHYS(m[i]));
	}
}
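
/*
 * Illustrative use (not in the original source): a typical caller maps a
 * short-lived window over an array of pages, uses it, and tears it down:
 *
 *	pmap_qenter(kva, pages, npages);
 *	... access the pages through kva ...
 *	pmap_qremove(kva, npages);
 *
 * where kva is kernel VA obtained elsewhere (e.g. from a submap) and
 * pages/npages are hypothetical names for the vm_page_t array being mapped.
 */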
842159413Smarius
843159413Smarius/*
844159413Smarius * this routine jerks page mappings from the
845159413Smarius * kernel -- it is meant only for temporary mappings.
846159413Smarius */
847159413Smariusvoid
84890618Stmmpmap_qremove(va, count)
84990618Stmm	vm_offset_t va;
85090618Stmm	int count;
85190618Stmm{
85290618Stmm	int i;
85390618Stmm
85490618Stmm	for (i = 0; i < count; i++) {
85590618Stmm		pmap_kremove(va);
856108798Stmm		va += PAGE_SIZE;
857108798Stmm	}
858108798Stmm}
859108798Stmm
860159413Smariusstatic vm_page_t
861159413Smariuspmap_page_lookup(object, pindex)
862159413Smarius	vm_object_t object;
863159413Smarius	vm_pindex_t pindex;
86490618Stmm{
86590618Stmm	vm_page_t m;
86690618Stmmretry:
86790618Stmm	m = vm_page_lookup(object, pindex);
86890618Stmm	if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
86990618Stmm		goto retry;
87090618Stmm	return m;
871145186Smarius}
872108798Stmm
873145186Smarius/*
87490618Stmm * Create the UPAGES for a new process.
875145186Smarius * This routine directly affects the fork perf for a process.
876145186Smarius */
87790618Stmmvoid
878145186Smariuspmap_new_proc(p)
879145186Smarius	struct proc *p;
880145186Smarius{
881108798Stmm	int i, updateneeded;
882108798Stmm	vm_object_t upobj;
88390618Stmm	vm_page_t m;
884145186Smarius	struct user *up;
885145186Smarius	unsigned *ptek, oldpte;
886108798Stmm
887145186Smarius	/*
888108798Stmm	 * allocate object for the upages
889145185Smarius	 */
890108798Stmm	if ((upobj = p->p_upages_obj) == NULL) {
891145185Smarius		upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
892108798Stmm		p->p_upages_obj = upobj;
893108798Stmm	}
89490618Stmm
89590618Stmm	/* get a kernel virtual address for the UPAGES for this proc */
896167308Smarius	if ((up = p->p_addr) == NULL) {
897167308Smarius		up = (struct user *) kmem_alloc_pageable(kernel_map,
898167308Smarius				UPAGES * PAGE_SIZE);
899167308Smarius#if !defined(MAX_PERF)
900167308Smarius		if (up == NULL)
901167308Smarius			panic("pmap_new_proc: u_map allocation failed");
902167308Smarius#endif
903167308Smarius		p->p_addr = up;
904167308Smarius	}
905152684Smarius
906152684Smarius	ptek = (unsigned *) vtopte((vm_offset_t) up);
907152684Smarius
908152684Smarius	updateneeded = 0;
909152684Smarius	for(i=0;i<UPAGES;i++) {
910152684Smarius		/*
911152684Smarius		 * Get a kernel stack page
912152684Smarius		 */
913152684Smarius		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
91490618Stmm
91590618Stmm		/*
91690618Stmm		 * Wire the page
91790618Stmm		 */
91890618Stmm		m->wire_count++;
91990618Stmm		cnt.v_wire_count++;
92090618Stmm
92190618Stmm		oldpte = *(ptek + i);
922166901Spiso		/*
92390618Stmm		 * Enter the page into the kernel address space.
92490618Stmm		 */
92590618Stmm		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
92690618Stmm		if (oldpte) {
92790618Stmm			if ((oldpte & PG_G) || (cpu_class > CPUCLASS_386)) {
928166901Spiso				invlpg((vm_offset_t) up + i * PAGE_SIZE);
92990618Stmm			} else {
93090618Stmm				updateneeded = 1;
93190618Stmm			}
932166901Spiso		}
93390618Stmm
93490618Stmm		vm_page_wakeup(m);
93590618Stmm		vm_page_flag_clear(m, PG_ZERO);
93690618Stmm		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
93790618Stmm		m->valid = VM_PAGE_BITS_ALL;
938166901Spiso	}
93990618Stmm	if (updateneeded)
94090618Stmm		invltlb();
94190618Stmm}
94290618Stmm
94390618Stmm/*
94490618Stmm * Dispose the UPAGES for a process that has exited.
94590618Stmm * This routine directly impacts the exit perf of a process.
94690618Stmm */
94790618Stmmvoid
94890618Stmmpmap_dispose_proc(p)
949145185Smarius	struct proc *p;
95090618Stmm{
951108815Stmm	int i;
952108815Stmm	vm_object_t upobj;
953108815Stmm	vm_page_t m;
95490618Stmm	unsigned *ptek, oldpte;
95590618Stmm
956133589Smarius	upobj = p->p_upages_obj;
957152684Smarius
958152684Smarius	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
959133589Smarius	for(i=0;i<UPAGES;i++) {
960152684Smarius
961133589Smarius		if ((m = vm_page_lookup(upobj, i)) == NULL)
962152684Smarius			panic("pmap_dispose_proc: upage already missing???");
963152684Smarius
964152684Smarius		vm_page_busy(m);
965152684Smarius
966152684Smarius		oldpte = *(ptek + i);
967152684Smarius		*(ptek + i) = 0;
968133589Smarius		if ((oldpte & PG_G) || (cpu_class > CPUCLASS_386))
969			invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
970		vm_page_unwire(m, 0);
971		vm_page_free(m);
972	}
973
974	if (cpu_class <= CPUCLASS_386)
975		invltlb();
976}
977
978/*
979 * Allow the UPAGES for a process to be prejudicially paged out.
980 */
981void
982pmap_swapout_proc(p)
983	struct proc *p;
984{
985	int i;
986	vm_object_t upobj;
987	vm_page_t m;
988
989	upobj = p->p_upages_obj;
990	/*
991	 * let the upages be paged
992	 */
993	for(i=0;i<UPAGES;i++) {
994		if ((m = vm_page_lookup(upobj, i)) == NULL)
995			panic("pmap_swapout_proc: upage already missing???");
996		vm_page_dirty(m);
997		vm_page_unwire(m, 0);
998		pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
999	}
1000}
1001
1002/*
1003 * Bring the UPAGES for a specified process back in.
1004 */
1005void
1006pmap_swapin_proc(p)
1007	struct proc *p;
1008{
1009	int i,rv;
1010	vm_object_t upobj;
1011	vm_page_t m;
1012
1013	upobj = p->p_upages_obj;
1014	for(i=0;i<UPAGES;i++) {
1015
1016		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1017
1018		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
1019			VM_PAGE_TO_PHYS(m));
1020
1021		if (m->valid != VM_PAGE_BITS_ALL) {
1022			rv = vm_pager_get_pages(upobj, &m, 1, 0);
1023#if !defined(MAX_PERF)
1024			if (rv != VM_PAGER_OK)
1025				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
1026#endif
1027			m = vm_page_lookup(upobj, i);
1028			m->valid = VM_PAGE_BITS_ALL;
1029		}
1030
1031		vm_page_wire(m);
1032		vm_page_wakeup(m);
1033		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
1034	}
1035}
1036
1037/***************************************************
1038 * Page table page management routines.....
1039 ***************************************************/
1040
1041/*
1042 * This routine unholds page table pages, and if the hold count
1043 * drops to zero, then it decrements the wire count.
1044 */
1045static int
1046_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1047
1048	while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
1049		;
1050
1051	if (m->hold_count == 0) {
1052		vm_offset_t pteva;
1053		/*
1054		 * unmap the page table page
1055		 */
1056		pmap->pm_pdir[m->pindex] = 0;
1057		--pmap->pm_stats.resident_count;
1058		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1059			(((unsigned) PTDpde) & PG_FRAME)) {
1060			/*
1061			 * Do a invltlb to make the invalidated mapping
1062			 * take effect immediately.
1063			 */
1064			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
1065			pmap_TLB_invalidate(pmap, pteva);
1066		}
1067
1068		if (pmap->pm_ptphint == m)
1069			pmap->pm_ptphint = NULL;
1070
1071		/*
1072		 * If the page is finally unwired, simply free it.
1073		 */
1074		--m->wire_count;
1075		if (m->wire_count == 0) {
1076
1077			vm_page_flash(m);
1078			vm_page_busy(m);
1079			vm_page_free_zero(m);
1080			--cnt.v_wire_count;
1081		}
1082		return 1;
1083	}
1084	return 0;
1085}
1086
1087static PMAP_INLINE int
1088pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1089	vm_page_unhold(m);
1090	if (m->hold_count == 0)
1091		return _pmap_unwire_pte_hold(pmap, m);
1092	else
1093		return 0;
1094}
1095
1096/*
1097 * After removing a page table entry, this routine is used to
1098 * conditionally free the page, and manage the hold/wire counts.
1099 */
1100static int
1101pmap_unuse_pt(pmap, va, mpte)
1102	pmap_t pmap;
1103	vm_offset_t va;
1104	vm_page_t mpte;
1105{
1106	unsigned ptepindex;
1107	if (va >= UPT_MIN_ADDRESS)
1108		return 0;
1109
1110	if (mpte == NULL) {
1111		ptepindex = (va >> PDRSHIFT);
1112		if (pmap->pm_ptphint &&
1113			(pmap->pm_ptphint->pindex == ptepindex)) {
1114			mpte = pmap->pm_ptphint;
1115		} else {
1116			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1117			pmap->pm_ptphint = mpte;
1118		}
1119	}
1120
1121	return pmap_unwire_pte_hold(pmap, mpte);
1122}
1123
1124#if !defined(SMP)
1125void
1126pmap_pinit0(pmap)
1127	struct pmap *pmap;
1128{
1129	pmap->pm_pdir =
1130		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1131	pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
1132	pmap->pm_count = 1;
1133	pmap->pm_active = 0;
1134	pmap->pm_ptphint = NULL;
1135	TAILQ_INIT(&pmap->pm_pvlist);
1136	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1137}
1138#else
1139void
1140pmap_pinit0(pmap)
1141	struct pmap *pmap;
1142{
1143	pmap_pinit(pmap);
1144}
1145#endif
1146
1147/*
1148 * Initialize a preallocated and zeroed pmap structure,
1149 * such as one in a vmspace structure.
1150 */
1151void
1152pmap_pinit(pmap)
1153	register struct pmap *pmap;
1154{
1155	vm_page_t ptdpg;
1156
1157	/*
1158	 * No need to allocate page table space yet but we do need a valid
1159	 * page directory table.
1160	 */
1161	if (pmap->pm_pdir == NULL)
1162		pmap->pm_pdir =
1163			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1164
1165	/*
1166	 * allocate object for the ptes
1167	 */
1168	if (pmap->pm_pteobj == NULL)
1169		pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
1170
1171	/*
1172	 * allocate the page directory page
1173	 */
1174	ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI,
1175			VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1176
1177	ptdpg->wire_count = 1;
1178	++cnt.v_wire_count;
1179
1180
1181	vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
1182	ptdpg->valid = VM_PAGE_BITS_ALL;
1183
1184	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
1185	if ((ptdpg->flags & PG_ZERO) == 0)
1186		bzero(pmap->pm_pdir, PAGE_SIZE);
1187
1188	/* wire in kernel global address entries */
1189	/* XXX copies current process, does not fill in MPPTDI */
1190	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
1191
1192	/* install self-referential address mapping entry */
1193	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1194		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
1195
1196	pmap->pm_count = 1;
1197	pmap->pm_active = 0;
1198	pmap->pm_ptphint = NULL;
1199	TAILQ_INIT(&pmap->pm_pvlist);
1200	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1201}
1202
1203static int
1204pmap_release_free_page(pmap, p)
1205	struct pmap *pmap;
1206	vm_page_t p;
1207{
1208	unsigned *pde = (unsigned *) pmap->pm_pdir;
1209	/*
1210	 * This code optimizes the case of freeing non-busy
1211	 * page-table pages.  Those pages are zero now, and
1212	 * might as well be placed directly into the zero queue.
1213	 */
1214	if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
1215		return 0;
1216
1217	vm_page_busy(p);
1218
1219	/*
1220	 * Remove the page table page from the processes address space.
1221	 */
1222	pde[p->pindex] = 0;
1223	pmap->pm_stats.resident_count--;
1224
1225#if !defined(MAX_PERF)
1226	if (p->hold_count)  {
1227		panic("pmap_release: freeing held page table page");
1228	}
1229#endif
1230	/*
1231	 * Page directory pages need to have the kernel
1232	 * stuff cleared, so they can go into the zero queue also.
1233	 */
1234	if (p->pindex == PTDPTDI) {
1235		bzero(pde + KPTDI, nkpt * PTESIZE);
1236#ifdef SMP
1237		pde[MPPTDI] = 0;
1238#endif
1239		pde[APTDPTDI] = 0;
1240		pmap_kremove((vm_offset_t) pmap->pm_pdir);
1241	}
1242
1243	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
1244		pmap->pm_ptphint = NULL;
1245
1246	p->wire_count--;
1247	cnt.v_wire_count--;
1248	vm_page_free_zero(p);
1249	return 1;
1250}
1251
1252/*
1253 * this routine is called if the page table page is not
1254 * mapped correctly.
1255 */
1256static vm_page_t
1257_pmap_allocpte(pmap, ptepindex)
1258	pmap_t	pmap;
1259	unsigned ptepindex;
1260{
1261	vm_offset_t pteva, ptepa;
1262	vm_page_t m;
1263
1264	/*
1265	 * Find or fabricate a new pagetable page
1266	 */
1267	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
1268			VM_ALLOC_ZERO | VM_ALLOC_RETRY);
1269
1270	if (m->queue != PQ_NONE) {
1271		int s = splvm();
1272		vm_page_unqueue(m);
1273		splx(s);
1274	}
1275
1276	if (m->wire_count == 0)
1277		cnt.v_wire_count++;
1278	m->wire_count++;
1279
1280	/*
1281	 * Increment the hold count for the page table page
1282	 * (denoting a new mapping.)
1283	 */
1284	m->hold_count++;
1285
1286	/*
1287	 * Map the pagetable page into the process address space, if
1288	 * it isn't already there.
1289	 */
1290
1291	pmap->pm_stats.resident_count++;
1292
1293	ptepa = VM_PAGE_TO_PHYS(m);
1294	pmap->pm_pdir[ptepindex] =
1295		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
1296
1297	/*
1298	 * Set the page table hint
1299	 */
1300	pmap->pm_ptphint = m;
1301
1302	/*
1303	 * Try to use the new mapping, but if we cannot, then
1304	 * do it with the routine that maps the page explicitly.
1305	 */
1306	if ((m->flags & PG_ZERO) == 0) {
1307		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1308			(((unsigned) PTDpde) & PG_FRAME)) {
1309			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
1310			bzero((caddr_t) pteva, PAGE_SIZE);
1311		} else {
1312			pmap_zero_page(ptepa);
1313		}
1314	}
1315
1316	m->valid = VM_PAGE_BITS_ALL;
1317	vm_page_flag_clear(m, PG_ZERO);
1318	vm_page_flag_set(m, PG_MAPPED);
1319	vm_page_wakeup(m);
1320
1321	return m;
1322}
1323
1324static vm_page_t
1325pmap_allocpte(pmap, va)
1326	pmap_t	pmap;
1327	vm_offset_t va;
1328{
1329	unsigned ptepindex;
1330	vm_offset_t ptepa;
1331	vm_page_t m;
1332
1333	/*
1334	 * Calculate pagetable page index
1335	 */
1336	ptepindex = va >> PDRSHIFT;
1337
1338	/*
1339	 * Get the page directory entry
1340	 */
1341	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1342
1343	/*
1344	 * This supports switching from a 4MB page to a
1345	 * normal 4K page.
1346	 */
1347	if (ptepa & PG_PS) {
1348		pmap->pm_pdir[ptepindex] = 0;
1349		ptepa = 0;
1350		invltlb();
1351	}
1352
1353	/*
1354	 * If the page table page is mapped, we just increment the
1355	 * hold count, and activate it.
1356	 */
1357	if (ptepa) {
1358		/*
1359		 * In order to get the page table page, try the
1360		 * hint first.
1361		 */
1362		if (pmap->pm_ptphint &&
1363			(pmap->pm_ptphint->pindex == ptepindex)) {
1364			m = pmap->pm_ptphint;
1365		} else {
1366			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1367			pmap->pm_ptphint = m;
1368		}
1369		m->hold_count++;
1370		return m;
1371	}
1372	/*
1373	 * Here if the pte page isn't mapped, or if it has been deallocated.
1374	 */
1375	return _pmap_allocpte(pmap, ptepindex);
1376}
1377
1378
1379/***************************************************
1380* Pmap allocation/deallocation routines.
1381 ***************************************************/
1382
1383/*
1384 * Release any resources held by the given physical map.
1385 * Called when a pmap initialized by pmap_pinit is being released.
1386 * Should only be called if the map contains no valid mappings.
1387 */
1388void
1389pmap_release(pmap)
1390	register struct pmap *pmap;
1391{
1392	vm_page_t p,n,ptdpg;
1393	vm_object_t object = pmap->pm_pteobj;
1394	int curgeneration;
1395
1396#if defined(DIAGNOSTIC)
1397	if (object->ref_count != 1)
1398		panic("pmap_release: pteobj reference count != 1");
1399#endif
1400
1401	ptdpg = NULL;
1402retry:
1403	curgeneration = object->generation;
1404	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
1405		n = TAILQ_NEXT(p, listq);
1406		if (p->pindex == PTDPTDI) {
1407			ptdpg = p;
1408			continue;
1409		}
1410		while (1) {
1411			if (!pmap_release_free_page(pmap, p) &&
1412				(object->generation != curgeneration))
1413				goto retry;
1414		}
1415	}
1416
1417	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
1418		goto retry;
1419}
1420
1421/*
1422 * grow the number of kernel page table entries, if needed
1423 */
1424void
1425pmap_growkernel(vm_offset_t addr)
1426{
1427	struct proc *p;
1428	struct pmap *pmap;
1429	int s;
1430	vm_offset_t ptppaddr;
1431	vm_page_t nkpg;
1432#ifdef SMP
1433	int i;
1434#endif
1435	pd_entry_t newpdir;
1436
1437	s = splhigh();
1438	if (kernel_vm_end == 0) {
1439		kernel_vm_end = KERNBASE;
1440		nkpt = 0;
1441		while (pdir_pde(PTD, kernel_vm_end)) {
1442			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1443			nkpt++;
1444		}
1445	}
1446	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1447	while (kernel_vm_end < addr) {
1448		if (pdir_pde(PTD, kernel_vm_end)) {
1449			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1450			continue;
1451		}
1452
1453		/*
1454		 * This index is bogus, but out of the way
1455		 */
1456		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
1457#if !defined(MAX_PERF)
1458		if (!nkpg)
1459			panic("pmap_growkernel: no memory to grow kernel");
1460#endif
1461
1462		nkpt++;
1463
1464		vm_page_wire(nkpg);
1465		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1466		pmap_zero_page(ptppaddr);
1467		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
1468		pdir_pde(PTD, kernel_vm_end) = newpdir;
1469
1470#ifdef SMP
1471		for (i = 0; i < mp_ncpus; i++) {
1472			if (IdlePTDS[i])
1473				pdir_pde(IdlePTDS[i], kernel_vm_end) = newpdir;
1474		}
1475#endif
1476
1477		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1478			if (p->p_vmspace) {
1479				pmap = vmspace_pmap(p->p_vmspace);
1480				*pmap_pde(pmap, kernel_vm_end) = newpdir;
1481			}
1482		}
1483		if (aiovmspace != NULL) {
1484			pmap = vmspace_pmap(aiovmspace);
1485			*pmap_pde(pmap, kernel_vm_end) = newpdir;
1486		}
1487		*pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
1488		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1489	}
1490	splx(s);
1491}
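
/*
 * Illustrative arithmetic (not in the original source): PAGE_SIZE * NPTEPG
 * is 4KB * 1024 == 4MB, the span covered by one kernel page table page.
 * A request such as pmap_growkernel(0xc8a01000) is therefore rounded up to
 * 0xc8c00000, and one new page table page is allocated (and entered into
 * every pmap in the system) for each still-unmapped 4MB step between the
 * current kernel_vm_end and that address.
 */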
1492
1493/*
1494 *	Retire the given physical map from service.
1495 *	Should only be called if the map contains
1496 *	no valid mappings.
1497 */
1498void
1499pmap_destroy(pmap)
1500	register pmap_t pmap;
1501{
1502	int count;
1503
1504	if (pmap == NULL)
1505		return;
1506
1507	count = --pmap->pm_count;
1508	if (count == 0) {
1509		pmap_release(pmap);
1510#if !defined(MAX_PERF)
1511		panic("destroying a pmap is not yet implemented");
1512#endif
1513	}
1514}
1515
1516/*
1517 *	Add a reference to the specified pmap.
1518 */
1519void
1520pmap_reference(pmap)
1521	pmap_t pmap;
1522{
1523	if (pmap != NULL) {
1524		pmap->pm_count++;
1525	}
1526}
1527
1528/***************************************************
1529* page management routines.
1530 ***************************************************/
1531
1532/*
1533 * free the pv_entry back to the free list
1534 */
1535static PMAP_INLINE void
1536free_pv_entry(pv)
1537	pv_entry_t pv;
1538{
1539	pv_entry_count--;
1540	zfreei(pvzone, pv);
1541}
1542
1543/*
1544 * get a new pv_entry, allocating a block from the system
1545 * when needed.
1546 * the memory allocation is performed bypassing the malloc code
1547 * because of the possibility of allocations at interrupt time.
1548 */
1549static pv_entry_t
1550get_pv_entry(void)
1551{
1552	pv_entry_count++;
1553	if (pv_entry_high_water &&
1554		(pv_entry_count > pv_entry_high_water) &&
1555		(pmap_pagedaemon_waken == 0)) {
1556		pmap_pagedaemon_waken = 1;
1557		wakeup (&vm_pages_needed);
1558	}
1559	return zalloci(pvzone);
1560}
1561
1562/*
1563 * This routine is very drastic, but can save the system
1564 * in a pinch.
1565 */
1566void
1567pmap_collect() {
1568	pv_table_t *ppv;
1569	int i;
1570	vm_offset_t pa;
1571	vm_page_t m;
1572	static int warningdone=0;
1573
1574	if (pmap_pagedaemon_waken == 0)
1575		return;
1576
1577	if (warningdone < 5) {
1578		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1579		warningdone++;
1580	}
1581
1582	for(i = 0; i < pv_npg; i++) {
1583		if ((ppv = &pv_table[i]) == 0)
1584			continue;
1585		m = ppv->pv_vm_page;
1586		if ((pa = VM_PAGE_TO_PHYS(m)) == 0)
1587			continue;
1588		if (m->wire_count || m->hold_count || m->busy ||
1589			(m->flags & PG_BUSY))
1590			continue;
1591		pmap_remove_all(pa);
1592	}
1593	pmap_pagedaemon_waken = 0;
1594}
1595
1596
1597/*
1598 * If it is the first entry on the list, it is actually
1599 * in the header and we must copy the following entry up
1600 * to the header.  Otherwise we must search the list for
1601 * the entry.  In either case we free the now unused entry.
1602 */
1603
1604static int
1605pmap_remove_entry(pmap, ppv, va)
1606	struct pmap *pmap;
1607	pv_table_t *ppv;
1608	vm_offset_t va;
1609{
1610	pv_entry_t pv;
1611	int rtval;
1612	int s;
1613
1614	s = splvm();
1615	if (ppv->pv_list_count < pmap->pm_stats.resident_count) {
1616		for (pv = TAILQ_FIRST(&ppv->pv_list);
1617			pv;
1618			pv = TAILQ_NEXT(pv, pv_list)) {
1619			if (pmap == pv->pv_pmap && va == pv->pv_va)
1620				break;
1621		}
1622	} else {
1623		for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1624			pv;
1625			pv = TAILQ_NEXT(pv, pv_plist)) {
1626			if (va == pv->pv_va)
1627				break;
1628		}
1629	}
1630
1631	rtval = 0;
1632	if (pv) {
1633
1634		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1635		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1636		ppv->pv_list_count--;
1637		if (TAILQ_FIRST(&ppv->pv_list) == NULL)
1638			vm_page_flag_clear(ppv->pv_vm_page, PG_MAPPED | PG_WRITEABLE);
1639
1640		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1641		free_pv_entry(pv);
1642	}
1643
1644	splx(s);
1645	return rtval;
1646}
1647
1648/*
1649 * Create a pv entry for page at pa for
1650 * (pmap, va).
1651 */
1652static void
1653pmap_insert_entry(pmap, va, mpte, pa)
1654	pmap_t pmap;
1655	vm_offset_t va;
1656	vm_page_t mpte;
1657	vm_offset_t pa;
1658{
1659
1660	int s;
1661	pv_entry_t pv;
1662	pv_table_t *ppv;
1663
1664	s = splvm();
1665	pv = get_pv_entry();
1666	pv->pv_va = va;
1667	pv->pv_pmap = pmap;
1668	pv->pv_ptem = mpte;
1669
1670	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1671
1672	ppv = pa_to_pvh(pa);
1673	TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
1674	ppv->pv_list_count++;
1675
1676	splx(s);
1677}
1678
1679/*
1680 * pmap_remove_pte: do the things to unmap a page in a process
1681 */
1682static int
1683pmap_remove_pte(pmap, ptq, va)
1684	struct pmap *pmap;
1685	unsigned *ptq;
1686	vm_offset_t va;
1687{
1688	unsigned oldpte;
1689	pv_table_t *ppv;
1690
1691	oldpte = loadandclear(ptq);
1692	if (oldpte & PG_W)
1693		pmap->pm_stats.wired_count -= 1;
1694	/*
1695	 * Machines that don't support invlpg, also don't support
1696	 * PG_G.
1697	 */
1698	if (oldpte & PG_G)
1699		invlpg(va);
1700	pmap->pm_stats.resident_count -= 1;
1701	if (oldpte & PG_MANAGED) {
1702		ppv = pa_to_pvh(oldpte);
1703		if (oldpte & PG_M) {
1704#if defined(PMAP_DIAGNOSTIC)
1705			if (pmap_nw_modified((pt_entry_t) oldpte)) {
1706				printf(
1707	"pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1708				    va, oldpte);
1709			}
1710#endif
1711			if (pmap_track_modified(va))
1712				vm_page_dirty(ppv->pv_vm_page);
1713		}
1714		if (oldpte & PG_A)
1715			vm_page_flag_set(ppv->pv_vm_page, PG_REFERENCED);
1716		return pmap_remove_entry(pmap, ppv, va);
1717	} else {
1718		return pmap_unuse_pt(pmap, va, NULL);
1719	}
1720
1721	return 0;
1722}
1723
1724/*
1725 * Remove a single page from a process address space
1726 */
1727static void
1728pmap_remove_page(pmap, va)
1729	struct pmap *pmap;
1730	register vm_offset_t va;
1731{
1732	register unsigned *ptq;
1733
1734	/*
1735	 * if there is no pte for this address, just skip it!!!
1736	 */
1737	if (*pmap_pde(pmap, va) == 0) {
1738		return;
1739	}
1740
1741	/*
1742	 * get a local va for mappings for this pmap.
1743	 */
1744	ptq = get_ptbase(pmap) + i386_btop(va);
1745	if (*ptq) {
1746		(void) pmap_remove_pte(pmap, ptq, va);
1747		pmap_TLB_invalidate(pmap, va);
1748	}
1749	return;
1750}
1751
1752/*
1753 *	Remove the given range of addresses from the specified map.
1754 *
1755 *	It is assumed that the start and end are properly
1756 *	rounded to the page size.
1757 */
1758void
1759pmap_remove(pmap, sva, eva)
1760	struct pmap *pmap;
1761	register vm_offset_t sva;
1762	register vm_offset_t eva;
1763{
1764	register unsigned *ptbase;
1765	vm_offset_t pdnxt;
1766	vm_offset_t ptpaddr;
1767	vm_offset_t sindex, eindex;
1768	int anyvalid;
1769
1770	if (pmap == NULL)
1771		return;
1772
1773	if (pmap->pm_stats.resident_count == 0)
1774		return;
1775
1776	/*
1777	 * special handling of removing one page.  a very
1778	 * common operation and easy to short circuit some
1779	 * code.
1780	 */
1781	if (((sva + PAGE_SIZE) == eva) &&
1782		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1783		pmap_remove_page(pmap, sva);
1784		return;
1785	}
1786
1787	anyvalid = 0;
1788
1789	/*
1790	 * Get a local virtual address for the mappings that are being
1791	 * worked with.
1792	 */
1793	ptbase = get_ptbase(pmap);
1794
1795	sindex = i386_btop(sva);
1796	eindex = i386_btop(eva);
1797
1798	for (; sindex < eindex; sindex = pdnxt) {
1799		unsigned pdirindex;
1800
1801		/*
1802		 * Calculate index for next page table.
1803		 */
1804		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1805		if (pmap->pm_stats.resident_count == 0)
1806			break;
1807
1808		pdirindex = sindex / NPDEPG;
1809		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1810			pmap->pm_pdir[pdirindex] = 0;
1811			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1812			anyvalid++;
1813			continue;
1814		}
1815
1816		/*
1817		 * Weed out invalid mappings. Note: we assume that the page
1818		 * directory table is always allocated, and in kernel virtual.
1819		 */
1820		if (ptpaddr == 0)
1821			continue;
1822
1823		/*
1824		 * Limit our scan to either the end of the va represented
1825		 * by the current page table page, or to the end of the
1826		 * range being removed.
1827		 */
1828		if (pdnxt > eindex) {
1829			pdnxt = eindex;
1830		}
1831
1832		for ( ;sindex != pdnxt; sindex++) {
1833			vm_offset_t va;
1834			if (ptbase[sindex] == 0) {
1835				continue;
1836			}
1837			va = i386_ptob(sindex);
1838
1839			anyvalid++;
1840			if (pmap_remove_pte(pmap,
1841				ptbase + sindex, va))
1842				break;
1843		}
1844	}
1845
1846	if (anyvalid)
1847		pmap_TLB_invalidate_all(pmap);
1848}
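
/*
 * Illustrative sketch only: a hypothetical caller tearing down the byte
 * range [addr, addr + len) is expected to hand pmap_remove() bounds that
 * are already rounded to page boundaries, per the comment above.
 */
#if 0
static void
example_remove_range(pmap_t pmap, vm_offset_t addr, vm_size_t len)
{
	vm_offset_t sva, eva;

	sva = addr & ~PAGE_MASK;			/* round start down */
	eva = (addr + len + PAGE_MASK) & ~PAGE_MASK;	/* round end up */
	pmap_remove(pmap, sva, eva);
}
#endif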
1849
1850/*
1851 *	Routine:	pmap_remove_all
1852 *	Function:
1853 *		Removes this physical page from
1854 *		all physical maps in which it resides.
1855 *		Reflects back modify bits to the pager.
1856 *
1857 *	Notes:
1858 *		Original versions of this routine were very
1859 *		inefficient because they iteratively called
1860 *		pmap_remove (slow...)
1861 */
1862
1863static void
1864pmap_remove_all(pa)
1865	vm_offset_t pa;
1866{
1867	register pv_entry_t pv;
1868	pv_table_t *ppv;
1869	register unsigned *pte, tpte;
1870	int s;
1871
1872#if defined(PMAP_DIAGNOSTIC)
1873	/*
1874	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1875	 * pages!
1876	 */
1877	if (!pmap_is_managed(pa)) {
1878		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", pa);
1879	}
1880#endif
1881
1882	s = splvm();
1883	ppv = pa_to_pvh(pa);
1884	while ((pv = TAILQ_FIRST(&ppv->pv_list)) != NULL) {
1885		pv->pv_pmap->pm_stats.resident_count--;
1886
1887		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
1888
1889		tpte = loadandclear(pte);
1890		if (tpte & PG_W)
1891			pv->pv_pmap->pm_stats.wired_count--;
1892
1893		if (tpte & PG_A)
1894			vm_page_flag_set(ppv->pv_vm_page, PG_REFERENCED);
1895
1896		/*
1897		 * Update the vm_page_t clean and reference bits.
1898		 */
1899		if (tpte & PG_M) {
1900#if defined(PMAP_DIAGNOSTIC)
1901			if (pmap_nw_modified((pt_entry_t) tpte)) {
1902				printf(
1903	"pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1904				    pv->pv_va, tpte);
1905			}
1906#endif
1907			if (pmap_track_modified(pv->pv_va))
1908				vm_page_dirty(ppv->pv_vm_page);
1909		}
1910		pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
1911
1912		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1913		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1914		ppv->pv_list_count--;
1915		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1916		free_pv_entry(pv);
1917	}
1918
1919	vm_page_flag_clear(ppv->pv_vm_page, PG_MAPPED | PG_WRITEABLE);
1920
1921	splx(s);
1922}
1923
1924/*
1925 *	Set the physical protection on the
1926 *	specified range of this map as requested.
1927 */
1928void
1929pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1930{
1931	register unsigned *ptbase;
1932	vm_offset_t pdnxt, ptpaddr;
1933	vm_pindex_t sindex, eindex;
1934	int anychanged;
1935
1936
1937	if (pmap == NULL)
1938		return;
1939
1940	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1941		pmap_remove(pmap, sva, eva);
1942		return;
1943	}
1944
1945	if (prot & VM_PROT_WRITE)
1946		return;
1947
1948	anychanged = 0;
1949
1950	ptbase = get_ptbase(pmap);
1951
1952	sindex = i386_btop(sva);
1953	eindex = i386_btop(eva);
1954
1955	for (; sindex < eindex; sindex = pdnxt) {
1956
1957		unsigned pdirindex;
1958
1959		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1960
1961		pdirindex = sindex / NPDEPG;
1962		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1963			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
1965			anychanged++;
1966			continue;
1967		}
1968
1969		/*
1970		 * Weed out invalid mappings. Note: we assume that the page
1971		 * directory table is always allocated, and in kernel virtual.
1972		 */
1973		if (ptpaddr == 0)
1974			continue;
1975
1976		if (pdnxt > eindex) {
1977			pdnxt = eindex;
1978		}
1979
1980		for (; sindex != pdnxt; sindex++) {
1981
1982			unsigned pbits;
1983			pv_table_t *ppv;
1984
1985			pbits = ptbase[sindex];
1986
1987			if (pbits & PG_MANAGED) {
1988				ppv = NULL;
1989				if (pbits & PG_A) {
1990					ppv = pa_to_pvh(pbits);
1991					vm_page_flag_set(ppv->pv_vm_page, PG_REFERENCED);
1992					pbits &= ~PG_A;
1993				}
1994				if (pbits & PG_M) {
1995					if (pmap_track_modified(i386_ptob(sindex))) {
1996						if (ppv == NULL)
1997							ppv = pa_to_pvh(pbits);
1998						vm_page_dirty(ppv->pv_vm_page);
1999						pbits &= ~PG_M;
2000					}
2001				}
2002			}
2003
2004			pbits &= ~PG_RW;
2005
2006			if (pbits != ptbase[sindex]) {
2007				ptbase[sindex] = pbits;
2008				anychanged = 1;
2009			}
2010		}
2011	}
2012	if (anychanged)
2013		pmap_TLB_invalidate_all(pmap);
2014}
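
/*
 * Illustrative sketch of how the protection argument is interpreted by
 * the routine above: removing read access removes the mappings entirely,
 * a request that still includes write access is a no-op here, and
 * anything else write-protects the range.  (example_protect is a
 * hypothetical caller, not a kernel interface.)
 */
#if 0
static void
example_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_protect(pmap, sva, eva, VM_PROT_READ);	/* write-protect */
	pmap_protect(pmap, sva, eva, VM_PROT_ALL);	/* no change made */
	pmap_protect(pmap, sva, eva, VM_PROT_NONE);	/* same as pmap_remove */
}
#endif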
2015
2016/*
2017 *	Insert the given physical page (p) at
2018 *	the specified virtual address (v) in the
2019 *	target physical map with the protection requested.
2020 *
2021 *	If specified, the page will be wired down, meaning
2022 *	that the related pte can not be reclaimed.
2023 *
2024 *	NB:  This is the only routine which MAY NOT lazy-evaluate
2025 *	or lose information.  That is, this routine must actually
2026 *	insert this page into the given map NOW.
2027 */
2028void
2029pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot,
2030	   boolean_t wired)
2031{
2032	register unsigned *pte;
2033	vm_offset_t opa;
2034	vm_offset_t origpte, newpte;
2035	vm_page_t mpte;
2036
2037	if (pmap == NULL)
2038		return;
2039
2040	va &= PG_FRAME;
2041#ifdef PMAP_DIAGNOSTIC
2042	if (va > VM_MAX_KERNEL_ADDRESS)
2043		panic("pmap_enter: toobig");
2044	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
2045		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
2046#endif
2047
2048	mpte = NULL;
2049	/*
2050	 * In the case that a page table page is not
2051	 * resident, we are creating it here.
2052	 */
2053	if (va < UPT_MIN_ADDRESS) {
2054		mpte = pmap_allocpte(pmap, va);
2055	}
2056#if 0 && defined(PMAP_DIAGNOSTIC)
2057	else {
2058		vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va);
2059		if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) {
2060			panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n",
2061				pmap->pm_pdir[PTDPTDI], origpte, va);
2062		}
2063		if (smp_active) {
2064			pdeaddr = (vm_offset_t *) IdlePTDS[cpuid];
2065			if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) {
2066				if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr))
2067					printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr);
2068				printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr);
2069				panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n",
2070					pmap->pm_pdir[PTDPTDI], newpte, origpte, va);
2071			}
2072		}
2073	}
2074#endif
2075
2076	pte = pmap_pte(pmap, va);
2077
2078#if !defined(MAX_PERF)
2079	/*
2080	 * Page Directory table entry not valid, we need a new PT page
2081	 */
2082	if (pte == NULL) {
2083		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n",
2084			(void *)pmap->pm_pdir[PTDPTDI], va);
2085	}
2086#endif
2087
2088	origpte = *(vm_offset_t *)pte;
2089	pa &= PG_FRAME;
2090	opa = origpte & PG_FRAME;
2091
2092#if !defined(MAX_PERF)
2093	if (origpte & PG_PS)
2094		panic("pmap_enter: attempted pmap_enter on 4MB page");
2095#endif
2096
2097	/*
2098	 * Mapping has not changed, must be protection or wiring change.
2099	 */
2100	if (origpte && (opa == pa)) {
2101		/*
2102		 * Wiring change, just update stats. We don't worry about
2103		 * wiring PT pages as they remain resident as long as there
2104		 * are valid mappings in them. Hence, if a user page is wired,
2105		 * the PT page will be also.
2106		 */
2107		if (wired && ((origpte & PG_W) == 0))
2108			pmap->pm_stats.wired_count++;
2109		else if (!wired && (origpte & PG_W))
2110			pmap->pm_stats.wired_count--;
2111
2112#if defined(PMAP_DIAGNOSTIC)
2113		if (pmap_nw_modified((pt_entry_t) origpte)) {
2114			printf(
2115	"pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
2116			    va, origpte);
2117		}
2118#endif
2119
2120		/*
2121		 * Remove extra pte reference
2122		 */
2123		if (mpte)
2124			mpte->hold_count--;
2125
2126		if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
2127			if ((origpte & PG_RW) == 0) {
2128				*pte |= PG_RW;
2129				invltlb_1pg(va);
2130			}
2131			return;
2132		}
2133
2134		/*
2135		 * We might be turning off write access to the page,
2136		 * so we go ahead and sense modify status.
2137		 */
2138		if (origpte & PG_MANAGED) {
2139			if ((origpte & PG_M) && pmap_track_modified(va)) {
2140				pv_table_t *ppv;
2141				ppv = pa_to_pvh(opa);
2142				vm_page_dirty(ppv->pv_vm_page);
2143			}
2144			pa |= PG_MANAGED;
2145		}
2146		goto validate;
2147	}
2148	/*
2149	 * Mapping has changed, invalidate old range and fall through to
2150	 * handle validating new mapping.
2151	 */
2152	if (opa) {
2153		int err;
2154		err = pmap_remove_pte(pmap, pte, va);
2155#if !defined(MAX_PERF)
2156		if (err)
2157			panic("pmap_enter: pte vanished, va: 0x%x", va);
2158#endif
2159	}
2160
2161	/*
2162	 * Enter on the PV list if part of our managed memory.  Note that we
2163	 * raise IPL while manipulating pv_table since pmap_enter can be
2164	 * called at interrupt time.
2165	 */
2166	if (pmap_is_managed(pa)) {
2167		pmap_insert_entry(pmap, va, mpte, pa);
2168		pa |= PG_MANAGED;
2169	}
2170
2171	/*
2172	 * Increment counters
2173	 */
2174	pmap->pm_stats.resident_count++;
2175	if (wired)
2176		pmap->pm_stats.wired_count++;
2177
2178validate:
2179	/*
2180	 * Now validate mapping with desired protection/wiring.
2181	 */
2182	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2183
2184	if (wired)
2185		newpte |= PG_W;
2186	if (va < UPT_MIN_ADDRESS)
2187		newpte |= PG_U;
2188	if (pmap == kernel_pmap)
2189		newpte |= pgeflag;
2190
2191	/*
2192	 * if the mapping or permission bits are different, we need
2193	 * to update the pte.
2194	 */
2195	if ((origpte & ~(PG_M|PG_A)) != newpte) {
2196		*pte = newpte | PG_A;
2197		if (origpte)
2198			invltlb_1pg(va);
2199	}
2200}
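
/*
 * Illustrative sketch only: a typical caller wires a writable kernel
 * mapping as below; the physical address would normally come from
 * VM_PAGE_TO_PHYS() on an allocated page.  (example_enter_kernel_page
 * is hypothetical.)
 */
#if 0
static void
example_enter_kernel_page(vm_offset_t va, vm_offset_t pa)
{
	pmap_enter(kernel_pmap, va, pa, VM_PROT_READ | VM_PROT_WRITE, TRUE);
}
#endif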
2201
2202/*
2203 * this code makes some *MAJOR* assumptions:
2204 * 1. Current pmap & pmap exist.
2205 * 2. Not wired.
2206 * 3. Read access.
2207 * 4. No page table pages.
2208 * 5. The TLB flush is deferred to the calling procedure.
2209 * 6. Page IS managed.
2210 * but is *MUCH* faster than pmap_enter...
2211 */
2212
2213static vm_page_t
2214pmap_enter_quick(pmap, va, pa, mpte)
2215	register pmap_t pmap;
2216	vm_offset_t va;
2217	register vm_offset_t pa;
2218	vm_page_t mpte;
2219{
2220	register unsigned *pte;
2221
2222	/*
2223	 * In the case that a page table page is not
2224	 * resident, we are creating it here.
2225	 */
2226	if (va < UPT_MIN_ADDRESS) {
2227		unsigned ptepindex;
2228		vm_offset_t ptepa;
2229
2230		/*
2231		 * Calculate pagetable page index
2232		 */
2233		ptepindex = va >> PDRSHIFT;
2234		if (mpte && (mpte->pindex == ptepindex)) {
2235			mpte->hold_count++;
2236		} else {
2237retry:
2238			/*
2239			 * Get the page directory entry
2240			 */
2241			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2242
2243			/*
2244			 * If the page table page is mapped, we just increment
2245			 * the hold count, and activate it.
2246			 */
2247			if (ptepa) {
2248#if !defined(MAX_PERF)
2249				if (ptepa & PG_PS)
2250					panic("pmap_enter_quick: unexpected mapping into 4MB page");
2251#endif
2252				if (pmap->pm_ptphint &&
2253					(pmap->pm_ptphint->pindex == ptepindex)) {
2254					mpte = pmap->pm_ptphint;
2255				} else {
2256					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2257					pmap->pm_ptphint = mpte;
2258				}
2259				if (mpte == NULL)
2260					goto retry;
2261				mpte->hold_count++;
2262			} else {
2263				mpte = _pmap_allocpte(pmap, ptepindex);
2264			}
2265		}
2266	} else {
2267		mpte = NULL;
2268	}
2269
2270	/*
2271	 * This call to vtopte makes the assumption that we are
2272	 * entering the page into the current pmap.  In order to support
2273	 * quick entry into any pmap, one would likely use pmap_pte_quick.
2274	 * But that isn't as quick as vtopte.
2275	 */
2276	pte = (unsigned *)vtopte(va);
2277	if (*pte) {
2278		if (mpte)
2279			pmap_unwire_pte_hold(pmap, mpte);
2280		return 0;
2281	}
2282
2283	/*
2284	 * Enter on the PV list if part of our managed memory.  Note that we
2285	 * raise IPL while manipulating pv_table since pmap_enter can be
2286	 * called at interrupt time.
2287	 */
2288	pmap_insert_entry(pmap, va, mpte, pa);
2289
2290	/*
2291	 * Increment counters
2292	 */
2293	pmap->pm_stats.resident_count++;
2294
2295	/*
2296	 * Now validate mapping with RO protection
2297	 */
2298	*pte = pa | PG_V | PG_U | PG_MANAGED;
2299
2300	return mpte;
2301}
2302
2303#define MAX_INIT_PT (96)
2304/*
2305 * pmap_object_init_pt preloads the ptes for a given object
2306 * into the specified pmap.  This eliminates the blast of soft
2307 * faults on process startup and immediately after an mmap.
2308 */
2309void
2310pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
2311	pmap_t pmap;
2312	vm_offset_t addr;
2313	vm_object_t object;
2314	vm_pindex_t pindex;
2315	vm_size_t size;
2316	int limit;
2317{
2318	vm_offset_t tmpidx;
2319	int psize;
2320	vm_page_t p, mpte;
2321	int objpgs;
2322
2323	if (!pmap)
2324		return;
2325
2326	/*
2327	 * This code maps large physical mmap regions into the
2328	 * processor address space.  Note that some shortcuts
2329	 * are taken, but the code works.
2330	 */
2331	if (pseflag &&
2332		(object->type == OBJT_DEVICE) &&
2333		((addr & (NBPDR - 1)) == 0) &&
2334		((size & (NBPDR - 1)) == 0) ) {
2335		int i;
2336		vm_page_t m[1];
2337		unsigned int ptepindex;
2338		int npdes;
2339		vm_offset_t ptepa;
2340
2341		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
2342			return;
2343
2344retry:
2345		p = vm_page_lookup(object, pindex);
2346		if (p && vm_page_sleep_busy(p, FALSE, "init4p"))
2347			goto retry;
2348
2349		if (p == NULL) {
2350			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
2351			if (p == NULL)
2352				return;
2353			m[0] = p;
2354
2355			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
2356				vm_page_free(p);
2357				return;
2358			}
2359
2360			p = vm_page_lookup(object, pindex);
2361			vm_page_wakeup(p);
2362		}
2363
2364		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
2365		if (ptepa & (NBPDR - 1)) {
2366			return;
2367		}
2368
2369		p->valid = VM_PAGE_BITS_ALL;
2370
2371		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
2372		npdes = size >> PDRSHIFT;
2373		for(i=0;i<npdes;i++) {
2374			pmap->pm_pdir[ptepindex] =
2375				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
2376			ptepa += NBPDR;
2377			ptepindex += 1;
2378		}
2379		vm_page_flag_set(p, PG_MAPPED);
2380		invltlb();
2381		return;
2382	}
2383
2384	psize = i386_btop(size);
2385
2386	if ((object->type != OBJT_VNODE) ||
2387		(limit && (psize > MAX_INIT_PT) &&
2388			(object->resident_page_count > MAX_INIT_PT))) {
2389		return;
2390	}
2391
2392	if (psize + pindex > object->size)
2393		psize = object->size - pindex;
2394
2395	mpte = NULL;
2396	/*
2397	 * if we are processing a major portion of the object, then scan the
2398	 * entire thing.
2399	 */
2400	if (psize > (object->size >> 2)) {
2401		objpgs = psize;
2402
2403		for (p = TAILQ_FIRST(&object->memq);
2404		    ((objpgs > 0) && (p != NULL));
2405		    p = TAILQ_NEXT(p, listq)) {
2406
2407			tmpidx = p->pindex;
2408			if (tmpidx < pindex) {
2409				continue;
2410			}
2411			tmpidx -= pindex;
2412			if (tmpidx >= psize) {
2413				continue;
2414			}
2415			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2416				(p->busy == 0) &&
2417			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2418				if ((p->queue - p->pc) == PQ_CACHE)
2419					vm_page_deactivate(p);
2420				vm_page_busy(p);
2421				mpte = pmap_enter_quick(pmap,
2422					addr + i386_ptob(tmpidx),
2423					VM_PAGE_TO_PHYS(p), mpte);
2424				vm_page_flag_set(p, PG_MAPPED);
2425				vm_page_wakeup(p);
2426			}
2427			objpgs -= 1;
2428		}
2429	} else {
2430		/*
2431		 * else lookup the pages one-by-one.
2432		 */
2433		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
2434			p = vm_page_lookup(object, tmpidx + pindex);
2435			if (p &&
2436			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2437				(p->busy == 0) &&
2438			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2439				if ((p->queue - p->pc) == PQ_CACHE)
2440					vm_page_deactivate(p);
2441				vm_page_busy(p);
2442				mpte = pmap_enter_quick(pmap,
2443					addr + i386_ptob(tmpidx),
2444					VM_PAGE_TO_PHYS(p), mpte);
2445				vm_page_flag_set(p, PG_MAPPED);
2446				vm_page_wakeup(p);
2447			}
2448		}
2449	}
2450	return;
2451}
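
/*
 * Worked example of the 4MB-page shortcut taken above (illustrative
 * only): with the usual i386 values (PDRSHIFT == 22, NBPDR == 4MB), a
 * device mapping whose address and size are both NBPDR-aligned consumes
 * one page directory entry per 4MB and no page table pages at all.
 */
#if 0
static void
example_pde_math(void)
{
	vm_offset_t addr = 0x40000000;	/* 4MB-aligned (hypothetical) */
	vm_size_t size = 0x800000;	/* 8MB, also 4MB-aligned */

	if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0))
		printf("mapping uses %d page directory entries\n",
		    (int)(size >> PDRSHIFT));		/* prints 2 */
}
#endif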
2452
2453/*
2454 * pmap_prefault provides a quick way of clustering
2455 * pagefaults into a process's address space.  It is a "cousin"
2456 * of pmap_object_init_pt, except it runs at page fault time instead
2457 * of mmap time.
2458 */
2459#define PFBAK 4
2460#define PFFOR 4
2461#define PAGEORDER_SIZE (PFBAK+PFFOR)
2462
2463static int pmap_prefault_pageorder[] = {
2464	-PAGE_SIZE, PAGE_SIZE,
2465	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
2466	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
2467	-4 * PAGE_SIZE, 4 * PAGE_SIZE
2468};
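
/*
 * Worked example (illustrative only): for a fault at address addra and
 * 4K pages, the table above makes pmap_prefault() below probe candidate
 * pages in the order -4K, +4K, -8K, +8K, -12K, +12K, -16K, +16K relative
 * to the faulting page, so the nearest neighbors are tried first.
 */
#if 0
static void
example_prefault_order(vm_offset_t addra)
{
	int i;

	for (i = 0; i < PAGEORDER_SIZE; i++)
		printf("candidate %d: 0x%x\n", i,
		    addra + pmap_prefault_pageorder[i]);
}
#endif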
2469
2470void
2471pmap_prefault(pmap, addra, entry)
2472	pmap_t pmap;
2473	vm_offset_t addra;
2474	vm_map_entry_t entry;
2475{
2476	int i;
2477	vm_offset_t starta;
2478	vm_offset_t addr;
2479	vm_pindex_t pindex;
2480	vm_page_t m, mpte;
2481	vm_object_t object;
2482
2483	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace)))
2484		return;
2485
2486	object = entry->object.vm_object;
2487
2488	starta = addra - PFBAK * PAGE_SIZE;
2489	if (starta < entry->start) {
2490		starta = entry->start;
2491	} else if (starta > addra) {
2492		starta = 0;
2493	}
2494
2495	mpte = NULL;
2496	for (i = 0; i < PAGEORDER_SIZE; i++) {
2497		vm_object_t lobject;
2498		unsigned *pte;
2499
2500		addr = addra + pmap_prefault_pageorder[i];
2501		if (addr > addra + (PFFOR * PAGE_SIZE))
2502			addr = 0;
2503
2504		if (addr < starta || addr >= entry->end)
2505			continue;
2506
2507		if ((*pmap_pde(pmap, addr)) == 0)
2508			continue;
2509
2510		pte = (unsigned *) vtopte(addr);
2511		if (*pte)
2512			continue;
2513
2514		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2515		lobject = object;
2516		for (m = vm_page_lookup(lobject, pindex);
2517		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2518		    lobject = lobject->backing_object) {
2519			if (lobject->backing_object_offset & PAGE_MASK)
2520				break;
2521			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2522			m = vm_page_lookup(lobject->backing_object, pindex);
2523		}
2524
2525		/*
2526		 * give-up when a page is not in memory
2527		 */
2528		if (m == NULL)
2529			break;
2530
2531		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2532			(m->busy == 0) &&
2533		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2534
2535			if ((m->queue - m->pc) == PQ_CACHE) {
2536				vm_page_deactivate(m);
2537			}
2538			vm_page_busy(m);
2539			mpte = pmap_enter_quick(pmap, addr,
2540				VM_PAGE_TO_PHYS(m), mpte);
2541			vm_page_flag_set(m, PG_MAPPED);
2542			vm_page_wakeup(m);
2543		}
2544	}
2545}
2546
2547/*
2548 *	Routine:	pmap_change_wiring
2549 *	Function:	Change the wiring attribute for a map/virtual-address
2550 *			pair.
2551 *	In/out conditions:
2552 *			The mapping must already exist in the pmap.
2553 */
2554void
2555pmap_change_wiring(pmap, va, wired)
2556	register pmap_t pmap;
2557	vm_offset_t va;
2558	boolean_t wired;
2559{
2560	register unsigned *pte;
2561
2562	if (pmap == NULL)
2563		return;
2564
2565	pte = pmap_pte(pmap, va);
2566
2567	if (wired && !pmap_pte_w(pte))
2568		pmap->pm_stats.wired_count++;
2569	else if (!wired && pmap_pte_w(pte))
2570		pmap->pm_stats.wired_count--;
2571
2572	/*
2573	 * Wiring is not a hardware characteristic so there is no need to
2574	 * invalidate TLB.
2575	 */
2576	pmap_pte_set_w(pte, wired);
2577}
2578
2579
2580
2581/*
2582 *	Copy the range specified by src_addr/len
2583 *	from the source map to the range dst_addr/len
2584 *	in the destination map.
2585 *
2586 *	This routine is only advisory and need not do anything.
2587 */
2588
2589void
2590pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2591	pmap_t dst_pmap, src_pmap;
2592	vm_offset_t dst_addr;
2593	vm_size_t len;
2594	vm_offset_t src_addr;
2595{
2596	vm_offset_t addr;
2597	vm_offset_t end_addr = src_addr + len;
2598	vm_offset_t pdnxt;
2599	unsigned src_frame, dst_frame;
2600
2601	if (dst_addr != src_addr)
2602		return;
2603
2604	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2605	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
2606		return;
2607	}
2608
2609	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2610	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
2611		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
2612#if defined(SMP)
2613		/* The page directory is not shared between CPUs */
2614		cpu_invltlb();
2615#else
2616		invltlb();
2617#endif
2618	}
2619
2620	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
2621		unsigned *src_pte, *dst_pte;
2622		vm_page_t dstmpte, srcmpte;
2623		vm_offset_t srcptepaddr;
2624		unsigned ptepindex;
2625
2626#if !defined(MAX_PERF)
2627		if (addr >= UPT_MIN_ADDRESS)
2628			panic("pmap_copy: invalid to pmap_copy page tables\n");
2629#endif
2630
2631		/*
2632		 * Don't let optional prefaulting of pages make us go
2633		 * way below the low water mark of free pages or way
2634		 * above high water mark of used pv entries.
2635		 */
2636		if (cnt.v_free_count < cnt.v_free_reserved ||
2637		    pv_entry_count > pv_entry_high_water)
2638			break;
2639
2640		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2641		ptepindex = addr >> PDRSHIFT;
2642
2643		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2644		if (srcptepaddr == 0)
2645			continue;
2646
2647		if (srcptepaddr & PG_PS) {
2648			if (dst_pmap->pm_pdir[ptepindex] == 0) {
2649				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2650				dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2651			}
2652			continue;
2653		}
2654
2655		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
2656		if ((srcmpte == NULL) ||
2657			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
2658			continue;
2659
2660		if (pdnxt > end_addr)
2661			pdnxt = end_addr;
2662
2663		src_pte = (unsigned *) vtopte(addr);
2664		dst_pte = (unsigned *) avtopte(addr);
2665		while (addr < pdnxt) {
2666			unsigned ptetemp;
2667			ptetemp = *src_pte;
2668			/*
2669			 * we only virtual copy managed pages
2670			 */
2671			if ((ptetemp & PG_MANAGED) != 0) {
2672				/*
2673				 * We have to check after allocpte for the
2674				 * pte still being around...  allocpte can
2675				 * block.
2676				 */
2677				dstmpte = pmap_allocpte(dst_pmap, addr);
2678				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
2679					/*
2680					 * Clear the modified and
2681					 * accessed (referenced) bits
2682					 * during the copy.
2683					 */
2684					*dst_pte = ptetemp & ~(PG_M | PG_A);
2685					dst_pmap->pm_stats.resident_count++;
2686					pmap_insert_entry(dst_pmap, addr,
2687						dstmpte,
2688						(ptetemp & PG_FRAME));
2689	 			} else {
2690					pmap_unwire_pte_hold(dst_pmap, dstmpte);
2691				}
2692				if (dstmpte->hold_count >= srcmpte->hold_count)
2693					break;
2694			}
2695			addr += PAGE_SIZE;
2696			src_pte++;
2697			dst_pte++;
2698		}
2699	}
2700}
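
/*
 * Illustrative sketch only: pmap_copy() is purely an optimization, and
 * the routine above silently does nothing unless dst_addr == src_addr,
 * so a fork-style caller would mirror the parent's mappings at the same
 * addresses.  (example_copy_on_fork is hypothetical.)
 */
#if 0
static void
example_copy_on_fork(pmap_t child, pmap_t parent,
    vm_offset_t start, vm_offset_t end)
{
	pmap_copy(child, parent, start, end - start, start);
}
#endif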
2701
2702/*
2703 *	Routine:	pmap_kernel
2704 *	Function:
2705 *		Returns the physical map handle for the kernel.
2706 */
2707pmap_t
2708pmap_kernel()
2709{
2710	return (kernel_pmap);
2711}
2712
2713/*
2714 *	pmap_zero_page zeros the specified hardware page by mapping
2715 *	the page into KVM and using bzero to clear its contents.
2716 */
2717void
2718pmap_zero_page(phys)
2719	vm_offset_t phys;
2720{
2721#ifdef SMP
2722#if !defined(MAX_PERF)
2723	if (*(int *) prv_CMAP3)
2724		panic("pmap_zero_page: prv_CMAP3 busy");
2725#endif
2726
2727	*(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2728	cpu_invlpg(&prv_CPAGE3);
2729
2730#if defined(I686_CPU)
2731	if (cpu_class == CPUCLASS_686)
2732		i686_pagezero(&prv_CPAGE3);
2733	else
2734#endif
2735		bzero(&prv_CPAGE3, PAGE_SIZE);
2736
2737	*(int *) prv_CMAP3 = 0;
2738#else
2739#if !defined(MAX_PERF)
2740	if (*(int *) CMAP2)
2741		panic("pmap_zero_page: CMAP2 busy");
2742#endif
2743
2744	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2745	if (cpu_class == CPUCLASS_386) {
2746		invltlb();
2747	} else {
2748		invlpg((u_int)CADDR2);
2749	}
2750
2751#if defined(I686_CPU)
2752	if (cpu_class == CPUCLASS_686)
2753		i686_pagezero(CADDR2);
2754	else
2755#endif
2756		bzero(CADDR2, PAGE_SIZE);
2757	*(int *) CMAP2 = 0;
2758#endif
2759}
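
/*
 * Illustrative sketch only: the machine-independent VM code hands this
 * routine a physical address, typically taken from a vm_page_t as below.
 * (example_zero_new_page is hypothetical.)
 */
#if 0
static void
example_zero_new_page(vm_page_t m)
{
	pmap_zero_page(VM_PAGE_TO_PHYS(m));
}
#endif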
2760
2761/*
2762 *	pmap_zero_page_area zeros the specified hardware page by mapping
2763 *	the page into KVM and using bzero to clear its contents.
2764 *
2765 *	off and size may not cover an area beyond a single hardware page.
2766 */
2767void
2768pmap_zero_page_area(phys, off, size)
2769	vm_offset_t phys;
2770	int off;
2771	int size;
2772{
2773#ifdef SMP
2774#if !defined(MAX_PERF)
2775	if (*(int *) prv_CMAP3)
2776		panic("pmap_zero_page: prv_CMAP3 busy");
2777#endif
2778
2779	*(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2780	cpu_invlpg(&prv_CPAGE3);
2781
2782#if defined(I686_CPU)
2783	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
2784		i686_pagezero(&prv_CPAGE3);
2785	else
2786#endif
2787		bzero((char *)&prv_CPAGE3 + off, size);
2788
2789	*(int *) prv_CMAP3 = 0;
2790#else
2791#if !defined(MAX_PERF)
2792	if (*(int *) CMAP2)
2793		panic("pmap_zero_page: CMAP2 busy");
2794#endif
2795
2796	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2797	if (cpu_class == CPUCLASS_386) {
2798		invltlb();
2799	} else {
2800		invlpg((u_int)CADDR2);
2801	}
2802
2803#if defined(I686_CPU)
2804	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
2805		i686_pagezero(CADDR2);
2806	else
2807#endif
2808		bzero((char *)CADDR2 + off, size);
2809	*(int *) CMAP2 = 0;
2810#endif
2811}
2812
2813/*
2814 *	pmap_copy_page copies the specified (machine independent)
2815 *	page by mapping the page into virtual memory and using
2816 *	bcopy to copy the page, one machine dependent page at a
2817 *	time.
2818 */
2819void
2820pmap_copy_page(src, dst)
2821	vm_offset_t src;
2822	vm_offset_t dst;
2823{
2824#ifdef SMP
2825#if !defined(MAX_PERF)
2826	if (*(int *) prv_CMAP1)
2827		panic("pmap_copy_page: prv_CMAP1 busy");
2828	if (*(int *) prv_CMAP2)
2829		panic("pmap_copy_page: prv_CMAP2 busy");
2830#endif
2831
2832	*(int *) prv_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2833	*(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2834
2835	cpu_invlpg(&prv_CPAGE1);
2836	cpu_invlpg(&prv_CPAGE2);
2837
2838	bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE);
2839
2840	*(int *) prv_CMAP1 = 0;
2841	*(int *) prv_CMAP2 = 0;
2842#else
2843#if !defined(MAX_PERF)
2844	if (*(int *) CMAP1 || *(int *) CMAP2)
2845		panic("pmap_copy_page: CMAP busy");
2846#endif
2847
2848	*(int *) CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2849	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2850	if (cpu_class == CPUCLASS_386) {
2851		invltlb();
2852	} else {
2853		invlpg((u_int)CADDR1);
2854		invlpg((u_int)CADDR2);
2855	}
2856
2857	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2858
2859	*(int *) CMAP1 = 0;
2860	*(int *) CMAP2 = 0;
2861#endif
2862}
2863
2864
2865/*
2866 *	Routine:	pmap_pageable
2867 *	Function:
2868 *		Make the specified pages (by pmap, offset)
2869 *		pageable (or not) as requested.
2870 *
2871 *		A page which is not pageable may not take
2872 *		a fault; therefore, its page table entry
2873 *		must remain valid for the duration.
2874 *
2875 *		This routine is merely advisory; pmap_enter
2876 *		will specify that these pages are to be wired
2877 *		down (or not) as appropriate.
2878 */
2879void
2880pmap_pageable(pmap, sva, eva, pageable)
2881	pmap_t pmap;
2882	vm_offset_t sva, eva;
2883	boolean_t pageable;
2884{
2885}
2886
2887/*
2888 * this routine returns true if a physical page resides
2889 * in the given pmap.
2890 */
2891boolean_t
2892pmap_page_exists(pmap, pa)
2893	pmap_t pmap;
2894	vm_offset_t pa;
2895{
2896	register pv_entry_t pv;
2897	pv_table_t *ppv;
2898	int s;
2899
2900	if (!pmap_is_managed(pa))
2901		return FALSE;
2902
2903	s = splvm();
2904
2905	ppv = pa_to_pvh(pa);
2906	/*
2907	 * Check the page's current mappings, returning immediately if found.
2908	 */
2909	for (pv = TAILQ_FIRST(&ppv->pv_list);
2910		pv;
2911		pv = TAILQ_NEXT(pv, pv_list)) {
2912		if (pv->pv_pmap == pmap) {
2913			splx(s);
2914			return TRUE;
2915		}
2916	}
2917	splx(s);
2918	return (FALSE);
2919}
2920
2921#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2922/*
2923 * Remove all pages from the specified address space;
2924 * this aids process exit speed.  Also, this code
2925 * is special cased for current process only, but
2926 * can have the more generic (and slightly slower)
2927 * mode enabled.  This is much faster than pmap_remove
2928 * in the case of running down an entire address space.
2929 */
2930void
2931pmap_remove_pages(pmap, sva, eva)
2932	pmap_t pmap;
2933	vm_offset_t sva, eva;
2934{
2935	unsigned *pte, tpte;
2936	pv_table_t *ppv;
2937	pv_entry_t pv, npv;
2938	int s;
2939
2940#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2941	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) {
2942		printf("warning: pmap_remove_pages called with non-current pmap\n");
2943		return;
2944	}
2945#endif
2946
2947	s = splvm();
2948	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
2949		pv;
2950		pv = npv) {
2951
2952		if (pv->pv_va >= eva || pv->pv_va < sva) {
2953			npv = TAILQ_NEXT(pv, pv_plist);
2954			continue;
2955		}
2956
2957#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2958		pte = (unsigned *)vtopte(pv->pv_va);
2959#else
2960		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2961#endif
2962		tpte = *pte;
2963
2964/*
2965 * We cannot remove wired pages from a process' mapping at this time
2966 */
2967		if (tpte & PG_W) {
2968			npv = TAILQ_NEXT(pv, pv_plist);
2969			continue;
2970		}
2971		*pte = 0;
2972
2973		ppv = pa_to_pvh(tpte);
2974
2975		pv->pv_pmap->pm_stats.resident_count--;
2976
2977		/*
2978		 * Update the vm_page_t clean and reference bits.
2979		 */
2980		if (tpte & PG_M) {
2981			vm_page_dirty(ppv->pv_vm_page);
2982		}
2983
2984
2985		npv = TAILQ_NEXT(pv, pv_plist);
2986		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2987
2988		ppv->pv_list_count--;
2989		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
2990		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
2991			vm_page_flag_clear(ppv->pv_vm_page, PG_MAPPED | PG_WRITEABLE);
2992		}
2993
2994		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2995		free_pv_entry(pv);
2996	}
2997	splx(s);
2998	pmap_TLB_invalidate_all(pmap);
2999}
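
/*
 * Illustrative sketch only: given the curproc-only restriction above,
 * the expected caller is process teardown, removing every user mapping
 * in one pass rather than calling pmap_remove() per map entry.  The
 * VM_MIN_ADDRESS/VM_MAXUSER_ADDRESS bounds are assumed to be the usual
 * i386 vmparam.h constants; example_exit_teardown itself is hypothetical.
 */
#if 0
static void
example_exit_teardown(struct proc *p)
{
	pmap_remove_pages(vmspace_pmap(p->p_vmspace),
	    VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
}
#endif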
3000
3001/*
3002 * pmap_testbit tests bits in pte's
3003 * note that the testbit/changebit routines are inline,
3004 * and a lot of things compile-time evaluate.
3005 */
3006static boolean_t
3007pmap_testbit(pa, bit)
3008	register vm_offset_t pa;
3009	int bit;
3010{
3011	register pv_entry_t pv;
3012	pv_table_t *ppv;
3013	unsigned *pte;
3014	int s;
3015
3016	if (!pmap_is_managed(pa))
3017		return FALSE;
3018
3019	ppv = pa_to_pvh(pa);
3020	if (TAILQ_FIRST(&ppv->pv_list) == NULL)
3021		return FALSE;
3022
3023	s = splvm();
3024
3025	for (pv = TAILQ_FIRST(&ppv->pv_list);
3026		pv;
3027		pv = TAILQ_NEXT(pv, pv_list)) {
3028
3029		/*
3030		 * if the bit being tested is the modified or accessed bit,
3031		 * then only consider mappings whose modified status we
3032		 * track (see pmap_track_modified()).
3033		 */
3034		if (bit & (PG_A|PG_M)) {
3035			if (!pmap_track_modified(pv->pv_va))
3036				continue;
3037		}
3038
3039#if defined(PMAP_DIAGNOSTIC)
3040		if (!pv->pv_pmap) {
3041			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
3042			continue;
3043		}
3044#endif
3045		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3046		if (*pte & bit) {
3047			splx(s);
3048			return TRUE;
3049		}
3050	}
3051	splx(s);
3052	return (FALSE);
3053}
3054
3055/*
3056 * this routine is used to modify bits in ptes
3057 */
3058static __inline void
3059pmap_changebit(pa, bit, setem)
3060	vm_offset_t pa;
3061	int bit;
3062	boolean_t setem;
3063{
3064	register pv_entry_t pv;
3065	pv_table_t *ppv;
3066	register unsigned *pte;
3067	int s;
3068
3069	if (!pmap_is_managed(pa))
3070		return;
3071
3072	s = splvm();
3073	ppv = pa_to_pvh(pa);
3074
3075	/*
3076	 * Loop over all current mappings, setting/clearing as appropriate.
3077	 * If setting RO, do we need to clear the VAC?
3078	 */
3079	for (pv = TAILQ_FIRST(&ppv->pv_list);
3080		pv;
3081		pv = TAILQ_NEXT(pv, pv_list)) {
3082
3083		/*
3084		 * don't write protect pager mappings
3085		 */
3086		if (!setem && (bit == PG_RW)) {
3087			if (!pmap_track_modified(pv->pv_va))
3088				continue;
3089		}
3090
3091#if defined(PMAP_DIAGNOSTIC)
3092		if (!pv->pv_pmap) {
3093			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
3094			continue;
3095		}
3096#endif
3097
3098		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3099
3100		if (setem) {
3101			*(int *)pte |= bit;
3102			pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
3103		} else {
3104			vm_offset_t pbits = *(vm_offset_t *)pte;
3105			if (pbits & bit) {
3106				if (bit == PG_RW) {
3107					if (pbits & PG_M) {
3108						vm_page_dirty(ppv->pv_vm_page);
3109					}
3110					*(int *)pte = pbits & ~(PG_M|PG_RW);
3111				} else {
3112					*(int *)pte = pbits & ~bit;
3113				}
3114				pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
3115			}
3116		}
3117	}
3118	splx(s);
3119}
3120
3121/*
3122 *	pmap_clearbit:
3123 *
3124 *	Clear a bit/bits in every pte mapping a given physical page.
3125 */
3126static void
3127pmap_clearbit(
3128	vm_offset_t pa,
3129	int	bit)
3130{
3131	pmap_changebit(pa, bit, FALSE);
3132}
3133
3134/*
3135 *      pmap_page_protect:
3136 *
3137 *      Lower the permission for all mappings to a given page.
3138 */
3139void
3140pmap_page_protect(vm_offset_t phys, vm_prot_t prot)
3141{
3142	if ((prot & VM_PROT_WRITE) == 0) {
3143		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
3144			pmap_clearbit(phys, PG_RW);
3145		} else {
3146			pmap_remove_all(phys);
3147		}
3148	}
3149}
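
/*
 * Illustrative sketch of the two interesting cases above: dropping write
 * permission clears PG_RW in every mapping of the page, while dropping
 * all permissions removes every mapping outright.  (example_page_protect
 * is hypothetical.)
 */
#if 0
static void
example_page_protect(vm_offset_t pa)
{
	pmap_page_protect(pa, VM_PROT_READ);	/* make all mappings read-only */
	pmap_page_protect(pa, VM_PROT_NONE);	/* remove all mappings */
}
#endif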
3150
3151vm_offset_t
3152pmap_phys_address(ppn)
3153	int ppn;
3154{
3155	return (i386_ptob(ppn));
3156}
3157
3158/*
3159 *	pmap_ts_referenced:
3160 *
3161 *	Return the count of reference bits for a page, clearing all of them.
3162 */
3163int
3164pmap_ts_referenced(vm_offset_t pa)
3165{
3166	register pv_entry_t pv, pvf, pvn;
3167	pv_table_t *ppv;
3168	unsigned *pte;
3169	int s;
3170	int rtval = 0;
3171
3172	if (!pmap_is_managed(pa))
3173		return (rtval);
3174
3175	s = splvm();
3176
3177	ppv = pa_to_pvh(pa);
3178
3179	if ((pv = TAILQ_FIRST(&ppv->pv_list)) != NULL) {
3180
3181		pvf = pv;
3182
3183		do {
3184			pvn = TAILQ_NEXT(pv, pv_list);
3185
3186			TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
3187
3188			TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
3189
3190			if (!pmap_track_modified(pv->pv_va))
3191				continue;
3192
3193			pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3194
3195			if (pte && *pte & PG_A) {
3196				*pte &= ~PG_A;
3197
3198				pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
3199
3200				rtval++;
3201				if (rtval > 4) {
3202					break;
3203				}
3204			}
3205		} while ((pv = pvn) != NULL && pv != pvf);
3206	}
3207	splx(s);
3208
3209	return (rtval);
3210}
3211
3212/*
3213 *	pmap_is_modified:
3214 *
3215 *	Return whether or not the specified physical page was modified
3216 *	in any physical maps.
3217 */
3218boolean_t
3219pmap_is_modified(vm_offset_t pa)
3220{
3221	return pmap_testbit((pa), PG_M);
3222}
3223
3224/*
3225 *	Clear the modify bits on the specified physical page.
3226 */
3227void
3228pmap_clear_modify(vm_offset_t pa)
3229{
3230	pmap_clearbit(pa, PG_M);
3231}
3232
3233/*
3234 *	pmap_clear_reference:
3235 *
3236 *	Clear the reference bit on the specified physical page.
3237 */
3238void
3239pmap_clear_reference(vm_offset_t pa)
3240{
3241	pmap_clearbit(pa, PG_A);
3242}
3243
3244/*
3245 * Miscellaneous support routines follow
3246 */
3247
3248static void
3249i386_protection_init()
3250{
3251	register int *kp, prot;
3252
3253	kp = protection_codes;
3254	for (prot = 0; prot < 8; prot++) {
3255		switch (prot) {
3256		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3257			/*
3258			 * Read access is also 0. There isn't any execute bit,
3259			 * so just make it readable.
3260			 */
3261		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3262		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3263		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3264			*kp++ = 0;
3265			break;
3266		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3267		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3268		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3269		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3270			*kp++ = PG_RW;
3271			break;
3272		}
3273	}
3274}
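
/*
 * Worked example (illustrative only, assuming the usual VM_PROT_READ=1,
 * VM_PROT_WRITE=2, VM_PROT_EXECUTE=4 encoding): after the initialization
 * above, protection_codes[] maps each of the eight protection
 * combinations to either 0 (read-only, since i386 PTEs have no execute
 * bit) or PG_RW.
 */
#if 0
static void
example_protection_codes(void)
{
	/* any combination without VM_PROT_WRITE yields 0 */
	printf("r--: 0x%x\n", protection_codes[VM_PROT_READ]);
	/* any combination including VM_PROT_WRITE yields PG_RW */
	printf("rw-: 0x%x\n",
	    protection_codes[VM_PROT_READ | VM_PROT_WRITE]);
}
#endif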
3275
3276/*
3277 * Map a set of physical memory pages into the kernel virtual
3278 * address space. Return a pointer to where it is mapped. This
3279 * routine is intended to be used for mapping device memory,
3280 * NOT real memory.
3281 */
3282void *
3283pmap_mapdev(pa, size)
3284	vm_offset_t pa;
3285	vm_size_t size;
3286{
3287	vm_offset_t va, tmpva;
3288	unsigned *pte;
3289
3290	size = roundup(size, PAGE_SIZE);
3291
3292	va = kmem_alloc_pageable(kernel_map, size);
3293#if !defined(MAX_PERF)
3294	if (!va)
3295		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3296#endif
3297
3298	pa = pa & PG_FRAME;
3299	for (tmpva = va; size > 0;) {
3300		pte = (unsigned *)vtopte(tmpva);
3301		*pte = pa | PG_RW | PG_V | pgeflag;
3302		size -= PAGE_SIZE;
3303		tmpva += PAGE_SIZE;
3304		pa += PAGE_SIZE;
3305	}
3306	invltlb();
3307
3308	return ((void *) va);
3309}
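
/*
 * Illustrative sketch only: a driver mapping a 16KB memory-mapped
 * register window would do something like the following and get back a
 * kernel virtual address.  The physical address 0xfe000000 is purely
 * hypothetical, not a real device.
 */
#if 0
static void *
example_map_device_regs(void)
{
	return (pmap_mapdev((vm_offset_t)0xfe000000, 4 * PAGE_SIZE));
}
#endif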
3310
3311/*
3312 * perform the pmap work for mincore
3313 */
3314int
3315pmap_mincore(pmap, addr)
3316	pmap_t pmap;
3317	vm_offset_t addr;
3318{
3319
3320	unsigned *ptep, pte;
3321	vm_page_t m;
3322	int val = 0;
3323
3324	ptep = pmap_pte(pmap, addr);
3325	if (ptep == 0) {
3326		return 0;
3327	}
3328
3329	if ((pte = *ptep) != 0) {
3330		pv_table_t *ppv;
3331		vm_offset_t pa;
3332
3333		val = MINCORE_INCORE;
3334		if ((pte & PG_MANAGED) == 0)
3335			return val;
3336
3337		pa = pte & PG_FRAME;
3338
3339		ppv = pa_to_pvh((pa & PG_FRAME));
3340		m = ppv->pv_vm_page;
3341
3342		/*
3343		 * Modified by us
3344		 */
3345		if (pte & PG_M)
3346			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3347		/*
3348		 * Modified by someone
3349		 */
3350		else if (m->dirty || pmap_is_modified(pa))
3351			val |= MINCORE_MODIFIED_OTHER;
3352		/*
3353		 * Referenced by us
3354		 */
3355		if (pte & PG_A)
3356			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3357
3358		/*
3359		 * Referenced by someone
3360		 */
3361		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(pa)) {
3362			val |= MINCORE_REFERENCED_OTHER;
3363			vm_page_flag_set(m, PG_REFERENCED);
3364		}
3365	}
3366	return val;
3367}
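
/*
 * Illustrative sketch of decoding the value built above: MINCORE_INCORE
 * is set for any resident page, and the MODIFIED/REFERENCED bits report
 * state seen in this pmap's pte, with the *_OTHER variants covering
 * state recorded elsewhere.  (example_decode_mincore is hypothetical.)
 */
#if 0
static void
example_decode_mincore(pmap_t pmap, vm_offset_t addr)
{
	int val = pmap_mincore(pmap, addr);

	if (val & MINCORE_INCORE)
		printf("resident%s%s\n",
		    (val & MINCORE_MODIFIED) ? ", dirty in this pmap" : "",
		    (val & MINCORE_REFERENCED) ? ", referenced here" : "");
}
#endif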
3368
3369void
3370pmap_activate(struct proc *p)
3371{
3372	pmap_t	pmap;
3373
3374	pmap = vmspace_pmap(p->p_vmspace);
3375#if defined(SMP)
3376	pmap->pm_active |= 1 << cpuid;
3377#else
3378	pmap->pm_active |= 1;
3379#endif
3380#if defined(SWTCH_OPTIM_STATS)
3381	tlb_flush_count++;
3382#endif
3383	load_cr3(p->p_addr->u_pcb.pcb_cr3 = vtophys(pmap->pm_pdir));
3384}
3385
3386vm_offset_t
3387pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) {
3388
3389	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3390		return addr;
3391	}
3392
3393	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3394	return addr;
3395}
3396
3397
3398#if defined(PMAP_DEBUG)
3399pmap_pid_dump(int pid) {
3400	pmap_t pmap;
3401	struct proc *p;
3402	int npte = 0;
3403	int index;
3404	for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) {
3405		if (p->p_pid != pid)
3406			continue;
3407
3408		if (p->p_vmspace) {
3409			int i,j;
3410			index = 0;
3411			pmap = vmspace_pmap(p->p_vmspace);
3412			for(i=0;i<1024;i++) {
3413				pd_entry_t *pde;
3414				unsigned *pte;
3415				unsigned base = i << PDRSHIFT;
3416
3417				pde = &pmap->pm_pdir[i];
3418				if (pde && pmap_pde_v(pde)) {
3419					for(j=0;j<1024;j++) {
3420						unsigned va = base + (j << PAGE_SHIFT);
3421						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
3422							if (index) {
3423								index = 0;
3424								printf("\n");
3425							}
3426							return npte;
3427						}
3428						pte = pmap_pte_quick( pmap, va);
3429						if (pte && pmap_pte_v(pte)) {
3430							vm_offset_t pa;
3431							vm_page_t m;
3432							pa = *(int *)pte;
3433							m = PHYS_TO_VM_PAGE((pa & PG_FRAME));
3434							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
3435								va, pa, m->hold_count, m->wire_count, m->flags);
3436							npte++;
3437							index++;
3438							if (index >= 2) {
3439								index = 0;
3440								printf("\n");
3441							} else {
3442								printf(" ");
3443							}
3444						}
3445					}
3446				}
3447			}
3448		}
3449	}
3450	return npte;
3451}
3452#endif
3453
3454#if defined(DEBUG)
3455
3456static void	pads __P((pmap_t pm));
3457void		pmap_pvdump __P((vm_offset_t pa));
3458
3459/* print address space of pmap */
3460static void
3461pads(pm)
3462	pmap_t pm;
3463{
3464	unsigned va, i, j;
3465	unsigned *ptep;
3466
3467	if (pm == kernel_pmap)
3468		return;
3469	for (i = 0; i < 1024; i++)
3470		if (pm->pm_pdir[i])
3471			for (j = 0; j < 1024; j++) {
3472				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3473				if (pm == kernel_pmap && va < KERNBASE)
3474					continue;
3475				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
3476					continue;
3477				ptep = pmap_pte_quick(pm, va);
3478				if (pmap_pte_v(ptep))
3479					printf("%x:%x ", va, *(int *) ptep);
3480			}
3481
3482}
3483
3484void
3485pmap_pvdump(pa)
3486	vm_offset_t pa;
3487{
3488	pv_table_t *ppv;
3489	register pv_entry_t pv;
3490
3491	printf("pa %x", pa);
3492	ppv = pa_to_pvh(pa);
3493	for (pv = TAILQ_FIRST(&ppv->pv_list);
3494		pv;
3495		pv = TAILQ_NEXT(pv, pv_list)) {
3496#ifdef used_to_be
3497		printf(" -> pmap %p, va %x, flags %x",
3498		    (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags);
3499#endif
3500		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3501		pads(pv->pv_pmap);
3502	}
3503	printf(" ");
3504}
3505#endif
3506