pmap.c revision 38807
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42 *	$Id: pmap.c,v 1.207 1998/08/23 10:16:25 bde Exp $
43 */
44
45/*
46 *	Manages physical address maps.
47 *
48 *	In addition to hardware address maps, this
49 *	module is called upon to provide software-use-only
50 *	maps which may or may not be stored in the same
51 *	form as hardware maps.  These pseudo-maps are
52 *	used to store intermediate results from copy
53 *	operations to and from address spaces.
54 *
55 *	Since the information managed by this module is
56 *	also stored by the logical address mapping module,
57 *	this module may throw away valid virtual-to-physical
58 *	mappings at almost any time.  However, invalidations
59 *	of virtual-to-physical mappings must be done as
60 *	requested.
61 *
62 *	In order to cope with hardware architectures which
63 *	make virtual-to-physical map invalidates expensive,
64 * this module may delay invalidation or reduced-protection
65 *	operations until such time as they are actually
66 *	necessary.  This module is given full information as
67 *	to which processors are currently using which maps,
68 *	and to when physical maps must be made correct.
69 */
70
71#include "opt_disable_pse.h"
72#include "opt_pmap.h"
73#include "opt_msgbuf.h"
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/proc.h>
78#include <sys/msgbuf.h>
79#include <sys/vmmeter.h>
80#include <sys/mman.h>
81
82#include <vm/vm.h>
83#include <vm/vm_param.h>
84#include <vm/vm_prot.h>
85#include <sys/lock.h>
86#include <vm/vm_kern.h>
87#include <vm/vm_page.h>
88#include <vm/vm_map.h>
89#include <vm/vm_object.h>
90#include <vm/vm_extern.h>
91#include <vm/vm_pageout.h>
92#include <vm/vm_pager.h>
93#include <vm/vm_zone.h>
94
95#include <sys/user.h>
96
97#include <machine/cputypes.h>
98#include <machine/md_var.h>
99#include <machine/specialreg.h>
100#if defined(SMP) || defined(APIC_IO)
101#include <machine/smp.h>
102#include <machine/apic.h>
103#endif /* SMP || APIC_IO */
104
105#define PMAP_KEEP_PDIRS
106#ifndef PMAP_SHPGPERPROC
107#define PMAP_SHPGPERPROC 200
108#endif
109
110#if defined(DIAGNOSTIC)
111#define PMAP_DIAGNOSTIC
112#endif
113
114#define MINPV 2048
115
116#if !defined(PMAP_DIAGNOSTIC)
117#define PMAP_INLINE __inline
118#else
119#define PMAP_INLINE
120#endif
121
122/*
123 * Get PDEs and PTEs for user/kernel address space
124 */
125#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
126#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
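/*
 * PDRSHIFT (22 on the i386) selects the page directory index, so each
 * PDE covers a 4MB (NBPDR) region of virtual address space.
 */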
127
128#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
129#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
130#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
131#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
132#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
133
134#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
135#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
136
137/*
138 * Given a map and a machine-independent protection code,
139 * convert to an i386 protection code.
140 */
141#define pte_prot(m, p)	(protection_codes[p])
142static int protection_codes[8];
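/*
 * protection_codes[] is filled in at bootstrap time by
 * i386_protection_init(); it maps VM_PROT_* combinations to the
 * corresponding i386 PTE protection bits.
 */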
143
144#define	pa_index(pa)		atop((pa) - vm_first_phys)
145#define	pa_to_pvh(pa)		(&pv_table[pa_index(pa)])
146
147static struct pmap kernel_pmap_store;
148pmap_t kernel_pmap;
149extern pd_entry_t my_idlePTD;
150
151vm_offset_t avail_start;	/* PA of first available physical page */
152vm_offset_t avail_end;		/* PA of last available physical page */
153vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
154vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
155static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
156static vm_offset_t vm_first_phys;
157static int pgeflag;		/* PG_G or-in */
158static int pseflag;		/* PG_PS or-in */
159static int pv_npg;
160
161static vm_object_t kptobj;
162
163static int nkpt;
164vm_offset_t kernel_vm_end;
165
166/*
167 * Data for the pv entry allocation mechanism
168 */
169static vm_zone_t pvzone;
170static struct vm_zone pvzone_store;
171static struct vm_object pvzone_obj;
172static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
173static int pmap_pagedaemon_waken = 0;
174static struct pv_entry *pvinit;
175
176/*
177 * All those kernel PT submaps that BSD is so fond of
178 */
179pt_entry_t *CMAP1 = 0;
180static pt_entry_t *CMAP2, *ptmmap;
181static pv_table_t *pv_table;
182caddr_t CADDR1 = 0, ptvmmap = 0;
183static caddr_t CADDR2;
184static pt_entry_t *msgbufmap;
185struct msgbuf *msgbufp=0;
186
187#ifdef SMP
188extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[];
189extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3;
190extern pd_entry_t *IdlePTDS[];
191extern pt_entry_t SMP_prvpt[];
192#endif
193
194#ifdef SMP
195extern unsigned int prv_PPAGE1[];
196extern pt_entry_t *prv_PMAP1;
197#else
198static pt_entry_t *PMAP1 = 0;
199static unsigned *PADDR1 = 0;
200#endif
201
202static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
203static unsigned * get_ptbase __P((pmap_t pmap));
204static pv_entry_t get_pv_entry __P((void));
205static void	i386_protection_init __P((void));
206static void	pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem));
207
208static PMAP_INLINE int	pmap_is_managed __P((vm_offset_t pa));
209static void	pmap_remove_all __P((vm_offset_t pa));
210static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
211				      vm_offset_t pa, vm_page_t mpte));
212static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
213					vm_offset_t sva));
214static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
215static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv,
216					vm_offset_t va));
217static boolean_t pmap_testbit __P((vm_offset_t pa, int bit));
218static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
219		vm_page_t mpte, vm_offset_t pa));
220
221static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
222
223static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
224static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
225static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
226static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
227static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
228static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
229void pmap_collect(void);
230
231static unsigned pdir4mb;
232
233/*
234 *	Routine:	pmap_pte
235 *	Function:
236 *		Extract the page table entry associated
237 *		with the given map/virtual_address pair.
238 */
239
240PMAP_INLINE unsigned *
241pmap_pte(pmap, va)
242	register pmap_t pmap;
243	vm_offset_t va;
244{
245	unsigned *pdeaddr;
246
247	if (pmap) {
248		pdeaddr = (unsigned *) pmap_pde(pmap, va);
249		if (*pdeaddr & PG_PS)
250			return pdeaddr;
251		if (*pdeaddr) {
252			return get_ptbase(pmap) + i386_btop(va);
253		}
254	}
255	return (0);
256}
257
258/*
259 * Move the kernel virtual free pointer to the next
260 * 4MB.  This is used to help improve performance
261 * by using a large (4MB) page for much of the kernel
262 * (.text, .data, .bss)
263 */
264static vm_offset_t
265pmap_kmem_choose(vm_offset_t addr) {
266	vm_offset_t newaddr = addr;
267#ifndef DISABLE_PSE
268	if (cpu_feature & CPUID_PSE) {
269		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
270	}
271#endif
272	return newaddr;
273}
274
275/*
276 *	Bootstrap the system enough to run with virtual memory.
277 *
278 *	On the i386 this is called after mapping has already been enabled
279 *	and just syncs the pmap module with what has already been done.
280 *	[We can't call it easily with mapping off since the kernel is not
281 *	mapped with PA == VA, hence we would have to relocate every address
282 *	from the linked base (virtual) address "KERNBASE" to the actual
283 *	(physical) address starting relative to 0]
284 */
285void
286pmap_bootstrap(firstaddr, loadaddr)
287	vm_offset_t firstaddr;
288	vm_offset_t loadaddr;
289{
290	vm_offset_t va;
291	pt_entry_t *pte;
292	int i, j;
293
294	avail_start = firstaddr;
295
296	/*
297	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
298	 * large. It should instead be correctly calculated in locore.s and
299	 * not based on 'first' (which is a physical address, not a virtual
300	 * address, for the start of unused physical memory). The kernel
301	 * page tables are NOT double mapped and thus should not be included
302	 * in this calculation.
303	 */
304	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
305	virtual_avail = pmap_kmem_choose(virtual_avail);
306
307	virtual_end = VM_MAX_KERNEL_ADDRESS;
308
309	/*
310	 * Initialize protection array.
311	 */
312	i386_protection_init();
313
314	/*
315	 * The kernel's pmap is statically allocated so we don't have to use
316	 * pmap_create, which is unlikely to work correctly at this part of
317	 * the boot sequence (XXX and which no longer exists).
318	 */
319	kernel_pmap = &kernel_pmap_store;
320
321	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
322
323	kernel_pmap->pm_count = 1;
324	TAILQ_INIT(&kernel_pmap->pm_pvlist);
325	nkpt = NKPT;
326
327	/*
328	 * Reserve some special page table entries/VA space for temporary
329	 * mapping of pages.
330	 */
331#define	SYSMAP(c, p, v, n)	\
332	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
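	/*
	 * SYSMAP(type, ptep, vaddr, npages) hands out "npages" pages of
	 * the KVA cursor "va", storing the virtual address in "vaddr"
	 * (cast to "type") and a pointer to the first backing PTE in
	 * "ptep", then advances both the va and pte cursors.
	 */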
333
334	va = virtual_avail;
335	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
336
337	/*
338	 * CMAP1/CMAP2 are used for zeroing and copying pages.
339	 */
340	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
341	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
342
343	/*
344	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
345	 * XXX ptmmap is not used.
346	 */
347	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
348
349	/*
350	 * msgbufp is used to map the system message buffer.
351	 * XXX msgbufmap is not used.
352	 */
353	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
354	       atop(round_page(MSGBUF_SIZE)))
355
356#if !defined(SMP)
357	/*
358	 * ptemap is used for pmap_pte_quick
359	 */
360	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
361#endif
362
363	virtual_avail = va;
364
365	*(int *) CMAP1 = *(int *) CMAP2 = 0;
366	*(int *) PTD = 0;
367
368
369	pgeflag = 0;
370#if !defined(SMP)
371	if (cpu_feature & CPUID_PGE) {
372		pgeflag = PG_G;
373	}
374#endif
375
376/*
377 * Initialize the 4MB page size flag
378 */
379	pseflag = 0;
380/*
381 * The 4MB page version of the initial
382 * kernel page mapping.
383 */
384	pdir4mb = 0;
385
386#if !defined(DISABLE_PSE)
387	if (cpu_feature & CPUID_PSE) {
388		unsigned ptditmp;
389		/*
390		 * Enable the PSE mode
391		 */
392		load_cr4(rcr4() | CR4_PSE);
393
394		/*
395		 * Note that we have enabled PSE mode
396		 */
397		pseflag = PG_PS;
398		ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
399		ptditmp &= ~(NBPDR - 1);
400		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
401		pdir4mb = ptditmp;
402		/*
403		 * We can do the mapping here for the single processor
404		 * case.  We simply ignore the old page table page from
405		 * now on.
406		 */
407#if !defined(SMP)
408		PTD[KPTDI] = (pd_entry_t) ptditmp;
409		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
410		invltlb();
411#endif
412	}
413#endif
414
415#ifdef SMP
416	if (cpu_apic_address == 0)
417		panic("pmap_bootstrap: no local apic!");
418
419	/* 0 = private page */
420	/* 1 = page table page */
421	/* 2 = local apic */
422	/* 16-31 = io apics */
423	SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag |
424	    (cpu_apic_address & PG_FRAME));
425
426	for (i = 0; i < mp_napics; i++) {
427		for (j = 0; j < 16; j++) {
428			/* same page frame as a previous IO apic? */
429			if (((vm_offset_t)SMP_prvpt[j + 16] & PG_FRAME) ==
430			    (io_apic_address[0] & PG_FRAME)) {
431				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
432				break;
433			}
434			/* use this slot if available */
435			if (((vm_offset_t)SMP_prvpt[j + 16] & PG_FRAME) == 0) {
436				SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW |
437				    pgeflag | (io_apic_address[i] & PG_FRAME));
438				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
439				break;
440			}
441		}
442		if (j == 16)
443			panic("no space to map IO apic %d!", i);
444	}
445
446	/* BSP does this itself, APs get it pre-set */
447	prv_CMAP1 = &SMP_prvpt[3 + UPAGES];
448	prv_CMAP2 = &SMP_prvpt[4 + UPAGES];
449	prv_CMAP3 = &SMP_prvpt[5 + UPAGES];
450	prv_PMAP1 = &SMP_prvpt[6 + UPAGES];
451#endif
452
453	invltlb();
454
455}
456
457void
458getmtrr()
459{
460	int i;
461
462	if (cpu == CPU_686) {
463		for(i = 0; i < NPPROVMTRR; i++) {
464			PPro_vmtrr[i].base = rdmsr(PPRO_VMTRRphysBase0 + i * 2);
465			PPro_vmtrr[i].mask = rdmsr(PPRO_VMTRRphysMask0 + i * 2);
466		}
467	}
468}
469
470void
471putmtrr()
472{
473	int i;
474
475	if (cpu == CPU_686) {
476		wbinvd();
477		for(i = 0; i < NPPROVMTRR; i++) {
478			wrmsr(PPRO_VMTRRphysBase0 + i * 2, PPro_vmtrr[i].base);
479			wrmsr(PPRO_VMTRRphysMask0 + i * 2, PPro_vmtrr[i].mask);
480		}
481	}
482}
483
484void
485pmap_setvidram(void)
486{
487#if 0
488	if (cpu == CPU_686) {
489		wbinvd();
490		/*
491		 * Set memory between 0-640K to be WB
492		 */
493		wrmsr(0x250, 0x0606060606060606LL);
494		wrmsr(0x258, 0x0606060606060606LL);
495		/*
496		 * Set normal, PC video memory to be WC
497		 */
498		wrmsr(0x259, 0x0101010101010101LL);
499	}
500#endif
501}
502
503void
504pmap_setdevram(unsigned long long basea, vm_offset_t sizea)
505{
506	int i, free, skip;
507	unsigned basepage, basepaget;
508	unsigned long long base;
509	unsigned long long mask;
510
511	if (cpu != CPU_686)
512		return;
513
514	free = -1;
515	skip = 0;
516	basea &= ~0xfff;
517	base = basea | 0x1;
518	mask = (long long) (0xfffffffffLL - ((long) sizea - 1)) | (long long) 0x800;
519	mask &= ~0x7ff;
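	/*
	 * P6-family variable-range MTRR encoding: the low bits of the
	 * base MSR hold the memory type (0x1 = write-combining) and bit
	 * 11 of the mask MSR is the "valid" bit, which is why base is
	 * or'd with 0x1 and mask with 0x800 above.
	 */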
520
521	basepage = (long long) (base >> 12);
522	for(i = 0; i < NPPROVMTRR; i++) {
523		PPro_vmtrr[i].base = rdmsr(PPRO_VMTRRphysBase0 + i * 2);
524		PPro_vmtrr[i].mask = rdmsr(PPRO_VMTRRphysMask0 + i * 2);
525		basepaget = (long long) (PPro_vmtrr[i].base >> 12);
526		if (basepage == basepaget)
527			skip = 1;
528		if ((PPro_vmtrr[i].mask & 0x800) == 0) {
529			if (free == -1)
530				free = i;
531		}
532	}
533
534	if (!skip && free != -1) {
535		wbinvd();
536		PPro_vmtrr[free].base = base;
537		PPro_vmtrr[free].mask = mask;
538		wrmsr(PPRO_VMTRRphysBase0 + free * 2, base);
539		wrmsr(PPRO_VMTRRphysMask0 + free * 2, mask);
540		printf(
541	"pmap: added WC mapping at page: 0x%x %x, size: %u mask: 0x%x %x\n",
542		    (u_int)(base >> 32), (u_int)base, sizea,
543		    (u_int)(mask >> 32), (u_int)mask);
544	}
545}
546
547/*
548 * Set 4mb pdir for mp startup, and global flags
549 */
550void
551pmap_set_opt(unsigned *pdir) {
552	int i;
553
554	if (pseflag && (cpu_feature & CPUID_PSE)) {
555		load_cr4(rcr4() | CR4_PSE);
556		if (pdir4mb) {
557			pdir[KPTDI] = pdir4mb;
558		}
559	}
560
561	if (pgeflag && (cpu_feature & CPUID_PGE)) {
562		load_cr4(rcr4() | CR4_PGE);
563		for(i = KPTDI; i < KPTDI + nkpt; i++) {
564			if (pdir[i]) {
565				pdir[i] |= PG_G;
566			}
567		}
568	}
569}
570
571/*
572 * Setup the PTD for the boot processor
573 */
574void
575pmap_set_opt_bsp(void)
576{
577	pmap_set_opt((unsigned *)kernel_pmap->pm_pdir);
578	pmap_set_opt((unsigned *)PTD);
579	invltlb();
580}
581
582/*
583 *	Initialize the pmap module.
584 *	Called by vm_init, to initialize any structures that the pmap
585 *	system needs to map virtual memory.
586 *	pmap_init has been enhanced to support discontiguous physical
587 *	memory in a fairly consistent way.
588 */
589void
590pmap_init(phys_start, phys_end)
591	vm_offset_t phys_start, phys_end;
592{
593	vm_offset_t addr;
594	vm_size_t s;
595	int i;
596	int initial_pvs;
597
598	/*
599	 * calculate the number of pv_entries needed
600	 */
601	vm_first_phys = phys_avail[0];
602	for (i = 0; phys_avail[i + 1]; i += 2);
603	pv_npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE;
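	/*
	 * pv_npg is the number of physical pages between the first
	 * available physical address (vm_first_phys) and the end of the
	 * last phys_avail[] segment; pv_table gets one header per page.
	 */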
604
605	/*
606	 * Allocate memory for random pmap data structures.  Includes the
607	 * pv_head_table.
608	 */
609	s = (vm_size_t) (sizeof(pv_table_t) * pv_npg);
610	s = round_page(s);
611
612	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
613	pv_table = (pv_table_t *) addr;
614	for(i = 0; i < pv_npg; i++) {
615		vm_offset_t pa;
616		TAILQ_INIT(&pv_table[i].pv_list);
617		pv_table[i].pv_list_count = 0;
618		pa = vm_first_phys + i * PAGE_SIZE;
619		pv_table[i].pv_vm_page = PHYS_TO_VM_PAGE(pa);
620	}
621
622	/*
623	 * init the pv free list
624	 */
625	initial_pvs = pv_npg;
626	if (initial_pvs < MINPV)
627		initial_pvs = MINPV;
628	pvzone = &pvzone_store;
629	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
630		initial_pvs * sizeof (struct pv_entry));
631	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, pv_npg);
632	/*
633	 * object for kernel page table pages
634	 */
635	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
636
637	/*
638	 * Now it is safe to enable pv_table recording.
639	 */
640	pmap_initialized = TRUE;
641}
642
643/*
644 * Initialize the address space (zone) for the pv_entries.  Set a
645 * high water mark so that the system can recover from excessive
646 * numbers of pv entries.
647 */
648void
649pmap_init2() {
650	pv_entry_max = PMAP_SHPGPERPROC * maxproc + pv_npg;
651	pv_entry_high_water = 9 * (pv_entry_max / 10);
652	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
653}
654
655/*
656 *	Used to map a range of physical addresses into kernel
657 *	virtual address space.
658 *
659 *	For now, VM is already on; we need only map the
660 *	specified memory.
661 */
662vm_offset_t
663pmap_map(virt, start, end, prot)
664	vm_offset_t virt;
665	vm_offset_t start;
666	vm_offset_t end;
667	int prot;
668{
669	while (start < end) {
670		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
671		virt += PAGE_SIZE;
672		start += PAGE_SIZE;
673	}
674	return (virt);
675}
676
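/*
 * Illustrative use of pmap_map() (not from the original source; the
 * names and addresses below are hypothetical):
 *
 *	vm_offset_t va = some_free_kva;
 *	va = pmap_map(va, dev_phys_start, dev_phys_start + dev_len,
 *		      VM_PROT_READ | VM_PROT_WRITE);
 *
 * pmap_map() returns the next unused kernel virtual address, so
 * successive calls can pack mappings back to back.
 */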
677
678/***************************************************
679 * Low level helper routines.....
680 ***************************************************/
681
682#if defined(PMAP_DIAGNOSTIC)
683
684/*
685 * This code checks for non-writeable/modified pages.
686 * This should be an invalid condition.
687 */
688static int
689pmap_nw_modified(pt_entry_t ptea) {
690	int pte;
691
692	pte = (int) ptea;
693
694	if ((pte & (PG_M|PG_RW)) == PG_M)
695		return 1;
696	else
697		return 0;
698}
699#endif
700
701
702/*
703 * Return 1 if the modified bit for this va should be tracked;
704 * mappings inside the clean submap (clean_sva..clean_eva) are excluded.
705 */
706static PMAP_INLINE int
707pmap_track_modified( vm_offset_t va) {
708	if ((va < clean_sva) || (va >= clean_eva))
709		return 1;
710	else
711		return 0;
712}
713
714static PMAP_INLINE void
715invltlb_1pg( vm_offset_t va) {
716#if defined(I386_CPU)
717	if (cpu_class == CPUCLASS_386) {
718		invltlb();
719	} else
720#endif
721	{
722		invlpg(va);
723	}
724}
725
726static PMAP_INLINE void
727invltlb_2pg( vm_offset_t va1, vm_offset_t va2) {
728#if defined(I386_CPU)
729	if (cpu_class == CPUCLASS_386) {
730		invltlb();
731	} else
732#endif
733	{
734		invlpg(va1);
735		invlpg(va2);
736	}
737}
738
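/*
 * Each pmap maps its own page directory at directory index PTDPTDI
 * (the recursive, self-referential slot).  When a pmap is the current
 * address space, its page tables therefore appear as one linear array
 * of PTEs at the fixed virtual address PTmap.  APTDpde/APTmap form an
 * "alternate" recursive slot; get_ptbase() points it at a non-current
 * pmap's page directory so that pmap's page tables can be reached.
 */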
739static unsigned *
740get_ptbase(pmap)
741	pmap_t pmap;
742{
743	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
744
745	/* are we current address space or kernel? */
746	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
747		return (unsigned *) PTmap;
748	}
749	/* otherwise, we are alternate address space */
750	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
751		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
752#if defined(SMP)
753		/* The page directory is not shared between CPUs */
754		cpu_invltlb();
755#else
756		invltlb();
757#endif
758	}
759	return (unsigned *) APTmap;
760}
761
762/*
763 * Super fast pmap_pte routine best used when scanning
764 * the pv lists.  This eliminates many coarse-grained
765 * invltlb calls.  Note that many of the pv list
766 * scans are across different pmaps.  It is very wasteful
767 * to do an entire invltlb for checking a single mapping.
768 */
769
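/*
 * PMAP1 points at a single reserved kernel PTE (set up by SYSMAP in
 * pmap_bootstrap) and PADDR1 is the page of KVA it maps.  Repointing
 * that one PTE at the wanted page table page means only a single page
 * of TLB needs to be invalidated.  On SMP, prv_PMAP1/prv_PPAGE1 are
 * the per-CPU equivalents.
 */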
770static unsigned *
771pmap_pte_quick(pmap, va)
772	register pmap_t pmap;
773	vm_offset_t va;
774{
775	unsigned pde, newpf;
776	if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
777		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
778		unsigned index = i386_btop(va);
779		/* are we current address space or kernel? */
780		if ((pmap == kernel_pmap) ||
781			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
782			return (unsigned *) PTmap + index;
783		}
784		newpf = pde & PG_FRAME;
785#ifdef SMP
786		if ( ((* (unsigned *) prv_PMAP1) & PG_FRAME) != newpf) {
787			* (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V;
788			cpu_invlpg(&prv_PPAGE1);
789		}
790		return prv_PPAGE1 + ((unsigned) index & (NPTEPG - 1));
791#else
792		if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
793			* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
794			invltlb_1pg((vm_offset_t) PADDR1);
795		}
796		return PADDR1 + ((unsigned) index & (NPTEPG - 1));
797#endif
798	}
799	return (0);
800}
801
802/*
803 *	Routine:	pmap_extract
804 *	Function:
805 *		Extract the physical page address associated
806 *		with the given map/virtual_address pair.
807 */
808vm_offset_t
809pmap_extract(pmap, va)
810	register pmap_t pmap;
811	vm_offset_t va;
812{
813	vm_offset_t rtval;
814	vm_offset_t pdirindex;
815	pdirindex = va >> PDRSHIFT;
816	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
817		unsigned *pte;
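		/*
		 * A 4MB (PG_PS) mapping has no page table: the PDE itself
		 * holds the 4MB-aligned frame and the low 22 bits of the
		 * va supply the offset.
		 */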
818		if ((rtval & PG_PS) != 0) {
819			rtval &= ~(NBPDR - 1);
820			rtval |= va & (NBPDR - 1);
821			return rtval;
822		}
823		pte = get_ptbase(pmap) + i386_btop(va);
824		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
825		return rtval;
826	}
827	return 0;
828
829}
830
831/*
832 * determine if a page is managed (memory vs. device)
833 */
834static PMAP_INLINE int
835pmap_is_managed(pa)
836	vm_offset_t pa;
837{
838	int i;
839
840	if (!pmap_initialized)
841		return 0;
842
843	for (i = 0; phys_avail[i + 1]; i += 2) {
844		if (pa < phys_avail[i + 1] && pa >= phys_avail[i])
845			return 1;
846	}
847	return 0;
848}
849
850
851/***************************************************
852 * Low level mapping routines.....
853 ***************************************************/
854
855/*
856 * Add a list of wired pages to the kva.
857 * This routine is only used for temporary
858 * kernel mappings that do not need to have
859 * page modification or references recorded.
860 * Note that old mappings are simply written
861 * over.  The page *must* be wired.
862 */
863void
864pmap_qenter(va, m, count)
865	vm_offset_t va;
866	vm_page_t *m;
867	int count;
868{
869	int i;
870	register unsigned *pte;
871
872	for (i = 0; i < count; i++) {
873		vm_offset_t tva = va + i * PAGE_SIZE;
874		unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | pgeflag;
875		unsigned opte;
876		pte = (unsigned *)vtopte(tva);
877		opte = *pte;
878		*pte = npte;
879		if (opte)
880			invltlb_1pg(tva);
881	}
882}
883
884/*
885 * this routine jerks page mappings from the
886 * kernel -- it is meant only for temporary mappings.
887 */
888void
889pmap_qremove(va, count)
890	vm_offset_t va;
891	int count;
892{
893	int i;
894	register unsigned *pte;
895
896	for (i = 0; i < count; i++) {
897		pte = (unsigned *)vtopte(va);
898		*pte = 0;
899		invltlb_1pg(va);
900		va += PAGE_SIZE;
901	}
902}
903
904/*
905 * Add a wired page to the kva.
906 * Note that in order for the mapping to take effect, you
907 * should do an invltlb after doing the pmap_kenter...
908 */
909PMAP_INLINE void
910pmap_kenter(va, pa)
911	vm_offset_t va;
912	register vm_offset_t pa;
913{
914	register unsigned *pte;
915	unsigned npte, opte;
916
917	npte = pa | PG_RW | PG_V | pgeflag;
918	pte = (unsigned *)vtopte(va);
919	opte = *pte;
920	*pte = npte;
921	if (opte)
922		invltlb_1pg(va);
923}
924
925/*
926 * remove a page from the kernel pagetables
927 */
928PMAP_INLINE void
929pmap_kremove(va)
930	vm_offset_t va;
931{
932	register unsigned *pte;
933
934	pte = (unsigned *)vtopte(va);
935	*pte = 0;
936	invltlb_1pg(va);
937}
938
939static vm_page_t
940pmap_page_lookup(object, pindex)
941	vm_object_t object;
942	vm_pindex_t pindex;
943{
944	vm_page_t m;
945retry:
946	m = vm_page_lookup(object, pindex);
947	if (m && vm_page_sleep(m, "pplookp", NULL))
948		goto retry;
949	return m;
950}
951
952/*
953 * Create the UPAGES for a new process.
954 * This routine directly affects the fork perf for a process.
955 */
956void
957pmap_new_proc(p)
958	struct proc *p;
959{
960	int i, updateneeded;
961	vm_object_t upobj;
962	vm_page_t m;
963	struct user *up;
964	unsigned *ptek, oldpte;
965
966	/*
967	 * allocate object for the upages
968	 */
969	if ((upobj = p->p_upages_obj) == NULL) {
970		upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
971		p->p_upages_obj = upobj;
972	}
973
974	/* get a kernel virtual address for the UPAGES for this proc */
975	if ((up = p->p_addr) == NULL) {
976		up = (struct user *) kmem_alloc_pageable(kernel_map,
977				UPAGES * PAGE_SIZE);
978#if !defined(MAX_PERF)
979		if (up == NULL)
980			panic("pmap_new_proc: u_map allocation failed");
981#endif
982		p->p_addr = up;
983	}
984
985	ptek = (unsigned *) vtopte((vm_offset_t) up);
986
987	updateneeded = 0;
988	for(i=0;i<UPAGES;i++) {
989		/*
990		 * Get a kernel stack page
991		 */
992		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
993
994		/*
995		 * Wire the page
996		 */
997		m->wire_count++;
998		cnt.v_wire_count++;
999
1000		oldpte = *(ptek + i);
1001		/*
1002		 * Enter the page into the kernel address space.
1003		 */
1004		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
1005		if (oldpte) {
1006			if ((oldpte & PG_G) || (cpu_class > CPUCLASS_386)) {
1007				invlpg((vm_offset_t) up + i * PAGE_SIZE);
1008			} else {
1009				updateneeded = 1;
1010			}
1011		}
1012
1013		vm_page_wakeup(m);
1014		m->flags &= ~PG_ZERO;
1015		m->flags |= PG_MAPPED | PG_WRITEABLE;
1016		m->valid = VM_PAGE_BITS_ALL;
1017	}
1018	if (updateneeded)
1019		invltlb();
1020}
1021
1022/*
1023 * Dispose the UPAGES for a process that has exited.
1024 * This routine directly impacts the exit perf of a process.
1025 */
1026void
1027pmap_dispose_proc(p)
1028	struct proc *p;
1029{
1030	int i;
1031	vm_object_t upobj;
1032	vm_page_t m;
1033	unsigned *ptek, oldpte;
1034
1035	upobj = p->p_upages_obj;
1036
1037	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
1038	for(i=0;i<UPAGES;i++) {
1039
1040		if ((m = vm_page_lookup(upobj, i)) == NULL)
1041			panic("pmap_dispose_proc: upage already missing???");
1042
1043		m->flags |= PG_BUSY;
1044
1045		oldpte = *(ptek + i);
1046		*(ptek + i) = 0;
1047		if ((oldpte & PG_G) || (cpu_class > CPUCLASS_386))
1048			invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
1049		vm_page_unwire(m);
1050		vm_page_free(m);
1051	}
1052
1053	if (cpu_class <= CPUCLASS_386)
1054		invltlb();
1055}
1056
1057/*
1058 * Allow the UPAGES for a process to be prejudicially paged out.
1059 */
1060void
1061pmap_swapout_proc(p)
1062	struct proc *p;
1063{
1064	int i;
1065	vm_object_t upobj;
1066	vm_page_t m;
1067
1068	upobj = p->p_upages_obj;
1069	/*
1070	 * let the upages be paged
1071	 */
1072	for(i=0;i<UPAGES;i++) {
1073		if ((m = vm_page_lookup(upobj, i)) == NULL)
1074			panic("pmap_swapout_proc: upage already missing???");
1075		m->dirty = VM_PAGE_BITS_ALL;
1076		vm_page_unwire(m);
1077		vm_page_deactivate(m);
1078		pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
1079	}
1080}
1081
1082/*
1083 * Bring the UPAGES for a specified process back in.
1084 */
1085void
1086pmap_swapin_proc(p)
1087	struct proc *p;
1088{
1089	int i,rv;
1090	vm_object_t upobj;
1091	vm_page_t m;
1092
1093	upobj = p->p_upages_obj;
1094	for(i=0;i<UPAGES;i++) {
1095
1096		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1097
1098		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
1099			VM_PAGE_TO_PHYS(m));
1100
1101		if (m->valid != VM_PAGE_BITS_ALL) {
1102			rv = vm_pager_get_pages(upobj, &m, 1, 0);
1103#if !defined(MAX_PERF)
1104			if (rv != VM_PAGER_OK)
1105				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
1106#endif
1107			m = vm_page_lookup(upobj, i);
1108			m->valid = VM_PAGE_BITS_ALL;
1109		}
1110
1111		vm_page_wire(m);
1112		vm_page_wakeup(m);
1113		m->flags |= PG_MAPPED | PG_WRITEABLE;
1114	}
1115}
1116
1117/***************************************************
1118 * Page table page management routines.....
1119 ***************************************************/
1120
1121/*
1122 * This routine unholds page table pages, and if the hold count
1123 * drops to zero, then it decrements the wire count.
1124 */
1125static int
1126_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1127	int s;
1128
1129	while (vm_page_sleep(m, "pmuwpt", NULL));
1130
1131	if (m->hold_count == 0) {
1132		vm_offset_t pteva;
1133		/*
1134		 * unmap the page table page
1135		 */
1136		pmap->pm_pdir[m->pindex] = 0;
1137		--pmap->pm_stats.resident_count;
1138		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1139			(((unsigned) PTDpde) & PG_FRAME)) {
1140			/*
1141			 * Do an invltlb to make the invalidated mapping
1142			 * take effect immediately.
1143			 */
1144			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
1145			invltlb_1pg(pteva);
1146		}
1147
1148		if (pmap->pm_ptphint == m)
1149			pmap->pm_ptphint = NULL;
1150
1151		/*
1152		 * If the page is finally unwired, simply free it.
1153		 */
1154		--m->wire_count;
1155		if (m->wire_count == 0) {
1156
1157			if (m->flags & PG_WANTED) {
1158				m->flags &= ~PG_WANTED;
1159				wakeup(m);
1160			}
1161
1162			m->flags |= PG_BUSY;
1163			vm_page_free_zero(m);
1164			--cnt.v_wire_count;
1165		}
1166		return 1;
1167	}
1168	return 0;
1169}
1170
1171static PMAP_INLINE int
1172pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1173	vm_page_unhold(m);
1174	if (m->hold_count == 0)
1175		return _pmap_unwire_pte_hold(pmap, m);
1176	else
1177		return 0;
1178}
1179
1180/*
1181 * After removing a page table entry, this routine is used to
1182 * conditionally free the page, and manage the hold/wire counts.
1183 */
1184static int
1185pmap_unuse_pt(pmap, va, mpte)
1186	pmap_t pmap;
1187	vm_offset_t va;
1188	vm_page_t mpte;
1189{
1190	unsigned ptepindex;
1191	if (va >= UPT_MIN_ADDRESS)
1192		return 0;
1193
1194	if (mpte == NULL) {
1195		ptepindex = (va >> PDRSHIFT);
1196		if (pmap->pm_ptphint &&
1197			(pmap->pm_ptphint->pindex == ptepindex)) {
1198			mpte = pmap->pm_ptphint;
1199		} else {
1200			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1201			pmap->pm_ptphint = mpte;
1202		}
1203	}
1204
1205	return pmap_unwire_pte_hold(pmap, mpte);
1206}
1207
1208#if !defined(SMP)
1209void
1210pmap_pinit0(pmap)
1211	struct pmap *pmap;
1212{
1213	pmap->pm_pdir =
1214		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1215	pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
1216	pmap->pm_flags = 0;
1217	pmap->pm_count = 1;
1218	pmap->pm_ptphint = NULL;
1219	TAILQ_INIT(&pmap->pm_pvlist);
1220	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1221}
1222#else
1223void
1224pmap_pinit0(pmap)
1225	struct pmap *pmap;
1226{
1227	pmap_pinit(pmap);
1228}
1229#endif
1230
1231/*
1232 * Initialize a preallocated and zeroed pmap structure,
1233 * such as one in a vmspace structure.
1234 */
1235void
1236pmap_pinit(pmap)
1237	register struct pmap *pmap;
1238{
1239	vm_page_t ptdpg;
1240
1241	/*
1242	 * No need to allocate page table space yet but we do need a valid
1243	 * page directory table.
1244	 */
1245	if (pmap->pm_pdir == NULL)
1246		pmap->pm_pdir =
1247			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1248
1249	/*
1250	 * allocate object for the ptes
1251	 */
1252	if (pmap->pm_pteobj == NULL)
1253		pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
1254
1255	/*
1256	 * allocate the page directory page
1257	 */
1258retry:
1259	ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI,
1260			VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1261
1262	ptdpg->wire_count = 1;
1263	++cnt.v_wire_count;
1264
1265	ptdpg->flags &= ~(PG_MAPPED | PG_BUSY);	/* not mapped normally */
1266	ptdpg->valid = VM_PAGE_BITS_ALL;
1267
1268	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
1269	if ((ptdpg->flags & PG_ZERO) == 0)
1270		bzero(pmap->pm_pdir, PAGE_SIZE);
1271
1272	/* wire in kernel global address entries */
1273	/* XXX copies current process, does not fill in MPPTDI */
1274	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
1275
1276	/* install self-referential address mapping entry */
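	/*
	 * Pointing directory slot PTDPTDI back at the page directory
	 * itself is what makes this pmap's page tables show up as a
	 * linear array at PTmap (and the directory itself at PTD)
	 * whenever this pmap is the current address space.
	 */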
1277	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1278		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
1279
1280	pmap->pm_flags = 0;
1281	pmap->pm_count = 1;
1282	pmap->pm_ptphint = NULL;
1283	TAILQ_INIT(&pmap->pm_pvlist);
1284	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1285}
1286
1287static int
1288pmap_release_free_page(pmap, p)
1289	struct pmap *pmap;
1290	vm_page_t p;
1291{
1292	int s;
1293	unsigned *pde = (unsigned *) pmap->pm_pdir;
1294	/*
1295	 * This code optimizes the case of freeing non-busy
1296	 * page-table pages.  Those pages are zero now, and
1297	 * might as well be placed directly into the zero queue.
1298	 */
1299	if (vm_page_sleep(p, "pmaprl", NULL))
1300		return 0;
1301
1302	p->flags |= PG_BUSY;
1303
1304	/*
1305	 * Remove the page table page from the process's address space.
1306	 */
1307	pde[p->pindex] = 0;
1308	pmap->pm_stats.resident_count--;
1309
1310#if !defined(MAX_PERF)
1311	if (p->hold_count)  {
1312		panic("pmap_release: freeing held page table page");
1313	}
1314#endif
1315	/*
1316	 * Page directory pages need to have the kernel
1317	 * stuff cleared, so they can go into the zero queue also.
1318	 */
1319	if (p->pindex == PTDPTDI) {
1320		bzero(pde + KPTDI, nkpt * PTESIZE);
1321#ifdef SMP
1322		pde[MPPTDI] = 0;
1323#endif
1324		pde[APTDPTDI] = 0;
1325		pmap_kremove((vm_offset_t) pmap->pm_pdir);
1326	}
1327
1328	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
1329		pmap->pm_ptphint = NULL;
1330
1331	vm_page_free_zero(p);
1332	return 1;
1333}
1334
1335/*
1336 * this routine is called if the page table page is not
1337 * mapped correctly.
1338 */
1339static vm_page_t
1340_pmap_allocpte(pmap, ptepindex)
1341	pmap_t	pmap;
1342	unsigned ptepindex;
1343{
1344	vm_offset_t pteva, ptepa;
1345	vm_page_t m;
1346
1347	/*
1348	 * Find or fabricate a new pagetable page
1349	 */
1350	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
1351			VM_ALLOC_ZERO | VM_ALLOC_RETRY);
1352
1353	if (m->queue != PQ_NONE) {
1354		int s = splvm();
1355		vm_page_unqueue(m);
1356		splx(s);
1357	}
1358
1359	if (m->wire_count == 0)
1360		cnt.v_wire_count++;
1361	m->wire_count++;
1362
1363	/*
1364	 * Increment the hold count for the page table page
1365	 * (denoting a new mapping.)
1366	 */
1367	m->hold_count++;
1368
1369	/*
1370	 * Map the pagetable page into the process address space, if
1371	 * it isn't already there.
1372	 */
1373
1374	pmap->pm_stats.resident_count++;
1375
1376	ptepa = VM_PAGE_TO_PHYS(m);
1377	pmap->pm_pdir[ptepindex] =
1378		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
1379
1380	/*
1381	 * Set the page table hint
1382	 */
1383	pmap->pm_ptphint = m;
1384
1385	/*
1386	 * Try to use the new mapping, but if we cannot, then
1387	 * do it with the routine that maps the page explicitly.
1388	 */
1389	if ((m->flags & PG_ZERO) == 0) {
1390		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1391			(((unsigned) PTDpde) & PG_FRAME)) {
1392			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
1393			bzero((caddr_t) pteva, PAGE_SIZE);
1394		} else {
1395			pmap_zero_page(ptepa);
1396		}
1397	}
1398
1399	m->valid = VM_PAGE_BITS_ALL;
1400	m->flags &= ~(PG_ZERO | PG_BUSY);
1401	m->flags |= PG_MAPPED;
1402
1403	return m;
1404}
1405
1406static vm_page_t
1407pmap_allocpte(pmap, va)
1408	pmap_t	pmap;
1409	vm_offset_t va;
1410{
1411	unsigned ptepindex;
1412	vm_offset_t ptepa;
1413	vm_page_t m;
1414
1415	/*
1416	 * Calculate pagetable page index
1417	 */
1418	ptepindex = va >> PDRSHIFT;
1419
1420	/*
1421	 * Get the page directory entry
1422	 */
1423	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1424
1425	/*
1426	 * This supports switching from a 4MB page to a
1427	 * normal 4K page.
1428	 */
1429	if (ptepa & PG_PS) {
1430		pmap->pm_pdir[ptepindex] = 0;
1431		ptepa = 0;
1432		invltlb();
1433	}
1434
1435	/*
1436	 * If the page table page is mapped, we just increment the
1437	 * hold count, and activate it.
1438	 */
1439	if (ptepa) {
1440		/*
1441		 * In order to get the page table page, try the
1442		 * hint first.
1443		 */
1444		if (pmap->pm_ptphint &&
1445			(pmap->pm_ptphint->pindex == ptepindex)) {
1446			m = pmap->pm_ptphint;
1447		} else {
1448			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1449			pmap->pm_ptphint = m;
1450		}
1451		m->hold_count++;
1452		return m;
1453	}
1454	/*
1455	 * Here if the pte page isn't mapped, or if it has been deallocated.
1456	 */
1457	return _pmap_allocpte(pmap, ptepindex);
1458}
1459
1460
1461/***************************************************
1462* Pmap allocation/deallocation routines.
1463 ***************************************************/
1464
1465/*
1466 * Release any resources held by the given physical map.
1467 * Called when a pmap initialized by pmap_pinit is being released.
1468 * Should only be called if the map contains no valid mappings.
1469 */
1470void
1471pmap_release(pmap)
1472	register struct pmap *pmap;
1473{
1474	vm_page_t p,n,ptdpg;
1475	vm_object_t object = pmap->pm_pteobj;
1476	int curgeneration;
1477
1478#if defined(DIAGNOSTIC)
1479	if (object->ref_count != 1)
1480		panic("pmap_release: pteobj reference count != 1");
1481#endif
1482
1483	ptdpg = NULL;
1484retry:
1485	curgeneration = object->generation;
1486	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
1487		n = TAILQ_NEXT(p, listq);
1488		if (p->pindex == PTDPTDI) {
1489			ptdpg = p;
1490			continue;
1491		}
1492		while (!pmap_release_free_page(pmap, p)) {
1493			if (object->generation != curgeneration)
1494				goto retry;
1495		}
1497	}
1498
1499	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
1500		goto retry;
1501}
1502
1503/*
1504 * grow the number of kernel page table entries, if needed
1505 */
1506void
1507pmap_growkernel(vm_offset_t addr)
1508{
1509	struct proc *p;
1510	struct pmap *pmap;
1511	int s;
1512	vm_offset_t ptppaddr;
1513	vm_page_t nkpg;
1514#ifdef SMP
1515	int i;
1516#endif
1517	pd_entry_t newpdir;
1518
1519	s = splhigh();
1520	if (kernel_vm_end == 0) {
1521		kernel_vm_end = KERNBASE;
1522		nkpt = 0;
1523		while (pdir_pde(PTD, kernel_vm_end)) {
1524			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1525			nkpt++;
1526		}
1527	}
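	/*
	 * One page table page (one PDE) maps NPTEPG * PAGE_SIZE (4MB) of
	 * KVA, so both the target address and kernel_vm_end are rounded
	 * to that boundary and the loop below grows the kernel one page
	 * table page at a time.
	 */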
1528	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1529	while (kernel_vm_end < addr) {
1530		if (pdir_pde(PTD, kernel_vm_end)) {
1531			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1532			continue;
1533		}
1534
1535		/*
1536		 * This index is bogus, but out of the way
1537		 */
1538		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
1539#if !defined(MAX_PERF)
1540		if (!nkpg)
1541			panic("pmap_growkernel: no memory to grow kernel");
1542#endif
1543
1544		nkpt++;
1545
1546		vm_page_wire(nkpg);
1547		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1548		pmap_zero_page(ptppaddr);
1549		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
1550		pdir_pde(PTD, kernel_vm_end) = newpdir;
1551
1552#ifdef SMP
1553		for (i = 0; i < mp_ncpus; i++) {
1554			if (IdlePTDS[i])
1555				pdir_pde(IdlePTDS[i], kernel_vm_end) = newpdir;
1556		}
1557#endif
1558
1559		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1560			if (p->p_vmspace) {
1561				pmap = &p->p_vmspace->vm_pmap;
1562				*pmap_pde(pmap, kernel_vm_end) = newpdir;
1563			}
1564		}
1565		*pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
1566		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1567	}
1568	splx(s);
1569}
1570
1571/*
1572 *	Retire the given physical map from service.
1573 *	Should only be called if the map contains
1574 *	no valid mappings.
1575 */
1576void
1577pmap_destroy(pmap)
1578	register pmap_t pmap;
1579{
1580	int count;
1581
1582	if (pmap == NULL)
1583		return;
1584
1585	count = --pmap->pm_count;
1586	if (count == 0) {
1587		pmap_release(pmap);
1588#if !defined(MAX_PERF)
1589		panic("destroying a pmap is not yet implemented");
1590#endif
1591	}
1592}
1593
1594/*
1595 *	Add a reference to the specified pmap.
1596 */
1597void
1598pmap_reference(pmap)
1599	pmap_t pmap;
1600{
1601	if (pmap != NULL) {
1602		pmap->pm_count++;
1603	}
1604}
1605
1606/***************************************************
1607* page management routines.
1608 ***************************************************/
1609
1610/*
1611 * free the pv_entry back to the free list
1612 */
1613static PMAP_INLINE void
1614free_pv_entry(pv)
1615	pv_entry_t pv;
1616{
1617	pv_entry_count--;
1618	zfreei(pvzone, pv);
1619}
1620
1621/*
1622 * get a new pv_entry, allocating a block from the system
1623 * when needed.
1624 * the memory allocation is performed bypassing the malloc code
1625 * because of the possibility of allocations at interrupt time.
1626 */
1627static pv_entry_t
1628get_pv_entry(void)
1629{
1630	pv_entry_count++;
1631	if (pv_entry_high_water &&
1632		(pv_entry_count > pv_entry_high_water) &&
1633		(pmap_pagedaemon_waken == 0)) {
1634		pmap_pagedaemon_waken = 1;
1635		wakeup (&vm_pages_needed);
1636	}
1637	return zalloci(pvzone);
1638}
1639
1640/*
1641 * This routine is very drastic, but can save the system
1642 * in a pinch.
1643 */
1644void
1645pmap_collect() {
1646	pv_table_t *ppv;
1647	int i;
1648	vm_offset_t pa;
1649	vm_page_t m;
1650	static int warningdone=0;
1651
1652	if (pmap_pagedaemon_waken == 0)
1653		return;
1654
1655	if (warningdone < 5) {
1656		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1657		warningdone++;
1658	}
1659
1660	for(i = 0; i < pv_npg; i++) {
1661		if ((ppv = &pv_table[i]) == 0)
1662			continue;
1663		m = ppv->pv_vm_page;
1664		if ((pa = VM_PAGE_TO_PHYS(m)) == 0)
1665			continue;
1666		if (m->wire_count || m->hold_count || m->busy ||
1667			(m->flags & PG_BUSY))
1668			continue;
1669		pmap_remove_all(pa);
1670	}
1671	pmap_pagedaemon_waken = 0;
1672}
1673
1674
1675/*
1676 * Remove the pv entry for (pmap, va) from the page's pv list and
1677 * from the pmap's pv list, searching whichever list is likely to
1678 * be shorter, and free the now unused entry.  The reference on the
1679 * page table page is dropped via pmap_unuse_pt().
1680 */
1681
1682static int
1683pmap_remove_entry(pmap, ppv, va)
1684	struct pmap *pmap;
1685	pv_table_t *ppv;
1686	vm_offset_t va;
1687{
1688	pv_entry_t pv;
1689	int rtval;
1690	int s;
1691
1692	s = splvm();
1693	if (ppv->pv_list_count < pmap->pm_stats.resident_count) {
1694		for (pv = TAILQ_FIRST(&ppv->pv_list);
1695			pv;
1696			pv = TAILQ_NEXT(pv, pv_list)) {
1697			if (pmap == pv->pv_pmap && va == pv->pv_va)
1698				break;
1699		}
1700	} else {
1701		for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1702			pv;
1703			pv = TAILQ_NEXT(pv, pv_plist)) {
1704			if (va == pv->pv_va)
1705				break;
1706		}
1707	}
1708
1709	rtval = 0;
1710	if (pv) {
1711
1712		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1713		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1714		ppv->pv_list_count--;
1715		if (TAILQ_FIRST(&ppv->pv_list) == NULL)
1716			ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
1717
1718		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1719		free_pv_entry(pv);
1720	}
1721
1722	splx(s);
1723	return rtval;
1724}
1725
1726/*
1727 * Create a pv entry for page at pa for
1728 * (pmap, va).
1729 */
1730static void
1731pmap_insert_entry(pmap, va, mpte, pa)
1732	pmap_t pmap;
1733	vm_offset_t va;
1734	vm_page_t mpte;
1735	vm_offset_t pa;
1736{
1737
1738	int s;
1739	pv_entry_t pv;
1740	pv_table_t *ppv;
1741
1742	s = splvm();
1743	pv = get_pv_entry();
1744	pv->pv_va = va;
1745	pv->pv_pmap = pmap;
1746	pv->pv_ptem = mpte;
1747
1748	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1749
1750	ppv = pa_to_pvh(pa);
1751	TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
1752	ppv->pv_list_count++;
1753
1754	splx(s);
1755}
1756
1757/*
1758 * pmap_remove_pte: do the things to unmap a page in a process
1759 */
1760static int
1761pmap_remove_pte(pmap, ptq, va)
1762	struct pmap *pmap;
1763	unsigned *ptq;
1764	vm_offset_t va;
1765{
1766	unsigned oldpte;
1767	pv_table_t *ppv;
1768
1769	oldpte = *ptq;
1770	*ptq = 0;
1771	if (oldpte & PG_W)
1772		pmap->pm_stats.wired_count -= 1;
1773	/*
1774	 * Machines that don't support invlpg also don't support
1775	 * PG_G.
1776	 */
1777	if (oldpte & PG_G)
1778		invlpg(va);
1779	pmap->pm_stats.resident_count -= 1;
1780	if (oldpte & PG_MANAGED) {
1781		ppv = pa_to_pvh(oldpte);
1782		if (oldpte & PG_M) {
1783#if defined(PMAP_DIAGNOSTIC)
1784			if (pmap_nw_modified((pt_entry_t) oldpte)) {
1785				printf(
1786	"pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1787				    va, oldpte);
1788			}
1789#endif
1790			if (pmap_track_modified(va))
1791				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1792		}
1793		if (oldpte & PG_A)
1794			ppv->pv_vm_page->flags |= PG_REFERENCED;
1795		return pmap_remove_entry(pmap, ppv, va);
1796	} else {
1797		return pmap_unuse_pt(pmap, va, NULL);
1798	}
1799
1800	return 0;
1801}
1802
1803/*
1804 * Remove a single page from a process address space
1805 */
1806static void
1807pmap_remove_page(pmap, va)
1808	struct pmap *pmap;
1809	register vm_offset_t va;
1810{
1811	register unsigned *ptq;
1812
1813	/*
1814	 * if there is no pte for this address, just skip it!!!
1815	 */
1816	if (*pmap_pde(pmap, va) == 0) {
1817		return;
1818	}
1819
1820	/*
1821	 * get a local va for mappings for this pmap.
1822	 */
1823	ptq = get_ptbase(pmap) + i386_btop(va);
1824	if (*ptq) {
1825		(void) pmap_remove_pte(pmap, ptq, va);
1826		invltlb_1pg(va);
1827	}
1828	return;
1829}
1830
1831/*
1832 *	Remove the given range of addresses from the specified map.
1833 *
1834 *	It is assumed that the start and end are properly
1835 *	rounded to the page size.
1836 */
1837void
1838pmap_remove(pmap, sva, eva)
1839	struct pmap *pmap;
1840	register vm_offset_t sva;
1841	register vm_offset_t eva;
1842{
1843	register unsigned *ptbase;
1844	vm_offset_t pdnxt;
1845	vm_offset_t ptpaddr;
1846	vm_offset_t sindex, eindex;
1847	int anyvalid;
1848
1849	if (pmap == NULL)
1850		return;
1851
1852	if (pmap->pm_stats.resident_count == 0)
1853		return;
1854
1855	/*
1856	 * Special handling for removing a single page: a very
1857	 * common operation for which we can short-circuit some
1858	 * code.
1859	 */
1860	if (((sva + PAGE_SIZE) == eva) &&
1861		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1862		pmap_remove_page(pmap, sva);
1863		return;
1864	}
1865
1866	anyvalid = 0;
1867
1868	/*
1869	 * Get a local virtual address for the mappings that are being
1870	 * worked with.
1871	 */
1872	ptbase = get_ptbase(pmap);
1873
1874	sindex = i386_btop(sva);
1875	eindex = i386_btop(eva);
1876
1877	for (; sindex < eindex; sindex = pdnxt) {
1878		unsigned pdirindex;
1879
1880		/*
1881		 * Calculate index for next page table.
1882		 */
1883		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1884		if (pmap->pm_stats.resident_count == 0)
1885			break;
1886
1887		pdirindex = sindex / NPDEPG;
1888		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1889			pmap->pm_pdir[pdirindex] = 0;
1890			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1891			anyvalid++;
1892			continue;
1893		}
1894
1895		/*
1896		 * Weed out invalid mappings. Note: we assume that the page
1897		 * directory table is always allocated, and in kernel virtual.
1898		 */
1899		if (ptpaddr == 0)
1900			continue;
1901
1902		/*
1903		 * Limit our scan to either the end of the va represented
1904		 * by the current page table page, or to the end of the
1905		 * range being removed.
1906		 */
1907		if (pdnxt > eindex) {
1908			pdnxt = eindex;
1909		}
1910
1911		for ( ;sindex != pdnxt; sindex++) {
1912			vm_offset_t va;
1913			if (ptbase[sindex] == 0) {
1914				continue;
1915			}
1916			va = i386_ptob(sindex);
1917
1918			anyvalid++;
1919			if (pmap_remove_pte(pmap,
1920				ptbase + sindex, va))
1921				break;
1922		}
1923	}
1924
1925	if (anyvalid) {
1926		invltlb();
1927	}
1928}
1929
1930/*
1931 *	Routine:	pmap_remove_all
1932 *	Function:
1933 *		Removes this physical page from
1934 *		all physical maps in which it resides.
1935 *		Reflects back modify bits to the pager.
1936 *
1937 *	Notes:
1938 *		Original versions of this routine were very
1939 *		inefficient because they iteratively called
1940 *		pmap_remove (slow...)
1941 */
1942
1943static void
1944pmap_remove_all(pa)
1945	vm_offset_t pa;
1946{
1947	register pv_entry_t pv;
1948	pv_table_t *ppv;
1949	register unsigned *pte, tpte;
1950	int nmodify;
1951	int update_needed;
1952	int s;
1953
1954	nmodify = 0;
1955	update_needed = 0;
1956#if defined(PMAP_DIAGNOSTIC)
1957	/*
1958	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1959	 * pages!
1960	 */
1961	if (!pmap_is_managed(pa)) {
1962		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", pa);
1963	}
1964#endif
1965
1966	s = splvm();
1967	ppv = pa_to_pvh(pa);
1968	while ((pv = TAILQ_FIRST(&ppv->pv_list)) != NULL) {
1969		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
1970
1971		pv->pv_pmap->pm_stats.resident_count--;
1972
1973		tpte = *pte;
1974		*pte = 0;
1975		if (tpte & PG_W)
1976			pv->pv_pmap->pm_stats.wired_count--;
1977
1978		if (tpte & PG_A)
1979			ppv->pv_vm_page->flags |= PG_REFERENCED;
1980
1981		/*
1982		 * Update the vm_page_t clean and reference bits.
1983		 */
1984		if (tpte & PG_M) {
1985#if defined(PMAP_DIAGNOSTIC)
1986			if (pmap_nw_modified((pt_entry_t) tpte)) {
1987				printf(
1988	"pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1989				    pv->pv_va, tpte);
1990			}
1991#endif
1992			if (pmap_track_modified(pv->pv_va))
1993				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1994		}
1995		if (!update_needed &&
1996			((!curproc || (&curproc->p_vmspace->vm_pmap == pv->pv_pmap)) ||
1997			(pv->pv_pmap == kernel_pmap))) {
1998			update_needed = 1;
1999		}
2000
2001		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2002		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
2003		ppv->pv_list_count--;
2004		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2005		free_pv_entry(pv);
2006	}
2007
2008	ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
2009
2010	if (update_needed)
2011		invltlb();
2012
2013	splx(s);
2014	return;
2015}
2016
2017/*
2018 *	Set the physical protection on the
2019 *	specified range of this map as requested.
2020 */
2021void
2022pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
2023{
2024	register unsigned *ptbase;
2025	vm_offset_t pdnxt, ptpaddr;
2026	vm_pindex_t sindex, eindex;
2027	int anychanged;
2028
2029
2030	if (pmap == NULL)
2031		return;
2032
2033	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2034		pmap_remove(pmap, sva, eva);
2035		return;
2036	}
2037
2038	if (prot & VM_PROT_WRITE)
2039		return;
2040
2041	anychanged = 0;
2042
2043	ptbase = get_ptbase(pmap);
2044
2045	sindex = i386_btop(sva);
2046	eindex = i386_btop(eva);
2047
2048	for (; sindex < eindex; sindex = pdnxt) {
2049
2050		unsigned pdirindex;
2051
2052		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
2053
2054		pdirindex = sindex / NPDEPG;
2055		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
2056			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
2057			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
2058			anychanged++;
2059			continue;
2060		}
2061
2062		/*
2063		 * Weed out invalid mappings. Note: we assume that the page
2064		 * directory table is always allocated, and in kernel virtual.
2065		 */
2066		if (ptpaddr == 0)
2067			continue;
2068
2069		if (pdnxt > eindex) {
2070			pdnxt = eindex;
2071		}
2072
2073		for (; sindex != pdnxt; sindex++) {
2074
2075			unsigned pbits;
2076			pv_table_t *ppv;
2077
2078			pbits = ptbase[sindex];
2079
2080			if (pbits & PG_MANAGED) {
2081				ppv = NULL;
2082				if (pbits & PG_A) {
2083					ppv = pa_to_pvh(pbits);
2084					ppv->pv_vm_page->flags |= PG_REFERENCED;
2085					pbits &= ~PG_A;
2086				}
2087				if (pbits & PG_M) {
2088					if (pmap_track_modified(i386_ptob(sindex))) {
2089						if (ppv == NULL)
2090							ppv = pa_to_pvh(pbits);
2091						ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2092						pbits &= ~PG_M;
2093					}
2094				}
2095			}
2096
2097			pbits &= ~PG_RW;
2098
2099			if (pbits != ptbase[sindex]) {
2100				ptbase[sindex] = pbits;
2101				anychanged = 1;
2102			}
2103		}
2104	}
2105	if (anychanged)
2106		invltlb();
2107}
2108
2109/*
2110 *	Insert the given physical page (p) at
2111 *	the specified virtual address (v) in the
2112 *	target physical map with the protection requested.
2113 *
2114 *	If specified, the page will be wired down, meaning
2115 *	that the related pte cannot be reclaimed.
2116 *
2117 *	NB:  This is the only routine which MAY NOT lazy-evaluate
2118 *	or lose information.  That is, this routine must actually
2119 *	insert this page into the given map NOW.
2120 */
2121void
2122pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot,
2123	   boolean_t wired)
2124{
2125	register unsigned *pte;
2126	vm_offset_t opa;
2127	vm_offset_t origpte, newpte;
2128	vm_page_t mpte;
2129
2130	if (pmap == NULL)
2131		return;
2132
2133	va &= PG_FRAME;
2134#ifdef PMAP_DIAGNOSTIC
2135	if (va > VM_MAX_KERNEL_ADDRESS)
2136		panic("pmap_enter: toobig");
2137	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
2138		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
2139#endif
2140
2141	mpte = NULL;
2142	/*
2143	 * In the case that a page table page is not
2144	 * resident, we are creating it here.
2145	 */
2146	if (va < UPT_MIN_ADDRESS) {
2147		mpte = pmap_allocpte(pmap, va);
2148	}
2149#if 0 && defined(PMAP_DIAGNOSTIC)
2150	else {
2151		vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va);
2152		if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) {
2153			panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n",
2154				pmap->pm_pdir[PTDPTDI], origpte, va);
2155		}
2156		if (smp_active) {
2157			pdeaddr = (vm_offset_t *) IdlePTDS[cpuid];
2158			if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) {
2159				if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr))
2160					printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr);
2161				printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr);
2162				panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n",
2163					pmap->pm_pdir[PTDPTDI], newpte, origpte, va);
2164			}
2165		}
2166	}
2167#endif
2168
2169	pte = pmap_pte(pmap, va);
2170
2171#if !defined(MAX_PERF)
2172	/*
2173	 * Page Directory table entry not valid, we need a new PT page
2174	 */
2175	if (pte == NULL) {
2176		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n",
2177			(void *)pmap->pm_pdir[PTDPTDI], va);
2178	}
2179#endif
2180
2181	origpte = *(vm_offset_t *)pte;
2182	pa &= PG_FRAME;
2183	opa = origpte & PG_FRAME;
2184
2185#if !defined(MAX_PERF)
2186	if (origpte & PG_PS)
2187		panic("pmap_enter: attempted pmap_enter on 4MB page");
2188#endif
2189
2190	/*
2191	 * Mapping has not changed, must be protection or wiring change.
2192	 */
2193	if (origpte && (opa == pa)) {
2194		/*
2195		 * Wiring change, just update stats. We don't worry about
2196		 * wiring PT pages as they remain resident as long as there
2197		 * are valid mappings in them. Hence, if a user page is wired,
2198		 * the PT page will be also.
2199		 */
2200		if (wired && ((origpte & PG_W) == 0))
2201			pmap->pm_stats.wired_count++;
2202		else if (!wired && (origpte & PG_W))
2203			pmap->pm_stats.wired_count--;
2204
2205#if defined(PMAP_DIAGNOSTIC)
2206		if (pmap_nw_modified((pt_entry_t) origpte)) {
2207			printf(
2208	"pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
2209			    va, origpte);
2210		}
2211#endif
2212
2213		/*
2214		 * Remove extra pte reference
2215		 */
2216		if (mpte)
2217			mpte->hold_count--;
2218
2219		if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
2220			if ((origpte & PG_RW) == 0) {
2221				*pte |= PG_RW;
2222				invltlb_1pg(va);
2223			}
2224			return;
2225		}
2226
2227		/*
2228		 * We might be turning off write access to the page,
2229		 * so we go ahead and sense modify status.
2230		 */
2231		if (origpte & PG_MANAGED) {
2232			if ((origpte & PG_M) && pmap_track_modified(va)) {
2233				pv_table_t *ppv;
2234				ppv = pa_to_pvh(opa);
2235				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2236			}
2237			pa |= PG_MANAGED;
2238		}
2239		goto validate;
2240	}
2241	/*
2242	 * Mapping has changed, invalidate old range and fall through to
2243	 * handle validating new mapping.
2244	 */
2245	if (opa) {
2246		int err;
2247		err = pmap_remove_pte(pmap, pte, va);
2248#if !defined(MAX_PERF)
2249		if (err)
2250			panic("pmap_enter: pte vanished, va: 0x%x", va);
2251#endif
2252	}
2253
2254	/*
2255	 * Enter on the PV list if part of our managed memory.  Note that we
2256	 * raise IPL while manipulating pv_table since pmap_enter can be
2257	 * called at interrupt time.
2258	 */
2259	if (pmap_is_managed(pa)) {
2260		pmap_insert_entry(pmap, va, mpte, pa);
2261		pa |= PG_MANAGED;
2262	}
2263
2264	/*
2265	 * Increment counters
2266	 */
2267	pmap->pm_stats.resident_count++;
2268	if (wired)
2269		pmap->pm_stats.wired_count++;
2270
2271validate:
2272	/*
2273	 * Now validate mapping with desired protection/wiring.
2274	 */
2275	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2276
2277	if (wired)
2278		newpte |= PG_W;
2279	if (va < UPT_MIN_ADDRESS)
2280		newpte |= PG_U;
2281	if (pmap == kernel_pmap)
2282		newpte |= pgeflag;
2283
2284	/*
2285	 * if the mapping or permission bits are different, we need
2286	 * to update the pte.
2287	 */
2288	if ((origpte & ~(PG_M|PG_A)) != newpte) {
2289		*pte = newpte | PG_A;
2290		if (origpte)
2291			invltlb_1pg(va);
2292	}
2293}
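
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * caller that wires one managed page into the kernel pmap with
 * read/write permission.  The page "m" and address "va" are assumed to
 * have been obtained elsewhere; this only shows the calling convention
 * of pmap_enter() above.  (Kept under #if 0 so it is never compiled.)
 */
#if 0
static void
example_wire_kernel_page(vm_offset_t va, vm_page_t m)
{
	/* wired == TRUE, so the pte may not be reclaimed later */
	pmap_enter(kernel_pmap, va, VM_PAGE_TO_PHYS(m),
	    VM_PROT_READ | VM_PROT_WRITE, TRUE);
}
#endif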
2294
2295/*
2296 * this code makes some *MAJOR* assumptions:
2297 * 1. The pmap is the current pmap and it exists.
2298 * 2. Not wired.
2299 * 3. Read access.
2300 * 4. No page table pages.
2301 * 5. The TLB flush is deferred to the calling procedure.
2302 * 6. Page IS managed.
2303 * but is *MUCH* faster than pmap_enter...
2304 */
2305
2306static vm_page_t
2307pmap_enter_quick(pmap, va, pa, mpte)
2308	register pmap_t pmap;
2309	vm_offset_t va;
2310	register vm_offset_t pa;
2311	vm_page_t mpte;
2312{
2313	register unsigned *pte;
2314
2315	/*
2316	 * In the case that a page table page is not
2317	 * resident, we are creating it here.
2318	 */
2319	if (va < UPT_MIN_ADDRESS) {
2320		unsigned ptepindex;
2321		vm_offset_t ptepa;
2322
2323		/*
2324		 * Calculate pagetable page index
2325		 */
2326		ptepindex = va >> PDRSHIFT;
2327		if (mpte && (mpte->pindex == ptepindex)) {
2328			mpte->hold_count++;
2329		} else {
2330retry:
2331			/*
2332			 * Get the page directory entry
2333			 */
2334			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2335
2336			/*
2337			 * If the page table page is mapped, we just increment
2338			 * the hold count, and activate it.
2339			 */
2340			if (ptepa) {
2341#if !defined(MAX_PERF)
2342				if (ptepa & PG_PS)
2343					panic("pmap_enter_quick: unexpected mapping into 4MB page");
2344#endif
2345				if (pmap->pm_ptphint &&
2346					(pmap->pm_ptphint->pindex == ptepindex)) {
2347					mpte = pmap->pm_ptphint;
2348				} else {
2349					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2350					pmap->pm_ptphint = mpte;
2351				}
2352				if (mpte == NULL)
2353					goto retry;
2354				mpte->hold_count++;
2355			} else {
2356				mpte = _pmap_allocpte(pmap, ptepindex);
2357			}
2358		}
2359	} else {
2360		mpte = NULL;
2361	}
2362
2363	/*
2364	 * This call to vtopte makes the assumption that we are
2365	 * entering the page into the current pmap.  In order to support
2366	 * quick entry into any pmap, one would likely use pmap_pte_quick.
2367	 * But that isn't as quick as vtopte.
2368	 */
2369	pte = (unsigned *)vtopte(va);
2370	if (*pte) {
2371		if (mpte)
2372			pmap_unwire_pte_hold(pmap, mpte);
2373		return 0;
2374	}
2375
2376	/*
2377	 * Enter on the PV list if part of our managed memory.  Note that we
2378	 * raise IPL while manipulating pv_table since pmap_enter can be
2379	 * called at interrupt time.
2380	 */
2381	pmap_insert_entry(pmap, va, mpte, pa);
2382
2383	/*
2384	 * Increment counters
2385	 */
2386	pmap->pm_stats.resident_count++;
2387
2388	/*
2389	 * Now validate mapping with RO protection
2390	 */
2391	*pte = pa | PG_V | PG_U | PG_MANAGED;
2392
2393	return mpte;
2394}
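
/*
 * Illustrative sketch, not part of the original file: pmap_enter_quick()
 * hands back the page table page it used, so a caller entering a run of
 * consecutive pages can feed it back in and avoid repeated lookups, as
 * pmap_object_init_pt() below does.  "pages", "count" and the function
 * name are hypothetical.
 */
#if 0
static void
example_enter_run(pmap_t pmap, vm_offset_t va, vm_page_t *pages, int count)
{
	vm_page_t mpte = NULL;
	int i;

	/* read-only, unwired, managed mappings; the TLB flush is deferred */
	for (i = 0; i < count; i++)
		mpte = pmap_enter_quick(pmap, va + i * PAGE_SIZE,
		    VM_PAGE_TO_PHYS(pages[i]), mpte);
}
#endif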
2395
2396#define MAX_INIT_PT (96)
2397/*
2398 * pmap_object_init_pt preloads the ptes for a given object
2399 * into the specified pmap.  This eliminates the blast of soft
2400 * faults on process startup and immediately after an mmap.
2401 */
2402void
2403pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
2404	pmap_t pmap;
2405	vm_offset_t addr;
2406	vm_object_t object;
2407	vm_pindex_t pindex;
2408	vm_size_t size;
2409	int limit;
2410{
2411	vm_offset_t tmpidx;
2412	int psize;
2413	vm_page_t p, mpte;
2414	int objpgs;
2415
2416	if (!pmap)
2417		return;
2418
2419	/*
2420	 * This code maps large physical mmap regions into the
2421	 * processor address space.  Note that some shortcuts
2422	 * are taken, but the code works.
2423	 */
2424	if (pseflag &&
2425		(object->type == OBJT_DEVICE) &&
2426		((addr & (NBPDR - 1)) == 0) &&
2427		((size & (NBPDR - 1)) == 0) ) {
2428		int i;
2429		int s;
2430		vm_page_t m[1];
2431		unsigned int ptepindex;
2432		int npdes;
2433		vm_offset_t ptepa;
2434
2435		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
2436			return;
2437
2438retry:
2439		p = vm_page_lookup(object, pindex);
2440		if (p && vm_page_sleep(p, "init4p", NULL))
2441			goto retry;
2442
2443		if (p == NULL) {
2444			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
2445			if (p == NULL)
2446				return;
2447			m[0] = p;
2448
2449			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
2450				vm_page_free(p);
2451				return;
2452			}
2453
2454			p = vm_page_lookup(object, pindex);
2455			vm_page_wakeup(p);
2456		}
2457
2458		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
2459		if (ptepa & (NBPDR - 1)) {
2460			return;
2461		}
2462
2463		p->valid = VM_PAGE_BITS_ALL;
2464
2465		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
2466		npdes = size >> PDRSHIFT;
2467		for(i=0;i<npdes;i++) {
2468			pmap->pm_pdir[ptepindex] =
2469				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
2470			ptepa += NBPDR;
2471			ptepindex += 1;
2472		}
2473		p->flags |= PG_MAPPED;
2474		invltlb();
2475		return;
2476	}
2477
2478	psize = i386_btop(size);
2479
2480	if ((object->type != OBJT_VNODE) ||
2481		(limit && (psize > MAX_INIT_PT) &&
2482			(object->resident_page_count > MAX_INIT_PT))) {
2483		return;
2484	}
2485
2486	if (psize + pindex > object->size)
2487		psize = object->size - pindex;
2488
2489	mpte = NULL;
2490	/*
2491	 * if we are processing a major portion of the object, then scan the
2492	 * entire thing.
2493	 */
2494	if (psize > (object->size >> 2)) {
2495		objpgs = psize;
2496
2497		for (p = TAILQ_FIRST(&object->memq);
2498		    ((objpgs > 0) && (p != NULL));
2499		    p = TAILQ_NEXT(p, listq)) {
2500
2501			tmpidx = p->pindex;
2502			if (tmpidx < pindex) {
2503				continue;
2504			}
2505			tmpidx -= pindex;
2506			if (tmpidx >= psize) {
2507				continue;
2508			}
2509			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2510				(p->busy == 0) &&
2511			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2512				if ((p->queue - p->pc) == PQ_CACHE)
2513					vm_page_deactivate(p);
2514				p->flags |= PG_BUSY;
2515				mpte = pmap_enter_quick(pmap,
2516					addr + i386_ptob(tmpidx),
2517					VM_PAGE_TO_PHYS(p), mpte);
2518				p->flags |= PG_MAPPED;
2519				vm_page_wakeup(p);
2520			}
2521			objpgs -= 1;
2522		}
2523	} else {
2524		/*
2525		 * else lookup the pages one-by-one.
2526		 */
2527		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
2528			p = vm_page_lookup(object, tmpidx + pindex);
2529			if (p &&
2530			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2531				(p->busy == 0) &&
2532			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2533				if ((p->queue - p->pc) == PQ_CACHE)
2534					vm_page_deactivate(p);
2535				p->flags |= PG_BUSY;
2536				mpte = pmap_enter_quick(pmap,
2537					addr + i386_ptob(tmpidx),
2538					VM_PAGE_TO_PHYS(p), mpte);
2539				p->flags |= PG_MAPPED;
2540				vm_page_wakeup(p);
2541			}
2542		}
2543	}
2544	return;
2545}
2546
2547/*
2548 * pmap_prefault provides a quick way of clustering
2549 * pagefaults into a processes address space.  It is a "cousin"
2550 * of pmap_object_init_pt, except it runs at page fault time instead
2551 * of mmap time.
2552 */
2553#define PFBAK 4
2554#define PFFOR 4
2555#define PAGEORDER_SIZE (PFBAK+PFFOR)
2556
2557static int pmap_prefault_pageorder[] = {
2558	-PAGE_SIZE, PAGE_SIZE,
2559	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
2560	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
2561	-4 * PAGE_SIZE, 4 * PAGE_SIZE
2562};
2563
2564void
2565pmap_prefault(pmap, addra, entry)
2566	pmap_t pmap;
2567	vm_offset_t addra;
2568	vm_map_entry_t entry;
2569{
2570	int i;
2571	vm_offset_t starta;
2572	vm_offset_t addr;
2573	vm_pindex_t pindex;
2574	vm_page_t m, mpte;
2575	vm_object_t object;
2576
2577	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap))
2578		return;
2579
2580	object = entry->object.vm_object;
2581
2582	starta = addra - PFBAK * PAGE_SIZE;
2583	if (starta < entry->start) {
2584		starta = entry->start;
2585	} else if (starta > addra) {
2586		starta = 0;
2587	}
2588
2589	mpte = NULL;
2590	for (i = 0; i < PAGEORDER_SIZE; i++) {
2591		vm_object_t lobject;
2592		unsigned *pte;
2593
2594		addr = addra + pmap_prefault_pageorder[i];
2595		if (addr > addra + (PFFOR * PAGE_SIZE))
2596			addr = 0;
2597
2598		if (addr < starta || addr >= entry->end)
2599			continue;
2600
2601		if (*pmap_pde(pmap, addr) == 0)
2602			continue;
2603
2604		pte = (unsigned *) vtopte(addr);
2605		if (*pte)
2606			continue;
2607
2608		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2609		lobject = object;
2610		for (m = vm_page_lookup(lobject, pindex);
2611		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2612		    lobject = lobject->backing_object) {
2613			if (lobject->backing_object_offset & PAGE_MASK)
2614				break;
2615			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2616			m = vm_page_lookup(lobject->backing_object, pindex);
2617		}
2618
2619		/*
2620		 * give up when a page is not in memory
2621		 */
2622		if (m == NULL)
2623			break;
2624
2625		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2626			(m->busy == 0) &&
2627		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2628
2629			if ((m->queue - m->pc) == PQ_CACHE) {
2630				vm_page_deactivate(m);
2631			}
2632			m->flags |= PG_BUSY;
2633			mpte = pmap_enter_quick(pmap, addr,
2634				VM_PAGE_TO_PHYS(m), mpte);
2635			m->flags |= PG_MAPPED;
2636			vm_page_wakeup(m);
2637		}
2638	}
2639}
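
/*
 * Illustrative sketch, not part of the original file: the pageorder table
 * above probes addra +/- 1..4 pages around the faulting address.  A
 * hypothetical fault-time caller, which is only effective for the
 * current process's pmap:
 */
#if 0
static void
example_prefault(vm_map_entry_t entry, vm_offset_t fault_addr)
{
	pmap_prefault(&curproc->p_vmspace->vm_pmap, fault_addr, entry);
}
#endif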
2640
2641/*
2642 *	Routine:	pmap_change_wiring
2643 *	Function:	Change the wiring attribute for a map/virtual-address
2644 *			pair.
2645 *	In/out conditions:
2646 *			The mapping must already exist in the pmap.
2647 */
2648void
2649pmap_change_wiring(pmap, va, wired)
2650	register pmap_t pmap;
2651	vm_offset_t va;
2652	boolean_t wired;
2653{
2654	register unsigned *pte;
2655
2656	if (pmap == NULL)
2657		return;
2658
2659	pte = pmap_pte(pmap, va);
2660
2661	if (wired && !pmap_pte_w(pte))
2662		pmap->pm_stats.wired_count++;
2663	else if (!wired && pmap_pte_w(pte))
2664		pmap->pm_stats.wired_count--;
2665
2666	/*
2667	 * Wiring is not a hardware characteristic so there is no need to
2668	 * invalidate TLB.
2669	 */
2670	pmap_pte_set_w(pte, wired);
2671}
2672
2673
2674
2675/*
2676 *	Copy the range specified by src_addr/len
2677 *	from the source map to the range dst_addr/len
2678 *	in the destination map.
2679 *
2680 *	This routine is only advisory and need not do anything.
2681 */
2682
2683void
2684pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2685	pmap_t dst_pmap, src_pmap;
2686	vm_offset_t dst_addr;
2687	vm_size_t len;
2688	vm_offset_t src_addr;
2689{
2690	vm_offset_t addr;
2691	vm_offset_t end_addr = src_addr + len;
2692	vm_offset_t pdnxt;
2693	unsigned src_frame, dst_frame;
2694
2695	if (dst_addr != src_addr)
2696		return;
2697
2698	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2699	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
2700		return;
2701	}
2702
2703	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2704	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
2705		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
2706		invltlb();
2707	}
2708
2709	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
2710		unsigned *src_pte, *dst_pte;
2711		vm_page_t dstmpte, srcmpte;
2712		vm_offset_t srcptepaddr;
2713		unsigned ptepindex;
2714
2715#if !defined(MAX_PERF)
2716		if (addr >= UPT_MIN_ADDRESS)
2717			panic("pmap_copy: invalid to pmap_copy page tables\n");
2718#endif
2719
2720		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2721		ptepindex = addr >> PDRSHIFT;
2722
2723		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2724		if (srcptepaddr == 0)
2725			continue;
2726
2727		if (srcptepaddr & PG_PS) {
2728			if (dst_pmap->pm_pdir[ptepindex] == 0) {
2729				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2730				dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2731			}
2732			continue;
2733		}
2734
2735		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
2736		if ((srcmpte == NULL) ||
2737			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
2738			continue;
2739
2740		if (pdnxt > end_addr)
2741			pdnxt = end_addr;
2742
2743		src_pte = (unsigned *) vtopte(addr);
2744		dst_pte = (unsigned *) avtopte(addr);
2745		while (addr < pdnxt) {
2746			unsigned ptetemp;
2747			ptetemp = *src_pte;
2748			/*
2749			 * we only virtual copy managed pages
2750			 * we only virtual-copy managed pages
2751			if ((ptetemp & PG_MANAGED) != 0) {
2752				/*
2753				 * We have to check after allocpte for the
2754				 * pte still being around...  allocpte can
2755				 * block.
2756				 */
2757				dstmpte = pmap_allocpte(dst_pmap, addr);
2758				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
2759					/*
2760					 * Clear the modified and
2761					 * accessed (referenced) bits
2762					 * during the copy.
2763					 */
2764					*dst_pte = ptetemp & ~(PG_M | PG_A);
2765					dst_pmap->pm_stats.resident_count++;
2766					pmap_insert_entry(dst_pmap, addr,
2767						dstmpte,
2768						(ptetemp & PG_FRAME));
2769	 			} else {
2770					pmap_unwire_pte_hold(dst_pmap, dstmpte);
2771				}
2772				if (dstmpte->hold_count >= srcmpte->hold_count)
2773					break;
2774			}
2775			addr += PAGE_SIZE;
2776			src_pte++;
2777			dst_pte++;
2778		}
2779	}
2780}
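
/*
 * Illustrative sketch, not part of the original file: pmap_copy() is
 * advisory and requires dst_addr == src_addr; it copies managed, non-4MB
 * mappings with PG_M and PG_A cleared.  A hypothetical fork-time caller:
 */
#if 0
static void
example_copy_range(pmap_t dst, pmap_t src, vm_offset_t start, vm_size_t len)
{
	pmap_copy(dst, src, start, len, start);
}
#endif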
2781
2782/*
2783 *	Routine:	pmap_kernel
2784 *	Function:
2785 *		Returns the physical map handle for the kernel.
2786 */
2787pmap_t
2788pmap_kernel()
2789{
2790	return (kernel_pmap);
2791}
2792
2793/*
2794 *	pmap_zero_page zeros the specified (machine independent)
2795 *	page by mapping the page into virtual memory and using
2796 *	bzero to clear its contents, one machine dependent page
2797 *	at a time.
2798 */
2799void
2800pmap_zero_page(phys)
2801	vm_offset_t phys;
2802{
2803#ifdef SMP
2804#if !defined(MAX_PERF)
2805	if (*(int *) prv_CMAP3)
2806		panic("pmap_zero_page: prv_CMAP3 busy");
2807#endif
2808
2809	*(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2810	cpu_invlpg(&prv_CPAGE3);
2811
2812#if defined(I686_CPU)
2813	if (cpu == CPU_686)
2814		i686_pagezero(&prv_CPAGE3);
2815	else
2816#endif
2817		bzero(&prv_CPAGE3, PAGE_SIZE);
2818
2819	*(int *) prv_CMAP3 = 0;
2820#else
2821#if !defined(MAX_PERF)
2822	if (*(int *) CMAP2)
2823		panic("pmap_zero_page: CMAP2 busy");
2824#endif
2825
2826	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2827	if (cpu_class == CPUCLASS_386) {
2828		invltlb();
2829	} else {
2830		invlpg((u_int)CADDR2);
2831	}
2832
2833#if defined(I686_CPU)
2834	if (cpu == CPU_686)
2835		i686_pagezero(CADDR2);
2836	else
2837#endif
2838		bzero(CADDR2, PAGE_SIZE);
2839	*(int *) CMAP2 = 0;
2840#endif
2841}
2842
2843/*
2844 *	pmap_copy_page copies the specified (machine independent)
2845 *	page by mapping the page into virtual memory and using
2846 *	bcopy to copy the page, one machine dependent page at a
2847 *	time.
2848 */
2849void
2850pmap_copy_page(src, dst)
2851	vm_offset_t src;
2852	vm_offset_t dst;
2853{
2854#ifdef SMP
2855#if !defined(MAX_PERF)
2856	if (*(int *) prv_CMAP1)
2857		panic("pmap_copy_page: prv_CMAP1 busy");
2858	if (*(int *) prv_CMAP2)
2859		panic("pmap_copy_page: prv_CMAP2 busy");
2860#endif
2861
2862	*(int *) prv_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2863	*(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2864
2865	cpu_invlpg(&prv_CPAGE1);
2866	cpu_invlpg(&prv_CPAGE2);
2867
2868	bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE);
2869
2870	*(int *) prv_CMAP1 = 0;
2871	*(int *) prv_CMAP2 = 0;
2872#else
2873#if !defined(MAX_PERF)
2874	if (*(int *) CMAP1 || *(int *) CMAP2)
2875		panic("pmap_copy_page: CMAP busy");
2876#endif
2877
2878	*(int *) CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2879	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2880	if (cpu_class == CPUCLASS_386) {
2881		invltlb();
2882	} else {
2883		invlpg((u_int)CADDR1);
2884		invlpg((u_int)CADDR2);
2885	}
2886
2887	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2888
2889	*(int *) CMAP1 = 0;
2890	*(int *) CMAP2 = 0;
2891#endif
2892}
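
/*
 * Illustrative sketch, not part of the original file: both routines take
 * physical addresses, so callers typically hand in VM_PAGE_TO_PHYS() of a
 * vm_page_t.  The function name and arguments are hypothetical.
 */
#if 0
static void
example_page_ops(vm_page_t src, vm_page_t dst)
{
	/* clear the destination page */
	pmap_zero_page(VM_PAGE_TO_PHYS(dst));
	/* or copy the contents of src over it */
	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
}
#endif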
2893
2894
2895/*
2896 *	Routine:	pmap_pageable
2897 *	Function:
2898 *		Make the specified pages (by pmap, offset)
2899 *		pageable (or not) as requested.
2900 *
2901 *		A page which is not pageable may not take
2902 *		a fault; therefore, its page table entry
2903 *		must remain valid for the duration.
2904 *
2905 *		This routine is merely advisory; pmap_enter
2906 *		will specify that these pages are to be wired
2907 *		down (or not) as appropriate.
2908 */
2909void
2910pmap_pageable(pmap, sva, eva, pageable)
2911	pmap_t pmap;
2912	vm_offset_t sva, eva;
2913	boolean_t pageable;
2914{
2915}
2916
2917/*
2918 * this routine returns true if a physical page resides
2919 * in the given pmap.
2920 */
2921boolean_t
2922pmap_page_exists(pmap, pa)
2923	pmap_t pmap;
2924	vm_offset_t pa;
2925{
2926	register pv_entry_t pv;
2927	pv_table_t *ppv;
2928	int s;
2929
2930	if (!pmap_is_managed(pa))
2931		return FALSE;
2932
2933	s = splvm();
2934
2935	ppv = pa_to_pvh(pa);
2936	/*
2937	 * Check the page's current mappings, returning TRUE as soon as this pmap is found.
2938	 */
2939	for (pv = TAILQ_FIRST(&ppv->pv_list);
2940		pv;
2941		pv = TAILQ_NEXT(pv, pv_list)) {
2942		if (pv->pv_pmap == pmap) {
2943			splx(s);
2944			return TRUE;
2945		}
2946	}
2947	splx(s);
2948	return (FALSE);
2949}
2950
2951#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2952/*
2953 * Remove all pages from specified address space
2954 * this aids process exit speeds.  Also, this code
2955 * is special cased for current process only, but
2956 * can have the more generic (and slightly slower)
2957 * mode enabled.  This is much faster than pmap_remove
2958 * in the case of running down an entire address space.
2959 */
2960void
2961pmap_remove_pages(pmap, sva, eva)
2962	pmap_t pmap;
2963	vm_offset_t sva, eva;
2964{
2965	unsigned *pte, tpte;
2966	pv_table_t *ppv;
2967	pv_entry_t pv, npv;
2968	int s;
2969
2970#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2971	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) {
2972		printf("warning: pmap_remove_pages called with non-current pmap\n");
2973		return;
2974	}
2975#endif
2976
2977	s = splvm();
2978	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
2979		pv;
2980		pv = npv) {
2981
2982		if (pv->pv_va >= eva || pv->pv_va < sva) {
2983			npv = TAILQ_NEXT(pv, pv_plist);
2984			continue;
2985		}
2986
2987#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2988		pte = (unsigned *)vtopte(pv->pv_va);
2989#else
2990		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2991#endif
2992		tpte = *pte;
2993
2994/*
2995 * We cannot remove wired pages from a process' mapping at this time
2996 */
2997		if (tpte & PG_W) {
2998			npv = TAILQ_NEXT(pv, pv_plist);
2999			continue;
3000		}
3001		*pte = 0;
3002
3003		ppv = pa_to_pvh(tpte);
3004
3005		pv->pv_pmap->pm_stats.resident_count--;
3006
3007		/*
3008		 * Update the vm_page_t clean and reference bits.
3009		 */
3010		if (tpte & PG_M) {
3011			ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
3012		}
3013
3014
3015		npv = TAILQ_NEXT(pv, pv_plist);
3016		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
3017
3018		ppv->pv_list_count--;
3019		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
3020		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
3021			ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
3022		}
3023
3024		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
3025		free_pv_entry(pv);
3026	}
3027	splx(s);
3028	invltlb();
3029}
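
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * exit-time caller tearing down the whole user address space of the
 * current process.  VM_MIN_ADDRESS and VM_MAXUSER_ADDRESS are assumed to
 * be the usual i386 vmparam.h constants.
 */
#if 0
static void
example_exit_teardown(struct proc *p)
{
	/* with PMAP_REMOVE_PAGES_CURPROC_ONLY, p must be curproc */
	pmap_remove_pages(&p->p_vmspace->vm_pmap,
	    VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
}
#endif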
3030
3031/*
3032 * pmap_testbit tests bits in pte's
3033 * note that the testbit/changebit routines are inline,
3034 * and a lot of things compile-time evaluate.
3035 */
3036static boolean_t
3037pmap_testbit(pa, bit)
3038	register vm_offset_t pa;
3039	int bit;
3040{
3041	register pv_entry_t pv;
3042	pv_table_t *ppv;
3043	unsigned *pte;
3044	int s;
3045
3046	if (!pmap_is_managed(pa))
3047		return FALSE;
3048
3049	ppv = pa_to_pvh(pa);
3050	if (TAILQ_FIRST(&ppv->pv_list) == NULL)
3051		return FALSE;
3052
3053	s = splvm();
3054
3055	for (pv = TAILQ_FIRST(&ppv->pv_list);
3056		pv;
3057		pv = TAILQ_NEXT(pv, pv_list)) {
3058
3059		/*
3060		 * if the bit being tested is the modified or accessed bit,
3061		 * then only check the mappings whose modifications we
3062		 * actually track (see pmap_track_modified).
3063		 */
3064		if (bit & (PG_A|PG_M)) {
3065			if (!pmap_track_modified(pv->pv_va))
3066				continue;
3067		}
3068
3069#if defined(PMAP_DIAGNOSTIC)
3070		if (!pv->pv_pmap) {
3071			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
3072			continue;
3073		}
3074#endif
3075		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3076		if (*pte & bit) {
3077			splx(s);
3078			return TRUE;
3079		}
3080	}
3081	splx(s);
3082	return (FALSE);
3083}
3084
3085/*
3086 * this routine is used to modify bits in ptes
3087 */
3088static void
3089pmap_changebit(pa, bit, setem)
3090	vm_offset_t pa;
3091	int bit;
3092	boolean_t setem;
3093{
3094	register pv_entry_t pv;
3095	pv_table_t *ppv;
3096	register unsigned *pte;
3097	int changed;
3098	int s;
3099
3100	if (!pmap_is_managed(pa))
3101		return;
3102
3103	s = splvm();
3104	changed = 0;
3105	ppv = pa_to_pvh(pa);
3106
3107	/*
3108	 * Loop over all current mappings, setting/clearing as appropriate.
3109	 * If setting RO, do we need to clear the VAC?
3110	 */
3111	for (pv = TAILQ_FIRST(&ppv->pv_list);
3112		pv;
3113		pv = TAILQ_NEXT(pv, pv_list)) {
3114
3115		/*
3116		 * don't write protect pager mappings
3117		 */
3118		if (!setem && (bit == PG_RW)) {
3119			if (!pmap_track_modified(pv->pv_va))
3120				continue;
3121		}
3122
3123#if defined(PMAP_DIAGNOSTIC)
3124		if (!pv->pv_pmap) {
3125			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
3126			continue;
3127		}
3128#endif
3129
3130		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3131
3132		if (setem) {
3133			*(int *)pte |= bit;
3134			changed = 1;
3135		} else {
3136			vm_offset_t pbits = *(vm_offset_t *)pte;
3137			if (pbits & bit) {
3138				changed = 1;
3139				if (bit == PG_RW) {
3140					if (pbits & PG_M) {
3141						ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
3142					}
3143					*(int *)pte = pbits & ~(PG_M|PG_RW);
3144				} else {
3145					*(int *)pte = pbits & ~bit;
3146				}
3147			}
3148		}
3149	}
3150	splx(s);
3151	if (changed)
3152		invltlb();
3153}
3154
3155/*
3156 *      pmap_page_protect:
3157 *
3158 *      Lower the permission for all mappings to a given page.
3159 */
3160void
3161pmap_page_protect(vm_offset_t phys, vm_prot_t prot)
3162{
3163	if ((prot & VM_PROT_WRITE) == 0) {
3164		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
3165			pmap_changebit(phys, PG_RW, FALSE);
3166		} else {
3167			pmap_remove_all(phys);
3168		}
3169	}
3170}
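
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * caller in the pageout path downgrading every mapping of a page to
 * read-only before cleaning it, so that later writes re-dirty the page.
 */
#if 0
static void
example_protect_for_clean(vm_page_t m)
{
	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
}
#endif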
3171
3172vm_offset_t
3173pmap_phys_address(ppn)
3174	int ppn;
3175{
3176	return (i386_ptob(ppn));
3177}
3178
3179/*
3180 *	pmap_ts_referenced:
3181 *
3182 *	Return the count of reference bits for a page, clearing all of them.
3183 *
3184 */
3185int
3186pmap_ts_referenced(vm_offset_t pa)
3187{
3188	register pv_entry_t pv;
3189	pv_table_t *ppv;
3190	unsigned *pte;
3191	int s;
3192	int rtval = 0;
3193
3194	if (!pmap_is_managed(pa))
3195		return FALSE;
3196
3197	s = splvm();
3198
3199	ppv = pa_to_pvh(pa);
3200
3201	if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
3202		splx(s);
3203		return 0;
3204	}
3205
3206	/*
3207	 * Loop over the page's current mappings, counting and clearing the accessed bits.
3208	 */
3209	for (pv = TAILQ_FIRST(&ppv->pv_list);
3210		pv;
3211		pv = TAILQ_NEXT(pv, pv_list)) {
3212
3213		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
3214		/*
3215		 * only count references for mappings whose modifications
3216		 * we track; untracked entries are simply re-queued at the
3217		 * tail of the list.
3218		 */
3219		if (!pmap_track_modified(pv->pv_va)) {
3220			TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
3221			continue;
3222		}
3223
3224		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3225		if (pte == NULL) {
3226			TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
3227			continue;
3228		}
3229
3230		if (*pte & PG_A) {
3231			rtval++;
3232			*pte &= ~PG_A;
3233			if (rtval > 4) {
3234				TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
3235				break;
3236			}
3237		}
3238		TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
3239	}
3240
3241	splx(s);
3242	if (rtval) {
3243		invltlb();
3244	}
3245	return (rtval);
3246}
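
/*
 * Illustrative sketch, not part of the original file: a hypothetical page
 * scanner using the reference count (capped at a small value by the loop
 * above) as a cheap activity measure for a page.
 */
#if 0
static int
example_page_activity(vm_page_t m)
{
	/* counts and clears PG_A across the page's mappings */
	return (pmap_ts_referenced(VM_PAGE_TO_PHYS(m)));
}
#endif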
3247
3248/*
3249 *	pmap_is_modified:
3250 *
3251 *	Return whether or not the specified physical page was modified
3252 *	in any physical maps.
3253 */
3254boolean_t
3255pmap_is_modified(vm_offset_t pa)
3256{
3257	return pmap_testbit((pa), PG_M);
3258}
3259
3260/*
3261 *	Clear the modify bits on the specified physical page.
3262 */
3263void
3264pmap_clear_modify(vm_offset_t pa)
3265{
3266	pmap_changebit((pa), PG_M, FALSE);
3267}
3268
3269/*
3270 *	pmap_clear_reference:
3271 *
3272 *	Clear the reference bit on the specified physical page.
3273 */
3274void
3275pmap_clear_reference(vm_offset_t pa)
3276{
3277	pmap_changebit((pa), PG_A, FALSE);
3278}
3279
3280/*
3281 * Miscellaneous support routines follow
3282 */
3283
3284static void
3285i386_protection_init()
3286{
3287	register int *kp, prot;
3288
3289	kp = protection_codes;
3290	for (prot = 0; prot < 8; prot++) {
3291		switch (prot) {
3292		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3293			/*
3294			 * Read access is also 0. There isn't any execute bit,
3295			 * so just make it readable.
3296			 */
3297		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3298		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3299		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3300			*kp++ = 0;
3301			break;
3302		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3303		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3304		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3305		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3306			*kp++ = PG_RW;
3307			break;
3308		}
3309	}
3310}
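
/*
 * Illustrative sketch, not part of the original file: pte_prot(), used by
 * pmap_enter() above but defined elsewhere, is assumed to boil down to an
 * index into protection_codes[].  A minimal lookup under that assumption:
 */
#if 0
static int
example_pte_prot(vm_prot_t prot)
{
	/* VM_PROT_READ/WRITE/EXECUTE are assumed to occupy the low 3 bits */
	return (protection_codes[prot &
	    (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)]);
}
#endif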
3311
3312/*
3313 * Map a set of physical memory pages into the kernel virtual
3314 * address space. Return a pointer to where it is mapped. This
3315 * routine is intended to be used for mapping device memory,
3316 * NOT real memory.
3317 */
3318void *
3319pmap_mapdev(pa, size)
3320	vm_offset_t pa;
3321	vm_size_t size;
3322{
3323	vm_offset_t va, tmpva;
3324	unsigned *pte;
3325
3326	size = roundup(size, PAGE_SIZE);
3327
3328	va = kmem_alloc_pageable(kernel_map, size);
3329#if !defined(MAX_PERF)
3330	if (!va)
3331		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3332#endif
3333
3334	pa = pa & PG_FRAME;
3335	for (tmpva = va; size > 0;) {
3336		pte = (unsigned *)vtopte(tmpva);
3337		*pte = pa | PG_RW | PG_V | pgeflag;
3338		size -= PAGE_SIZE;
3339		tmpva += PAGE_SIZE;
3340		pa += PAGE_SIZE;
3341	}
3342	invltlb();
3343
3344	return ((void *) va);
3345}
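
/*
 * Illustrative sketch, not part of the original file: mapping one page of
 * device registers.  0xfe000000 is a made-up physical address used purely
 * for illustration; real callers get the address from bus probe code.
 */
#if 0
static volatile unsigned *
example_map_regs(void)
{
	return ((volatile unsigned *)
	    pmap_mapdev((vm_offset_t)0xfe000000, PAGE_SIZE));
}
#endif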
3346
3347/*
3348 * perform the pmap work for mincore
3349 */
3350int
3351pmap_mincore(pmap, addr)
3352	pmap_t pmap;
3353	vm_offset_t addr;
3354{
3355
3356	unsigned *ptep, pte;
3357	vm_page_t m;
3358	int val = 0;
3359
3360	ptep = pmap_pte(pmap, addr);
3361	if (ptep == 0) {
3362		return 0;
3363	}
3364
3365	if ((pte = *ptep) != 0) {
3366		pv_table_t *ppv;
3367		vm_offset_t pa;
3368
3369		val = MINCORE_INCORE;
3370		if ((pte & PG_MANAGED) == 0)
3371			return val;
3372
3373		pa = pte & PG_FRAME;
3374
3375		ppv = pa_to_pvh((pa & PG_FRAME));
3376		m = ppv->pv_vm_page;
3377
3378		/*
3379		 * Modified by us
3380		 */
3381		if (pte & PG_M)
3382			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3383		/*
3384		 * Modified by someone
3385		 */
3386		else if (m->dirty || pmap_is_modified(pa))
3387			val |= MINCORE_MODIFIED_OTHER;
3388		/*
3389		 * Referenced by us
3390		 */
3391		if (pte & PG_A)
3392			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3393
3394		/*
3395		 * Referenced by someone
3396		 */
3397		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(pa)) {
3398			val |= MINCORE_REFERENCED_OTHER;
3399			m->flags |= PG_REFERENCED;
3400		}
3401	}
3402	return val;
3403}
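
/*
 * Illustrative sketch, not part of the original file: decoding the flags
 * returned by pmap_mincore() for a single address, as the mincore(2)
 * system call path might.  The function name is hypothetical.
 */
#if 0
static int
example_page_resident(pmap_t pmap, vm_offset_t va)
{
	int val;

	val = pmap_mincore(pmap, va);
	return ((val & MINCORE_INCORE) != 0);
}
#endif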
3404
3405void
3406pmap_activate(struct proc *p)
3407{
3408#if defined(SWTCH_OPTIM_STATS)
3409	tlb_flush_count++;
3410#endif
3411	load_cr3(p->p_addr->u_pcb.pcb_cr3 =
3412		vtophys(p->p_vmspace->vm_pmap.pm_pdir));
3413}
3414
3415vm_offset_t
3416pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3417{
3418	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3419		return addr;
3420	}
3421
3422	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3423	return addr;
3424}
3425
3426
3427#if defined(PMAP_DEBUG)
3428int pmap_pid_dump(int pid) {
3429	pmap_t pmap;
3430	struct proc *p;
3431	int npte = 0;
3432	int index;
3433	for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) {
3434		if (p->p_pid != pid)
3435			continue;
3436
3437		if (p->p_vmspace) {
3438			int i,j;
3439			index = 0;
3440			pmap = &p->p_vmspace->vm_pmap;
3441			for(i=0;i<1024;i++) {
3442				pd_entry_t *pde;
3443				unsigned *pte;
3444				unsigned base = i << PDRSHIFT;
3445
3446				pde = &pmap->pm_pdir[i];
3447				if (pde && pmap_pde_v(pde)) {
3448					for(j=0;j<1024;j++) {
3449						unsigned va = base + (j << PAGE_SHIFT);
3450						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
3451							if (index) {
3452								index = 0;
3453								printf("\n");
3454							}
3455							return npte;
3456						}
3457						pte = pmap_pte_quick( pmap, va);
3458						if (pte && pmap_pte_v(pte)) {
3459							vm_offset_t pa;
3460							vm_page_t m;
3461							pa = *(int *)pte;
3462							m = PHYS_TO_VM_PAGE((pa & PG_FRAME));
3463							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
3464								va, pa, m->hold_count, m->wire_count, m->flags);
3465							npte++;
3466							index++;
3467							if (index >= 2) {
3468								index = 0;
3469								printf("\n");
3470							} else {
3471								printf(" ");
3472							}
3473						}
3474					}
3475				}
3476			}
3477		}
3478	}
3479	return npte;
3480}
3481#endif
3482
3483#if defined(DEBUG)
3484
3485static void	pads __P((pmap_t pm));
3486static void	pmap_pvdump __P((vm_offset_t pa));
3487
3488/* print the address space of a pmap */
3489static void
3490pads(pm)
3491	pmap_t pm;
3492{
3493	unsigned va, i, j;
3494	unsigned *ptep;
3495
3496	if (pm == kernel_pmap)
3497		return;
3498	for (i = 0; i < 1024; i++)
3499		if (pm->pm_pdir[i])
3500			for (j = 0; j < 1024; j++) {
3501				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3502				if (pm == kernel_pmap && va < KERNBASE)
3503					continue;
3504				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
3505					continue;
3506				ptep = pmap_pte_quick(pm, va);
3507				if (pmap_pte_v(ptep))
3508					printf("%x:%x ", va, *(int *) ptep);
3509			}
3510
3511}
3512
3513static void
3514pmap_pvdump(pa)
3515	vm_offset_t pa;
3516{
3517	pv_table_t *ppv;
3518	register pv_entry_t pv;
3519
3520	printf("pa %x", pa);
3521	ppv = pa_to_pvh(pa);
3522	for (pv = TAILQ_FIRST(&ppv->pv_list);
3523		pv;
3524		pv = TAILQ_NEXT(pv, pv_list)) {
3525#ifdef used_to_be
3526		printf(" -> pmap %p, va %x, flags %x",
3527		    (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags);
3528#endif
3529		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3530		pads(pv->pv_pmap);
3531	}
3532	printf(" ");
3533}
3534#endif
3535