pmap.c revision 31709
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42 *	$Id: pmap.c,v 1.173 1997/11/20 19:30:31 bde Exp $
43 */
44
45/*
46 *	Manages physical address maps.
47 *
48 *	In addition to hardware address maps, this
49 *	module is called upon to provide software-use-only
50 *	maps which may or may not be stored in the same
51 *	form as hardware maps.  These pseudo-maps are
52 *	used to store intermediate results from copy
53 *	operations to and from address spaces.
54 *
55 *	Since the information managed by this module is
56 *	also stored by the logical address mapping module,
57 *	this module may throw away valid virtual-to-physical
58 *	mappings at almost any time.  However, invalidations
59 *	of virtual-to-physical mappings must be done as
60 *	requested.
61 *
62 *	In order to cope with hardware architectures which
63 *	make virtual-to-physical map invalidates expensive,
64 * this module may delay invalidate or reduced-protection
65 *	operations until such time as they are actually
66 *	necessary.  This module is given full information as
67 *	to which processors are currently using which maps,
68 *	and to when physical maps must be made correct.
69 */
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/proc.h>
74#include <sys/msgbuf.h>
75#include <sys/vmmeter.h>
76#include <sys/mman.h>
77
78#include <vm/vm.h>
79#include <vm/vm_param.h>
80#include <vm/vm_prot.h>
81#include <sys/lock.h>
82#include <vm/vm_kern.h>
83#include <vm/vm_page.h>
84#include <vm/vm_map.h>
85#include <vm/vm_object.h>
86#include <vm/vm_extern.h>
87#include <vm/vm_pageout.h>
88#include <vm/vm_pager.h>
89#include <vm/vm_zone.h>
90
91#include <sys/user.h>
92
93#include <machine/cputypes.h>
94#include <machine/md_var.h>
95#include <machine/specialreg.h>
96#if defined(SMP) || defined(APIC_IO)
97#include <machine/smp.h>
98#include <machine/apic.h>
99#endif /* SMP || APIC_IO */
100
101#define PMAP_KEEP_PDIRS
102#ifndef PMAP_SHPGPERPROC
103#define PMAP_SHPGPERPROC 200
104#endif
105
106#if defined(DIAGNOSTIC)
107#define PMAP_DIAGNOSTIC
108#endif
109
110#define MINPV 2048
111
112#if !defined(PMAP_DIAGNOSTIC)
113#define PMAP_INLINE __inline
114#else
115#define PMAP_INLINE
116#endif
117
118#define PTPHINT
119
120/*
121 * Get PDEs and PTEs for user/kernel address space
122 */
123#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
124#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
125
126#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
127#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
128#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
129#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
130#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
131
132#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
133#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
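
/*
 * Illustrative sketch (added for exposition, not compiled): how the macros
 * above carve a linear address into its pieces on the i386, assuming the
 * usual constants PDRSHIFT == 22, PAGE_SHIFT == 12 and NPTEPG == 1024.
 */
#if 0
static void
example_va_split(vm_offset_t va)
{
	unsigned pdi = va >> PDRSHIFT;			  /* page directory index */
	unsigned pti = (va >> PAGE_SHIFT) & (NPTEPG - 1); /* index within the PT page */
	unsigned off = va & PAGE_MASK;			  /* byte offset in the page */

	/* e.g. va == 0x0804a123 gives pdi == 0x20, pti == 0x4a, off == 0x123 */
}
#endif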
134
135/*
136 * Given a map and a machine independent protection code,
137 * convert to an i386 protection code.
138 */
139#define pte_prot(m, p)	(protection_codes[p])
140static int protection_codes[8];
141
142#define	pa_index(pa)		atop((pa) - vm_first_phys)
143#define	pa_to_pvh(pa)		(&pv_table[pa_index(pa)])
144
145static struct pmap kernel_pmap_store;
146pmap_t kernel_pmap;
147extern pd_entry_t my_idlePTD;
148
149vm_offset_t avail_start;	/* PA of first available physical page */
150vm_offset_t avail_end;		/* PA of last available physical page */
151vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
152vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
153static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
154static vm_offset_t vm_first_phys;
155int pgeflag;		/* PG_G or-in */
156int pseflag;		/* PG_PS or-in */
157int pv_npg;
158
159int nkpt;
160vm_offset_t kernel_vm_end;
161
162/*
163 * Data for the pv entry allocation mechanism
164 */
165vm_zone_t pvzone;
166struct vm_zone pvzone_store;
167struct vm_object pvzone_obj;
168int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
169int pmap_pagedaemon_waken = 0;
170struct pv_entry *pvinit;
171
172/*
173 * All those kernel PT submaps that BSD is so fond of
174 */
175pt_entry_t *CMAP1 = 0;
176static pt_entry_t *CMAP2, *ptmmap;
177static pv_table_t *pv_table;
178caddr_t CADDR1 = 0, ptvmmap = 0;
179static caddr_t CADDR2;
180static pt_entry_t *msgbufmap;
181struct msgbuf *msgbufp=0;
182
183#ifdef SMP
184extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[];
185extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3;
186extern pd_entry_t *IdlePTDS[];
187extern pt_entry_t SMP_prvpt[];
188#endif
189
190pt_entry_t *PMAP1 = 0;
191unsigned *PADDR1 = 0;
192
193static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
194static unsigned * get_ptbase __P((pmap_t pmap));
195static pv_entry_t get_pv_entry __P((void));
196static void	i386_protection_init __P((void));
197static void	pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem));
198
199static PMAP_INLINE int	pmap_is_managed __P((vm_offset_t pa));
200static void	pmap_remove_all __P((vm_offset_t pa));
201static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
202				      vm_offset_t pa, vm_page_t mpte));
203static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
204					vm_offset_t sva));
205static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
206static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv,
207					vm_offset_t va));
208static boolean_t pmap_testbit __P((vm_offset_t pa, int bit));
209static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
210		vm_page_t mpte, vm_offset_t pa));
211
212static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
213
214static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
215static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
216static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
217static vm_page_t pmap_page_alloc __P((vm_object_t object, vm_pindex_t pindex));
218static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
219static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
220vm_offset_t pmap_kmem_choose(vm_offset_t addr) ;
221void pmap_collect(void);
222
223#define PDSTACKMAX 6
224static vm_offset_t pdstack[PDSTACKMAX];
225static int pdstackptr;
226unsigned pdir4mb;
227
228/*
229 *	Routine:	pmap_pte
230 *	Function:
231 *		Extract the page table entry associated
232 *		with the given map/virtual_address pair.
233 */
234
235PMAP_INLINE unsigned *
236pmap_pte(pmap, va)
237	register pmap_t pmap;
238	vm_offset_t va;
239{
240	unsigned *pdeaddr;
241
242	if (pmap) {
243		pdeaddr = (unsigned *) pmap_pde(pmap, va);
244		if (*pdeaddr & PG_PS)
245			return pdeaddr;
246		if (*pdeaddr) {
247			return get_ptbase(pmap) + i386_btop(va);
248		}
249	}
250	return (0);
251}
252
253/*
254 * Move the kernel virtual free pointer to the next
255 * 4MB.  This is used to help improve performance
256 * by using a large (4MB) page for much of the kernel
257 * (.text, .data, .bss)
258 */
259vm_offset_t
260pmap_kmem_choose(vm_offset_t addr) {
261	vm_offset_t newaddr = addr;
262#ifndef DISABLE_PSE
263	if (cpu_feature & CPUID_PSE) {
264		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
265	}
266#endif
267	return newaddr;
268}
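
/*
 * Worked example (hypothetical address, added for exposition): with
 * NBPDR == 4MB == 0x400000,
 *
 *	pmap_kmem_choose(0xf0345000)
 *		== (0xf0345000 + 0x3fffff) & ~0x3fffff
 *		== 0xf0400000
 *
 * i.e. the kernel virtual free pointer is pushed up to the next 4MB
 * boundary so that a single PSE (4MB) page can cover everything below it.
 */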
269
270/*
271 *	Bootstrap the system enough to run with virtual memory.
272 *
273 *	On the i386 this is called after mapping has already been enabled
274 *	and just syncs the pmap module with what has already been done.
275 *	[We can't call it easily with mapping off since the kernel is not
276 *	mapped with PA == VA, hence we would have to relocate every address
277 *	from the linked base (virtual) address "KERNBASE" to the actual
278 *	(physical) address starting relative to 0]
279 */
280void
281pmap_bootstrap(firstaddr, loadaddr)
282	vm_offset_t firstaddr;
283	vm_offset_t loadaddr;
284{
285	vm_offset_t va;
286	pt_entry_t *pte;
287	int i, j;
288
289	avail_start = firstaddr;
290
291	/*
292	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
293	 * large. It should instead be correctly calculated in locore.s and
294	 * not based on 'first' (which is a physical address, not a virtual
295	 * address, for the start of unused physical memory). The kernel
296	 * page tables are NOT double mapped and thus should not be included
297	 * in this calculation.
298	 */
299	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
300	virtual_avail = pmap_kmem_choose(virtual_avail);
301
302	virtual_end = VM_MAX_KERNEL_ADDRESS;
303
304	/*
305	 * Initialize protection array.
306	 */
307	i386_protection_init();
308
309	/*
310	 * The kernel's pmap is statically allocated so we don't have to use
311	 * pmap_create, which is unlikely to work correctly at this part of
312	 * the boot sequence (XXX and which no longer exists).
313	 */
314	kernel_pmap = &kernel_pmap_store;
315
316	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
317
318	kernel_pmap->pm_count = 1;
319	TAILQ_INIT(&kernel_pmap->pm_pvlist);
320	nkpt = NKPT;
321
322	/*
323	 * Reserve some special page table entries/VA space for temporary
324	 * mapping of pages.
325	 */
326#define	SYSMAP(c, p, v, n)	\
327	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
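
/*
 * For clarity (sketch, not authoritative): the invocation
 * SYSMAP(caddr_t, CMAP1, CADDR1, 1) below expands to roughly
 *
 *	CADDR1 = (caddr_t)va; va += 1 * PAGE_SIZE;
 *	CMAP1 = pte; pte += 1;
 *
 * i.e. it reserves one page of KVA and remembers the kernel pte that maps
 * it, so that pte can later be repointed at an arbitrary physical page;
 * CMAP1/CMAP2 below are used this way for zeroing and copying pages.
 */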
328
329	va = virtual_avail;
330	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
331
332	/*
333	 * CMAP1/CMAP2 are used for zeroing and copying pages.
334	 */
335	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
336	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
337
338	/*
339	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
340	 * XXX ptmmap is not used.
341	 */
342	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
343
344	/*
345	 * msgbufp is used to map the system message buffer.
346	 * XXX msgbufmap is not used.
347	 */
348	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
349	       atop(round_page(sizeof(struct msgbuf))))
350
351	/*
352	 * PMAP1 and PADDR1 are used by pmap_pte_quick
353	 */
354	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
355
356	virtual_avail = va;
357
358	*(int *) CMAP1 = *(int *) CMAP2 = 0;
359	*(int *) PTD = 0;
360
361
362	pgeflag = 0;
363#if !defined(SMP)
364	if (cpu_feature & CPUID_PGE) {
365		pgeflag = PG_G;
366	}
367#endif
368
369/*
370 * Initialize the 4MB page size flag
371 */
372	pseflag = 0;
373/*
374 * The 4MB page version of the initial
375 * kernel page mapping.
376 */
377	pdir4mb = 0;
378
379#if !defined(DISABLE_PSE)
380	if (cpu_feature & CPUID_PSE) {
381		unsigned ptditmp;
382		/*
383		 * Enable the PSE mode
384		 */
385		load_cr4(rcr4() | CR4_PSE);
386
387		/*
388		 * Note that we have enabled PSE mode
389		 */
390		pseflag = PG_PS;
391		ptditmp = (unsigned) kernel_pmap->pm_pdir[KPTDI];
392		ptditmp &= ~(NBPDR - 1);
393		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
394		pdir4mb = ptditmp;
395		/*
396		 * We can do the mapping here for the single processor
397		 * case.  We simply ignore the old page table page from
398		 * now on.
399		 */
400#if !defined(SMP)
401		PTD[KPTDI] = (pd_entry_t) ptditmp;
402		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
403		invltlb();
404#endif
405	}
406#endif
407
408#ifdef SMP
409	if (cpu_apic_address == 0)
410		panic("pmap_bootstrap: no local apic!");
411
412	/* 0 = private page */
413	/* 1 = page table page */
414	/* 2 = local apic */
415	/* 16-31 = io apics */
416	SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME));
417
418	for (i = 0; i < mp_napics; i++) {
419		for (j = 0; j < 16; j++) {
420			/* same page frame as a previous IO apic? */
421			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) ==
422			    ((u_long)io_apic_address[0] & PG_FRAME)) {
423				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
424				break;
425			}
426			/* use this slot if available */
427			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) == 0) {
428				SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW | pgeflag |
429				    ((u_long)io_apic_address[i] & PG_FRAME));
430				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
431				break;
432			}
433		}
434		if (j == 16)
435			panic("no space to map IO apic %d!", i);
436	}
437
438	/* BSP does this itself, AP's get it pre-set */
439	prv_CMAP1 = (pt_entry_t *)&SMP_prvpt[3 + UPAGES];
440	prv_CMAP2 = (pt_entry_t *)&SMP_prvpt[4 + UPAGES];
441	prv_CMAP3 = (pt_entry_t *)&SMP_prvpt[5 + UPAGES];
442#endif
443
444	invltlb();
445
446}
447
448/*
449 * Set 4mb pdir for mp startup, and global flags
450 */
451void
452pmap_set_opt(unsigned *pdir) {
453	int i;
454
455	if (pseflag && (cpu_feature & CPUID_PSE)) {
456		load_cr4(rcr4() | CR4_PSE);
457		if (pdir4mb) {
458			(unsigned) pdir[KPTDI] = pdir4mb;
459		}
460	}
461
462	if (pgeflag && (cpu_feature & CPUID_PGE)) {
463		load_cr4(rcr4() | CR4_PGE);
464		for(i = KPTDI; i < KPTDI + nkpt; i++) {
465			if (pdir[i]) {
466				pdir[i] |= PG_G;
467			}
468		}
469	}
470}
471
472/*
473 * Setup the PTD for the boot processor
474 */
475void
476pmap_set_opt_bsp(void)
477{
478	pmap_set_opt((unsigned *)kernel_pmap->pm_pdir);
479	pmap_set_opt((unsigned *)PTD);
480	invltlb();
481}
482
483/*
484 *	Initialize the pmap module.
485 *	Called by vm_init, to initialize any structures that the pmap
486 *	system needs to map virtual memory.
487 *	pmap_init has been enhanced to support, in a fairly consistent
488 *	way, discontiguous physical memory.
489 */
490void
491pmap_init(phys_start, phys_end)
492	vm_offset_t phys_start, phys_end;
493{
494	vm_offset_t addr;
495	vm_size_t s;
496	int i;
497	int initial_pvs;
498
499	/*
500	 * calculate the number of pv_entries needed
501	 */
502	vm_first_phys = phys_avail[0];
503	for (i = 0; phys_avail[i + 1]; i += 2);
504	pv_npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE;
505
506	/*
507	 * Allocate memory for random pmap data structures.  Includes the
508	 * pv_head_table.
509	 */
510	s = (vm_size_t) (sizeof(pv_table_t) * pv_npg);
511	s = round_page(s);
512
513	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
514	pv_table = (pv_table_t *) addr;
515	for(i = 0; i < pv_npg; i++) {
516		vm_offset_t pa;
517		TAILQ_INIT(&pv_table[i].pv_list);
518		pv_table[i].pv_list_count = 0;
519		pa = vm_first_phys + i * PAGE_SIZE;
520		pv_table[i].pv_vm_page = PHYS_TO_VM_PAGE(pa);
521	}
522
523	/*
524	 * init the pv free list
525	 */
526	initial_pvs = pv_npg;
527	if (initial_pvs < MINPV)
528		initial_pvs = MINPV;
529	pvzone = &pvzone_store;
530	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
531		initial_pvs * sizeof (struct pv_entry));
532	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, pv_npg);
533
534	/*
535	 * Now it is safe to enable pv_table recording.
536	 */
537	pmap_initialized = TRUE;
538}
539
540/*
541 * Initialize the address space (zone) for the pv_entries.  Set a
542 * high water mark so that the system can recover from excessive
543 * numbers of pv entries.
544 */
545void
546pmap_init2() {
547	pv_entry_max = PMAP_SHPGPERPROC * maxproc + pv_npg;
548	pv_entry_high_water = 9 * (pv_entry_max / 10);
549	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
550}
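
/*
 * Worked example (the numbers are hypothetical, added for exposition):
 * with PMAP_SHPGPERPROC == 200, maxproc == 532 and pv_npg == 16384
 * managed pages,
 *
 *	pv_entry_max        == 200 * 532 + 16384 == 122784
 *	pv_entry_high_water == 9 * (122784 / 10) == 110502
 *
 * so get_pv_entry() starts waking the pagedaemon (and pmap_collect()
 * starts reclaiming pv entries) once about 90% of them are in use.
 */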
551
552/*
553 *	Used to map a range of physical addresses into kernel
554 *	virtual address space.
555 *
556 *	For now, VM is already on, we only need to map the
557 *	specified memory.
558 */
559vm_offset_t
560pmap_map(virt, start, end, prot)
561	vm_offset_t virt;
562	vm_offset_t start;
563	vm_offset_t end;
564	int prot;
565{
566	while (start < end) {
567		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
568		virt += PAGE_SIZE;
569		start += PAGE_SIZE;
570	}
571	return (virt);
572}
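
/*
 * Usage sketch (not compiled; the physical range below is hypothetical):
 * map a physical region into the kernel and advance the KVA cursor.
 */
#if 0
static vm_offset_t
example_pmap_map(vm_offset_t kva_cursor)
{
	/* Map 64KB of physical memory starting at 0xa0000, read/write. */
	return (pmap_map(kva_cursor, 0xa0000, 0xa0000 + 64 * 1024,
	    VM_PROT_READ | VM_PROT_WRITE));
}
#endif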
573
574
575/***************************************************
576 * Low level helper routines.....
577 ***************************************************/
578
579#if defined(PMAP_DIAGNOSTIC)
580
581/*
582 * This code checks for non-writeable/modified pages.
583 * This should be an invalid condition.
584 */
585static int
586pmap_nw_modified(pt_entry_t ptea) {
587	int pte;
588
589	pte = (int) ptea;
590
591	if ((pte & (PG_M|PG_RW)) == PG_M)
592		return 1;
593	else
594		return 0;
595}
596#endif
597
598
599/*
600 * this routine defines the region(s) of memory that should
601 * not be tested for the modified bit.
602 */
603static PMAP_INLINE int
604pmap_track_modified( vm_offset_t va) {
605	if ((va < clean_sva) || (va >= clean_eva))
606		return 1;
607	else
608		return 0;
609}
610
611static PMAP_INLINE void
612invltlb_1pg( vm_offset_t va) {
613#if defined(I386_CPU)
614	if (cpu_class == CPUCLASS_386) {
615		invltlb();
616	} else
617#endif
618	{
619		invlpg(va);
620	}
621}
622
623static PMAP_INLINE void
624invltlb_2pg( vm_offset_t va1, vm_offset_t va2) {
625#if defined(I386_CPU)
626	if (cpu_class == CPUCLASS_386) {
627		invltlb();
628	} else
629#endif
630	{
631		invlpg(va1);
632		invlpg(va2);
633	}
634}
635
636static unsigned *
637get_ptbase(pmap)
638	pmap_t pmap;
639{
640	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
641
642	/* are we current address space or kernel? */
643	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
644		return (unsigned *) PTmap;
645	}
646	/* otherwise, we are alternate address space */
647	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
648		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
649		invltlb();
650	}
651	return (unsigned *) APTmap;
652}
653
654/*
655 * Super fast pmap_pte routine best used when scanning
656 * the pv lists.  This eliminates many coarse-grained
657 * invltlb calls.  Note that many of the pv list
658 * scans are across different pmaps.  It is very wasteful
659 * to do an entire invltlb for checking a single mapping.
660 */
661
662static unsigned *
663pmap_pte_quick(pmap, va)
664	register pmap_t pmap;
665	vm_offset_t va;
666{
667	unsigned pde, newpf;
668	if (pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) {
669		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
670		unsigned index = i386_btop(va);
671		/* are we current address space or kernel? */
672		if ((pmap == kernel_pmap) ||
673			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
674			return (unsigned *) PTmap + index;
675		}
676		newpf = pde & PG_FRAME;
677		if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
678			* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
679			invltlb_1pg((vm_offset_t) PADDR1);
680		}
681		return PADDR1 + ((unsigned) index & (NPTEPG - 1));
682	}
683	return (0);
684}
685
686/*
687 *	Routine:	pmap_extract
688 *	Function:
689 *		Extract the physical page address associated
690 *		with the given map/virtual_address pair.
691 */
692vm_offset_t
693pmap_extract(pmap, va)
694	register pmap_t pmap;
695	vm_offset_t va;
696{
697	vm_offset_t rtval;
698	vm_offset_t pdirindex;
699	pdirindex = va >> PDRSHIFT;
700	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
701		unsigned *pte;
702		if ((rtval & PG_PS) != 0) {
703			rtval &= ~(NBPDR - 1);
704			rtval |= va & (NBPDR - 1);
705			return rtval;
706		}
707		pte = get_ptbase(pmap) + i386_btop(va);
708		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
709		return rtval;
710	}
711	return 0;
712
713}
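
/*
 * Illustration of the two cases above (hypothetical values, added for
 * exposition): for a 4MB (PG_PS) pde of 0x00800fe3 and va == 0xf08a1234,
 *
 *	rtval = (0x00800fe3 & ~(NBPDR - 1)) | (0xf08a1234 & (NBPDR - 1))
 *	      =  0x00800000 | 0x000a1234 = 0x008a1234;
 *
 * for a normal 4K pte of 0x00345067,
 *
 *	rtval = (0x00345067 & PG_FRAME) | (0xf08a1234 & PAGE_MASK)
 *	      =  0x00345000 | 0x234 = 0x00345234.
 */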
714
715/*
716 * determine if a page is managed (memory vs. device)
717 */
718static PMAP_INLINE int
719pmap_is_managed(pa)
720	vm_offset_t pa;
721{
722	int i;
723
724	if (!pmap_initialized)
725		return 0;
726
727	for (i = 0; phys_avail[i + 1]; i += 2) {
728		if (pa < phys_avail[i + 1] && pa >= phys_avail[i])
729			return 1;
730	}
731	return 0;
732}
733
734
735/***************************************************
736 * Low level mapping routines.....
737 ***************************************************/
738
739/*
740 * Add a list of wired pages to the kva.  This routine is only
741 * used for temporary
742 * kernel mappings that do not need to have
743 * page modification or references recorded.
744 * Note that old mappings are simply written
745 * over.  The page *must* be wired.
746 */
747void
748pmap_qenter(va, m, count)
749	vm_offset_t va;
750	vm_page_t *m;
751	int count;
752{
753	int i;
754	register unsigned *pte;
755
756	for (i = 0; i < count; i++) {
757		vm_offset_t tva = va + i * PAGE_SIZE;
758		unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | pgeflag;
759		unsigned opte;
760		pte = (unsigned *)vtopte(tva);
761		opte = *pte;
762		*pte = npte;
763		if (opte)
764			invltlb_1pg(tva);
765	}
766}
767
768/*
769 * this routine jerks page mappings from the
770 * kernel -- it is meant only for temporary mappings.
771 */
772void
773pmap_qremove(va, count)
774	vm_offset_t va;
775	int count;
776{
777	int i;
778	register unsigned *pte;
779
780	for (i = 0; i < count; i++) {
781		pte = (unsigned *)vtopte(va);
782		*pte = 0;
783		invltlb_1pg(va);
784		va += PAGE_SIZE;
785	}
786}
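
/*
 * Usage sketch (not compiled): the typical pattern for a temporary kernel
 * mapping of wired pages, e.g. while copying their contents.  'kva' must
 * already be reserved kernel virtual address space.
 */
#if 0
static void
example_temporary_mapping(vm_offset_t kva, vm_page_t *pages, int npages)
{
	pmap_qenter(kva, pages, npages);
	/* ... access the pages through kva ... */
	pmap_qremove(kva, npages);
}
#endif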
787
788/*
789 * add a wired page to the kva
790 * note that in order for the mapping to take effect -- you
791 * should do an invltlb after doing the pmap_kenter...
792 */
793PMAP_INLINE void
794pmap_kenter(va, pa)
795	vm_offset_t va;
796	register vm_offset_t pa;
797{
798	register unsigned *pte;
799	unsigned npte, opte;
800
801	npte = pa | PG_RW | PG_V | pgeflag;
802	pte = (unsigned *)vtopte(va);
803	opte = *pte;
804	*pte = npte;
805	if (opte)
806		invltlb_1pg(va);
807}
808
809/*
810 * remove a page from the kernel pagetables
811 */
812PMAP_INLINE void
813pmap_kremove(va)
814	vm_offset_t va;
815{
816	register unsigned *pte;
817
818	pte = (unsigned *)vtopte(va);
819	*pte = 0;
820	invltlb_1pg(va);
821}
822
823static vm_page_t
824pmap_page_alloc(object, pindex)
825	vm_object_t object;
826	vm_pindex_t pindex;
827{
828	vm_page_t m;
829	m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO);
830	if (m == NULL) {
831		VM_WAIT;
832	}
833	return m;
834}
835
836static vm_page_t
837pmap_page_lookup(object, pindex)
838	vm_object_t object;
839	vm_pindex_t pindex;
840{
841	vm_page_t m;
842retry:
843	m = vm_page_lookup(object, pindex);
844	if (m) {
845		if (m->flags & PG_BUSY) {
846			m->flags |= PG_WANTED;
847			tsleep(m, PVM, "pplookp", 0);
848			goto retry;
849		}
850	}
851
852	return m;
853}
854
855/*
856 * Create the UPAGES for a new process.
857 * This routine directly affects the fork perf for a process.
858 */
859void
860pmap_new_proc(p)
861	struct proc *p;
862{
863	int i;
864	vm_object_t upobj;
865	vm_page_t m;
866	struct user *up;
867	unsigned *ptek;
868
869	/*
870	 * allocate object for the upages
871	 */
872	upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
873	p->p_upages_obj = upobj;
874
875	/* get a kernel virtual address for the UPAGES for this proc */
876	up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * PAGE_SIZE);
877	if (up == NULL)
878		panic("pmap_new_proc: u_map allocation failed");
879
880	ptek = (unsigned *) vtopte((vm_offset_t) up);
881
882	for(i=0;i<UPAGES;i++) {
883		/*
884		 * Get a kernel stack page
885		 */
886		while ((m = vm_page_alloc(upobj,
887			i, VM_ALLOC_NORMAL)) == NULL) {
888			VM_WAIT;
889		}
890
891		/*
892		 * Wire the page
893		 */
894		m->wire_count++;
895		++cnt.v_wire_count;
896
897		/*
898		 * Enter the page into the kernel address space.
899		 */
900		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
901
902		m->flags &= ~(PG_ZERO|PG_BUSY);
903		m->flags |= PG_MAPPED|PG_WRITEABLE;
904		m->valid = VM_PAGE_BITS_ALL;
905	}
906
907	p->p_addr = up;
908}
909
910/*
911 * Dispose of the UPAGES for a process that has exited.
912 * This routine directly impacts the exit perf of a process.
913 */
914void
915pmap_dispose_proc(p)
916	struct proc *p;
917{
918	int i;
919	vm_object_t upobj;
920	vm_page_t m;
921	unsigned *ptek;
922
923	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
924
925	upobj = p->p_upages_obj;
926
927	for(i=0;i<UPAGES;i++) {
928		unsigned oldpte;
929		if ((m = vm_page_lookup(upobj, i)) == NULL)
930			panic("pmap_dispose_proc: upage already missing???");
931		oldpte = *(ptek + i);
932		*(ptek + i) = 0;
933		if (oldpte & PG_G)
934			invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
935		vm_page_unwire(m);
936		vm_page_free(m);
937	}
938
939	vm_object_deallocate(upobj);
940
941	kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
942}
943
944/*
945 * Allow the UPAGES for a process to be prejudicially paged out.
946 */
947void
948pmap_swapout_proc(p)
949	struct proc *p;
950{
951	int i;
952	vm_object_t upobj;
953	vm_page_t m;
954
955	upobj = p->p_upages_obj;
956	/*
957	 * let the upages be paged
958	 */
959	for(i=0;i<UPAGES;i++) {
960		if ((m = vm_page_lookup(upobj, i)) == NULL)
961			panic("pmap_swapout_proc: upage already missing???");
962		m->dirty = VM_PAGE_BITS_ALL;
963		vm_page_unwire(m);
964		vm_page_deactivate(m);
965		pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
966	}
967}
968
969/*
970 * Bring the UPAGES for a specified process back in.
971 */
972void
973pmap_swapin_proc(p)
974	struct proc *p;
975{
976	int i;
977	vm_object_t upobj;
978	vm_page_t m;
979
980	upobj = p->p_upages_obj;
981	for(i=0;i<UPAGES;i++) {
982		int s;
983		s = splvm();
984retry:
985		if ((m = vm_page_lookup(upobj, i)) == NULL) {
986			if ((m = vm_page_alloc(upobj, i, VM_ALLOC_NORMAL)) == NULL) {
987				VM_WAIT;
988				goto retry;
989			}
990		} else {
991			if ((m->flags & PG_BUSY) || m->busy) {
992				m->flags |= PG_WANTED;
993				tsleep(m, PVM, "swinuw",0);
994				goto retry;
995			}
996			m->flags |= PG_BUSY;
997		}
998		vm_page_wire(m);
999		splx(s);
1000
1001		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
1002			VM_PAGE_TO_PHYS(m));
1003
1004		if (m->valid != VM_PAGE_BITS_ALL) {
1005			int rv;
1006			rv = vm_pager_get_pages(upobj, &m, 1, 0);
1007			if (rv != VM_PAGER_OK)
1008				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
1009			m->valid = VM_PAGE_BITS_ALL;
1010		}
1011		PAGE_WAKEUP(m);
1012		m->flags |= PG_MAPPED|PG_WRITEABLE;
1013	}
1014}
1015
1016/***************************************************
1017 * Page table page management routines.....
1018 ***************************************************/
1019
1020/*
1021 * This routine unholds page table pages, and if the hold count
1022 * drops to zero, then it decrements the wire count.
1023 */
1024static int
1025_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1026	int s;
1027
1028	if (m->flags & PG_BUSY) {
1029		s = splvm();
1030		while (m->flags & PG_BUSY) {
1031			m->flags |= PG_WANTED;
1032			tsleep(m, PVM, "pmuwpt", 0);
1033		}
1034		splx(s);
1035	}
1036
1037	if (m->hold_count == 0) {
1038		vm_offset_t pteva;
1039		/*
1040		 * unmap the page table page
1041		 */
1042		pmap->pm_pdir[m->pindex] = 0;
1043		--pmap->pm_stats.resident_count;
1044		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1045			(((unsigned) PTDpde) & PG_FRAME)) {
1046			/*
1047			 * Do an invltlb to make the invalidated mapping
1048			 * take effect immediately.
1049			 */
1050			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
1051			invltlb_1pg(pteva);
1052		}
1053
1054#if defined(PTPHINT)
1055		if (pmap->pm_ptphint == m)
1056			pmap->pm_ptphint = NULL;
1057#endif
1058
1059		/*
1060		 * If the page is finally unwired, simply free it.
1061		 */
1062		--m->wire_count;
1063		if (m->wire_count == 0) {
1064
1065			if (m->flags & PG_WANTED) {
1066				m->flags &= ~PG_WANTED;
1067				wakeup(m);
1068			}
1069
1070			vm_page_free_zero(m);
1071			--cnt.v_wire_count;
1072		}
1073		return 1;
1074	}
1075	return 0;
1076}
1077
1078__inline static int
1079pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1080	vm_page_unhold(m);
1081	if (m->hold_count == 0)
1082		return _pmap_unwire_pte_hold(pmap, m);
1083	else
1084		return 0;
1085}
1086
1087/*
1088 * After removing a page table entry, this routine is used to
1089 * conditionally free the page, and manage the hold/wire counts.
1090 */
1091static int
1092pmap_unuse_pt(pmap, va, mpte)
1093	pmap_t pmap;
1094	vm_offset_t va;
1095	vm_page_t mpte;
1096{
1097	unsigned ptepindex;
1098	if (va >= UPT_MIN_ADDRESS)
1099		return 0;
1100
1101	if (mpte == NULL) {
1102		ptepindex = (va >> PDRSHIFT);
1103#if defined(PTPHINT)
1104		if (pmap->pm_ptphint &&
1105			(pmap->pm_ptphint->pindex == ptepindex)) {
1106			mpte = pmap->pm_ptphint;
1107		} else {
1108			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1109			pmap->pm_ptphint = mpte;
1110		}
1111#else
1112		mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1113#endif
1114	}
1115
1116	return pmap_unwire_pte_hold(pmap, mpte);
1117}
1118
1119#if !defined(SMP)
1120void
1121pmap_pinit0(pmap)
1122	struct pmap *pmap;
1123{
1124	pmap->pm_pdir =
1125		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1126	pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
1127	pmap->pm_flags = 0;
1128	pmap->pm_count = 1;
1129	pmap->pm_ptphint = NULL;
1130	TAILQ_INIT(&pmap->pm_pvlist);
1131}
1132#else
1133void
1134pmap_pinit0(pmap)
1135	struct pmap *pmap;
1136{
1137	pmap_pinit(pmap);
1138}
1139#endif
1140
1141/*
1142 * Initialize a preallocated and zeroed pmap structure,
1143 * such as one in a vmspace structure.
1144 */
1145void
1146pmap_pinit(pmap)
1147	register struct pmap *pmap;
1148{
1149	vm_page_t ptdpg;
1150	/*
1151	 * No need to allocate page table space yet but we do need a valid
1152	 * page directory table.
1153	 */
1154
1155	if (pdstackptr > 0) {
1156		--pdstackptr;
1157		pmap->pm_pdir = (pd_entry_t *)pdstack[pdstackptr];
1158	} else {
1159		pmap->pm_pdir =
1160			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1161	}
1162
1163	/*
1164	 * allocate object for the ptes
1165	 */
1166	pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
1167
1168	/*
1169	 * allocate the page directory page
1170	 */
1171retry:
1172	ptdpg = pmap_page_alloc( pmap->pm_pteobj, PTDPTDI);
1173	if (ptdpg == NULL)
1174		goto retry;
1175
1176	ptdpg->wire_count = 1;
1177	++cnt.v_wire_count;
1178
1179	ptdpg->flags &= ~(PG_MAPPED|PG_BUSY);	/* not mapped normally */
1180	ptdpg->valid = VM_PAGE_BITS_ALL;
1181
1182	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
1183	if ((ptdpg->flags & PG_ZERO) == 0)
1184		bzero(pmap->pm_pdir, PAGE_SIZE);
1185
1186	/* wire in kernel global address entries */
1187	/* XXX copies current process, does not fill in MPPTDI */
1188	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
1189
1190	/* install self-referential address mapping entry */
1191	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1192		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW;
1193
1194	pmap->pm_flags = 0;
1195	pmap->pm_count = 1;
1196	pmap->pm_ptphint = NULL;
1197	TAILQ_INIT(&pmap->pm_pvlist);
1198}
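
/*
 * A note on the self-referential entry installed above (sketch, not
 * authoritative): because pm_pdir[PTDPTDI] points back at the page
 * directory page itself, the hardware's two-level lookup exposes all of
 * this pmap's page table pages as ordinary memory whenever the pmap is
 * current:
 *
 *	(unsigned *)PTmap + i386_btop(va)	is the pte for va
 *	PTD[va >> PDRSHIFT]			is the pde for va
 *
 * get_ptbase(), pmap_pte_quick() and vtopte() all rely on this, and the
 * APTDPTDI/APTmap slot plays the same trick for a non-current
 * ("alternate") address space.
 */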
1199
1200static int
1201pmap_release_free_page(pmap, p)
1202	struct pmap *pmap;
1203	vm_page_t p;
1204{
1205	int s;
1206	unsigned *pde = (unsigned *) pmap->pm_pdir;
1207	/*
1208	 * This code optimizes the case of freeing non-busy
1209	 * page-table pages.  Those pages are zero now, and
1210	 * might as well be placed directly into the zero queue.
1211	 */
1212	s = splvm();
1213	if (p->flags & PG_BUSY) {
1214		p->flags |= PG_WANTED;
1215		tsleep(p, PVM, "pmaprl", 0);
1216		splx(s);
1217		return 0;
1218	}
1219
1220	if (p->flags & PG_WANTED) {
1221		p->flags &= ~PG_WANTED;
1222		wakeup(p);
1223	}
1224
1225	/*
1226	 * Remove the page table page from the process's address space.
1227	 */
1228	pde[p->pindex] = 0;
1229	--pmap->pm_stats.resident_count;
1230
1231	if (p->hold_count)  {
1232		panic("pmap_release: freeing held page table page");
1233	}
1234	/*
1235	 * Page directory pages need to have the kernel
1236	 * stuff cleared, so they can go into the zero queue also.
1237	 */
1238	if (p->pindex == PTDPTDI) {
1239		bzero(pde + KPTDI, nkpt * PTESIZE);
1240#ifdef SMP
1241		pde[MPPTDI] = 0;
1242#endif
1243		pde[APTDPTDI] = 0;
1244		pmap_kremove((vm_offset_t) pmap->pm_pdir);
1245	}
1246
1247#if defined(PTPHINT)
1248	if (pmap->pm_ptphint &&
1249		(pmap->pm_ptphint->pindex == p->pindex))
1250		pmap->pm_ptphint = NULL;
1251#endif
1252
1253	vm_page_free_zero(p);
1254	splx(s);
1255	return 1;
1256}
1257
1258/*
1259 * this routine is called if the page table page is not
1260 * mapped correctly.
1261 */
1262static vm_page_t
1263_pmap_allocpte(pmap, ptepindex)
1264	pmap_t	pmap;
1265	unsigned ptepindex;
1266{
1267	vm_offset_t pteva, ptepa;
1268	vm_page_t m;
1269	int needszero = 0;
1270
1271	/*
1272	 * Find or fabricate a new pagetable page
1273	 */
1274retry:
1275	m = vm_page_lookup(pmap->pm_pteobj, ptepindex);
1276	if (m == NULL) {
1277		m = pmap_page_alloc(pmap->pm_pteobj, ptepindex);
1278		if (m == NULL)
1279			goto retry;
1280		if ((m->flags & PG_ZERO) == 0)
1281			needszero = 1;
1282		m->flags &= ~(PG_ZERO|PG_BUSY);
1283		m->valid = VM_PAGE_BITS_ALL;
1284	} else {
1285		if ((m->flags & PG_BUSY) || m->busy) {
1286			m->flags |= PG_WANTED;
1287			tsleep(m, PVM, "ptewai", 0);
1288			goto retry;
1289		}
1290	}
1291
1292	if (m->queue != PQ_NONE) {
1293		int s = splvm();
1294		vm_page_unqueue(m);
1295		splx(s);
1296	}
1297
1298	if (m->wire_count == 0)
1299		++cnt.v_wire_count;
1300	++m->wire_count;
1301
1302	/*
1303	 * Increment the hold count for the page table page
1304	 * (denoting a new mapping.)
1305	 */
1306	++m->hold_count;
1307
1308	/*
1309	 * Map the pagetable page into the process address space, if
1310	 * it isn't already there.
1311	 */
1312
1313	pmap->pm_stats.resident_count++;
1314
1315	ptepa = VM_PAGE_TO_PHYS(m);
1316	pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V);
1317
1318#if defined(PTPHINT)
1319	/*
1320	 * Set the page table hint
1321	 */
1322	pmap->pm_ptphint = m;
1323#endif
1324
1325	/*
1326	 * Try to use the new mapping, but if we cannot, then
1327	 * do it with the routine that maps the page explicitly.
1328	 */
1329	if (needszero) {
1330		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1331			(((unsigned) PTDpde) & PG_FRAME)) {
1332			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
1333			bzero((caddr_t) pteva, PAGE_SIZE);
1334		} else {
1335			pmap_zero_page(ptepa);
1336		}
1337	}
1338
1339	m->valid = VM_PAGE_BITS_ALL;
1340	m->flags |= PG_MAPPED;
1341
1342	return m;
1343}
1344
1345static vm_page_t
1346pmap_allocpte(pmap, va)
1347	pmap_t	pmap;
1348	vm_offset_t va;
1349{
1350	unsigned ptepindex;
1351	vm_offset_t ptepa;
1352	vm_page_t m;
1353
1354	/*
1355	 * Calculate pagetable page index
1356	 */
1357	ptepindex = va >> PDRSHIFT;
1358
1359	/*
1360	 * Get the page directory entry
1361	 */
1362	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1363
1364	/*
1365	 * This supports switching from a 4MB page to a
1366	 * normal 4K page.
1367	 */
1368	if (ptepa & PG_PS) {
1369		pmap->pm_pdir[ptepindex] = 0;
1370		ptepa = 0;
1371		invltlb();
1372	}
1373
1374	/*
1375	 * If the page table page is mapped, we just increment the
1376	 * hold count, and activate it.
1377	 */
1378	if (ptepa) {
1379#if defined(PTPHINT)
1380		/*
1381		 * In order to get the page table page, try the
1382		 * hint first.
1383		 */
1384		if (pmap->pm_ptphint &&
1385			(pmap->pm_ptphint->pindex == ptepindex)) {
1386			m = pmap->pm_ptphint;
1387		} else {
1388			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1389			pmap->pm_ptphint = m;
1390		}
1391#else
1392		m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1393#endif
1394		++m->hold_count;
1395		return m;
1396	}
1397	/*
1398	 * Here if the pte page isn't mapped, or if it has been deallocated.
1399	 */
1400	return _pmap_allocpte(pmap, ptepindex);
1401}
1402
1403
1404/***************************************************
1405 * Pmap allocation/deallocation routines.
1406 ***************************************************/
1407
1408/*
1409 * Release any resources held by the given physical map.
1410 * Called when a pmap initialized by pmap_pinit is being released.
1411 * Should only be called if the map contains no valid mappings.
1412 */
1413void
1414pmap_release(pmap)
1415	register struct pmap *pmap;
1416{
1417	vm_page_t p,n,ptdpg;
1418	vm_object_t object = pmap->pm_pteobj;
1419
1420#if defined(DIAGNOSTIC)
1421	if (object->ref_count != 1)
1422		panic("pmap_release: pteobj reference count != 1");
1423#endif
1424
1425	ptdpg = NULL;
1426retry:
1427	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
1428		n = TAILQ_NEXT(p, listq);
1429		if (p->pindex == PTDPTDI) {
1430			ptdpg = p;
1431			continue;
1432		}
1433		if (!pmap_release_free_page(pmap, p))
1434			goto retry;
1435	}
1436
1437	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
1438		goto retry;
1439
1440	vm_object_deallocate(object);
1441	if (pdstackptr < PDSTACKMAX) {
1442		pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir;
1443		++pdstackptr;
1444	} else {
1445		int pdstmp = pdstackptr - 1;
1446		kmem_free(kernel_map, pdstack[pdstmp], PAGE_SIZE);
1447		pdstack[pdstmp] = (vm_offset_t) pmap->pm_pdir;
1448	}
1449	pmap->pm_pdir = 0;
1450}
1451
1452/*
1453 * grow the number of kernel page table entries, if needed
1454 */
1455void
1456pmap_growkernel(vm_offset_t addr)
1457{
1458	struct proc *p;
1459	struct pmap *pmap;
1460	int s;
1461	vm_offset_t ptpkva, ptppaddr;
1462	vm_page_t nkpg;
1463#ifdef SMP
1464	int i;
1465#endif
1466	pd_entry_t newpdir;
1467	vm_pindex_t ptpidx;
1468
1469	s = splhigh();
1470	if (kernel_vm_end == 0) {
1471		kernel_vm_end = KERNBASE;
1472		nkpt = 0;
1473		while (pdir_pde(PTD, kernel_vm_end)) {
1474			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1475			++nkpt;
1476		}
1477	}
1478	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1479	while (kernel_vm_end < addr) {
1480		if (pdir_pde(PTD, kernel_vm_end)) {
1481			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1482			continue;
1483		}
1484		++nkpt;
1485		ptpkva = (vm_offset_t) vtopte(addr);
1486		ptpidx = (ptpkva >> PAGE_SHIFT);
1487		/*
1488		 * This index is bogus, but out of the way
1489		 */
1490		nkpg = vm_page_alloc(kernel_object,
1491			ptpidx, VM_ALLOC_SYSTEM);
1492		if (!nkpg)
1493			panic("pmap_growkernel: no memory to grow kernel");
1494
1495		vm_page_wire(nkpg);
1496		vm_page_remove(nkpg);
1497		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1498		pmap_zero_page(ptppaddr);
1499		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW);
1500		pdir_pde(PTD, kernel_vm_end) = newpdir;
1501
1502#ifdef SMP
1503		for (i = 0; i < mp_ncpus; i++) {
1504			if (IdlePTDS[i])
1505				pdir_pde(IdlePTDS[i], kernel_vm_end) = newpdir;
1506		}
1507#endif
1508
1509		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1510			if (p->p_vmspace) {
1511				pmap = &p->p_vmspace->vm_pmap;
1512				*pmap_pde(pmap, kernel_vm_end) = newpdir;
1513			}
1514		}
1515		*pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
1516		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1517	}
1518	splx(s);
1519}
1520
1521/*
1522 *	Retire the given physical map from service.
1523 *	Should only be called if the map contains
1524 *	no valid mappings.
1525 */
1526void
1527pmap_destroy(pmap)
1528	register pmap_t pmap;
1529{
1530	int count;
1531
1532	if (pmap == NULL)
1533		return;
1534
1535	count = --pmap->pm_count;
1536	if (count == 0) {
1537		pmap_release(pmap);
1538		panic("destroying a pmap is not yet implemented");
1539		/* free((caddr_t) pmap, M_VMPMAP); */
1540	}
1541}
1542
1543/*
1544 *	Add a reference to the specified pmap.
1545 */
1546void
1547pmap_reference(pmap)
1548	pmap_t pmap;
1549{
1550	if (pmap != NULL) {
1551		pmap->pm_count++;
1552	}
1553}
1554
1555/***************************************************
1556 * Page management routines.
1557 ***************************************************/
1558
1559/*
1560 * free the pv_entry back to the free list
1561 */
1562static inline void
1563free_pv_entry(pv)
1564	pv_entry_t pv;
1565{
1566	pv_entry_count--;
1567	zfreei(pvzone, pv);
1568}
1569
1570/*
1571 * get a new pv_entry, allocating a block from the system
1572 * when needed.
1573 * the memory allocation is performed bypassing the malloc code
1574 * because of the possibility of allocations at interrupt time.
1575 */
1576static pv_entry_t
1577get_pv_entry(void)
1578{
1579	pv_entry_count++;
1580	if (pv_entry_high_water &&
1581		(pv_entry_count > pv_entry_high_water) &&
1582		(pmap_pagedaemon_waken == 0)) {
1583		pmap_pagedaemon_waken = 1;
1584		wakeup (&vm_pages_needed);
1585	}
1586	return zalloci(pvzone);
1587}
1588
1589/*
1590 * This routine is very drastic, but can save the system
1591 * in a pinch.
1592 */
1593void
1594pmap_collect() {
1595	pv_table_t *ppv;
1596	int i;
1597	vm_offset_t pa;
1598	vm_page_t m;
1599	static int warningdone=0;
1600
1601	if (pmap_pagedaemon_waken == 0)
1602		return;
1603
1604	if (warningdone < 5) {
1605		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1606		warningdone++;
1607	}
1608
1609	for(i = 0; i < pv_npg; i++) {
1610		if ((ppv = &pv_table[i]) == 0)
1611			continue;
1612		m = ppv->pv_vm_page;
1613		if ((pa = VM_PAGE_TO_PHYS(m)) == 0)
1614			continue;
1615		if (m->wire_count || m->hold_count || m->busy || (m->flags & PG_BUSY))
1616			continue;
1617		pmap_remove_all(pa);
1618	}
1619	pmap_pagedaemon_waken = 0;
1620}
1621
1622
1623/*
1624 * Remove the pv entry for the given (pmap, va) pair from both the
1625 * page's pv list and the pmap's pv list, and free it.  Whichever of
1626 * the two lists is expected to be shorter is the one that is
1627 * searched for the entry.
1628 */
1629
1630static int
1631pmap_remove_entry(pmap, ppv, va)
1632	struct pmap *pmap;
1633	pv_table_t *ppv;
1634	vm_offset_t va;
1635{
1636	pv_entry_t pv;
1637	int rtval;
1638	int s;
1639
1640	s = splvm();
1641	if (ppv->pv_list_count < pmap->pm_stats.resident_count) {
1642		for (pv = TAILQ_FIRST(&ppv->pv_list);
1643			pv;
1644			pv = TAILQ_NEXT(pv, pv_list)) {
1645			if (pmap == pv->pv_pmap && va == pv->pv_va)
1646				break;
1647		}
1648	} else {
1649		for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1650			pv;
1651			pv = TAILQ_NEXT(pv, pv_plist)) {
1652			if (va == pv->pv_va)
1653				break;
1654		}
1655	}
1656
1657	rtval = 0;
1658	if (pv) {
1659		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1660		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1661		--ppv->pv_list_count;
1662		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
1663			ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
1664		}
1665
1666		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1667		free_pv_entry(pv);
1668	}
1669
1670	splx(s);
1671	return rtval;
1672}
1673
1674/*
1675 * Create a pv entry for page at pa for
1676 * (pmap, va).
1677 */
1678static void
1679pmap_insert_entry(pmap, va, mpte, pa)
1680	pmap_t pmap;
1681	vm_offset_t va;
1682	vm_page_t mpte;
1683	vm_offset_t pa;
1684{
1685
1686	int s;
1687	pv_entry_t pv;
1688	pv_table_t *ppv;
1689
1690	s = splvm();
1691	pv = get_pv_entry();
1692	pv->pv_va = va;
1693	pv->pv_pmap = pmap;
1694	pv->pv_ptem = mpte;
1695
1696	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1697
1698	ppv = pa_to_pvh(pa);
1699	TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
1700	++ppv->pv_list_count;
1701
1702	splx(s);
1703}
1704
1705/*
1706 * pmap_remove_pte: do the things to unmap a page in a process
1707 */
1708static int
1709pmap_remove_pte(pmap, ptq, va)
1710	struct pmap *pmap;
1711	unsigned *ptq;
1712	vm_offset_t va;
1713{
1714	unsigned oldpte;
1715	pv_table_t *ppv;
1716
1717	oldpte = *ptq;
1718	*ptq = 0;
1719	if (oldpte & PG_W)
1720		pmap->pm_stats.wired_count -= 1;
1721	/*
1722	 * Machines that don't support invlpg also don't support
1723	 * PG_G.
1724	 */
1725	if (oldpte & PG_G)
1726		invlpg(va);
1727	pmap->pm_stats.resident_count -= 1;
1728	if (oldpte & PG_MANAGED) {
1729		ppv = pa_to_pvh(oldpte);
1730		if (oldpte & PG_M) {
1731#if defined(PMAP_DIAGNOSTIC)
1732			if (pmap_nw_modified((pt_entry_t) oldpte)) {
1733				printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte);
1734			}
1735#endif
1736			if (pmap_track_modified(va))
1737				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1738		}
1739		return pmap_remove_entry(pmap, ppv, va);
1740	} else {
1741		return pmap_unuse_pt(pmap, va, NULL);
1742	}
1743
1744	return 0;
1745}
1746
1747/*
1748 * Remove a single page from a process address space
1749 */
1750static void
1751pmap_remove_page(pmap, va)
1752	struct pmap *pmap;
1753	register vm_offset_t va;
1754{
1755	register unsigned *ptq;
1756
1757	/*
1758	 * if there is no pte for this address, just skip it!!!
1759	 */
1760	if (*pmap_pde(pmap, va) == 0) {
1761		return;
1762	}
1763
1764	/*
1765	 * get a local va for mappings for this pmap.
1766	 */
1767	ptq = get_ptbase(pmap) + i386_btop(va);
1768	if (*ptq) {
1769		(void) pmap_remove_pte(pmap, ptq, va);
1770		invltlb_1pg(va);
1771	}
1772	return;
1773}
1774
1775/*
1776 *	Remove the given range of addresses from the specified map.
1777 *
1778 *	It is assumed that the start and end are properly
1779 *	rounded to the page size.
1780 */
1781void
1782pmap_remove(pmap, sva, eva)
1783	struct pmap *pmap;
1784	register vm_offset_t sva;
1785	register vm_offset_t eva;
1786{
1787	register unsigned *ptbase;
1788	vm_offset_t pdnxt;
1789	vm_offset_t ptpaddr;
1790	vm_offset_t sindex, eindex;
1791	int anyvalid;
1792
1793	if (pmap == NULL)
1794		return;
1795
1796	if (pmap->pm_stats.resident_count == 0)
1797		return;
1798
1799	/*
1800	 * special handling of removing one page.  this is a very
1801	 * common operation, and it lets us short circuit some
1802	 * code.
1803	 */
1804	if (((sva + PAGE_SIZE) == eva) &&
1805		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1806		pmap_remove_page(pmap, sva);
1807		return;
1808	}
1809
1810	anyvalid = 0;
1811
1812	/*
1813	 * Get a local virtual address for the mappings that are being
1814	 * worked with.
1815	 */
1816	ptbase = get_ptbase(pmap);
1817
1818	sindex = i386_btop(sva);
1819	eindex = i386_btop(eva);
1820
1821	for (; sindex < eindex; sindex = pdnxt) {
1822		unsigned pdirindex;
1823
1824		/*
1825		 * Calculate index for next page table.
1826		 */
1827		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1828		if (pmap->pm_stats.resident_count == 0)
1829			break;
1830
1831		pdirindex = sindex / NPDEPG;
1832		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1833			pmap->pm_pdir[pdirindex] = 0;
1834			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1835			anyvalid++;
1836			continue;
1837		}
1838
1839		/*
1840		 * Weed out invalid mappings. Note: we assume that the page
1841		 * directory table is always allocated, and in kernel virtual.
1842		 */
1843		if (ptpaddr == 0)
1844			continue;
1845
1846		/*
1847		 * Limit our scan to either the end of the va represented
1848		 * by the current page table page, or to the end of the
1849		 * range being removed.
1850		 */
1851		if (pdnxt > eindex) {
1852			pdnxt = eindex;
1853		}
1854
1855		for ( ;sindex != pdnxt; sindex++) {
1856			vm_offset_t va;
1857			if (ptbase[sindex] == 0) {
1858				continue;
1859			}
1860			va = i386_ptob(sindex);
1861
1862			anyvalid++;
1863			if (pmap_remove_pte(pmap,
1864				ptbase + sindex, va))
1865				break;
1866		}
1867	}
1868
1869	if (anyvalid) {
1870		invltlb();
1871	}
1872}
1873
1874/*
1875 *	Routine:	pmap_remove_all
1876 *	Function:
1877 *		Removes this physical page from
1878 *		all physical maps in which it resides.
1879 *		Reflects back modify bits to the pager.
1880 *
1881 *	Notes:
1882 *		Original versions of this routine were very
1883 *		inefficient because they iteratively called
1884 *		pmap_remove (slow...)
1885 */
1886
1887static void
1888pmap_remove_all(pa)
1889	vm_offset_t pa;
1890{
1891	register pv_entry_t pv;
1892	pv_table_t *ppv;
1893	register unsigned *pte, tpte;
1894	int nmodify;
1895	int update_needed;
1896	int s;
1897
1898	nmodify = 0;
1899	update_needed = 0;
1900#if defined(PMAP_DIAGNOSTIC)
1901	/*
1902	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1903	 * pages!
1904	 */
1905	if (!pmap_is_managed(pa)) {
1906		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", pa);
1907	}
1908#endif
1909
1910	s = splvm();
1911	ppv = pa_to_pvh(pa);
1912	while ((pv = TAILQ_FIRST(&ppv->pv_list)) != NULL) {
1913		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
1914
1915		pv->pv_pmap->pm_stats.resident_count--;
1916
1917		tpte = *pte;
1918		*pte = 0;
1919		if (tpte & PG_W)
1920			pv->pv_pmap->pm_stats.wired_count--;
1921		/*
1922		 * Update the vm_page_t clean and reference bits.
1923		 */
1924		if (tpte & PG_M) {
1925#if defined(PMAP_DIAGNOSTIC)
1926			if (pmap_nw_modified((pt_entry_t) tpte)) {
1927				printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", pv->pv_va, tpte);
1928			}
1929#endif
1930			if (pmap_track_modified(pv->pv_va))
1931				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1932		}
1933		if (!update_needed &&
1934			((!curproc || (&curproc->p_vmspace->vm_pmap == pv->pv_pmap)) ||
1935			(pv->pv_pmap == kernel_pmap))) {
1936			update_needed = 1;
1937		}
1938
1939		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1940		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1941		--ppv->pv_list_count;
1942		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1943		free_pv_entry(pv);
1944	}
1945	ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
1946
1947
1948	if (update_needed)
1949		invltlb();
1950	splx(s);
1951	return;
1952}
1953
1954/*
1955 *	Set the physical protection on the
1956 *	specified range of this map as requested.
1957 */
1958void
1959pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1960{
1961	register unsigned *ptbase;
1962	vm_offset_t pdnxt;
1963	vm_offset_t ptpaddr;
1964	vm_offset_t sindex, eindex;
1965	int anychanged;
1966
1967
1968	if (pmap == NULL)
1969		return;
1970
1971	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1972		pmap_remove(pmap, sva, eva);
1973		return;
1974	}
1975
1976	anychanged = 0;
1977
1978	ptbase = get_ptbase(pmap);
1979
1980	sindex = i386_btop(sva);
1981	eindex = i386_btop(eva);
1982
1983	for (; sindex < eindex; sindex = pdnxt) {
1984
1985		unsigned pdirindex;
1986
1987		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1988
1989		pdirindex = sindex / NPDEPG;
1990		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1991			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
1992			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1993			anychanged++;
1994			continue;
1995		}
1996
1997		/*
1998		 * Weed out invalid mappings. Note: we assume that the page
1999		 * directory table is always allocated, and in kernel virtual.
2000		 */
2001		if (ptpaddr == 0)
2002			continue;
2003
2004		if (pdnxt > eindex) {
2005			pdnxt = eindex;
2006		}
2007
2008		for (; sindex != pdnxt; sindex++) {
2009
2010			unsigned pbits = ptbase[sindex];
2011
2012			if (prot & VM_PROT_WRITE) {
2013				if ((pbits & (PG_RW|PG_V)) == PG_V) {
2014					if (pbits & PG_MANAGED) {
2015						vm_page_t m = PHYS_TO_VM_PAGE(pbits);
2016						m->flags |= PG_WRITEABLE;
2017						m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY;
2018					}
2019					ptbase[sindex] = pbits | PG_RW;
2020					anychanged = 1;
2021				}
2022			} else if (pbits & PG_RW) {
2023				if (pbits & PG_M) {
2024					vm_offset_t sva1 = i386_ptob(sindex);
2025					if ((pbits & PG_MANAGED) && pmap_track_modified(sva1)) {
2026						vm_page_t m = PHYS_TO_VM_PAGE(pbits);
2027						m->dirty = VM_PAGE_BITS_ALL;
2028					}
2029				}
2030				ptbase[sindex] = pbits & ~(PG_M|PG_RW);
2031				anychanged = 1;
2032			}
2033		}
2034	}
2035	if (anychanged)
2036		invltlb();
2037}
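
/*
 * Usage sketch (not compiled; 'va' is hypothetical): write-protect one
 * page of the current process.  The next store to that page will fault,
 * and, for managed pages in the tracked range, any accumulated PG_M state
 * has already been folded into the vm_page's dirty bits by the loop above.
 */
#if 0
static void
example_write_protect(vm_offset_t va)
{
	pmap_protect(&curproc->p_vmspace->vm_pmap, va, va + PAGE_SIZE,
	    VM_PROT_READ);
}
#endif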
2038
2039/*
2040 *	Insert the given physical page (p) at
2041 *	the specified virtual address (v) in the
2042 *	target physical map with the protection requested.
2043 *
2044 *	If specified, the page will be wired down, meaning
2045 *	that the related pte can not be reclaimed.
2046 *
2047 *	NB:  This is the only routine which MAY NOT lazy-evaluate
2048 *	or lose information.  That is, this routine must actually
2049 *	insert this page into the given map NOW.
2050 */
2051void
2052pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot,
2053	   boolean_t wired)
2054{
2055	register unsigned *pte;
2056	vm_offset_t opa;
2057	vm_offset_t origpte, newpte;
2058	vm_page_t mpte;
2059
2060	if (pmap == NULL)
2061		return;
2062
2063	va &= PG_FRAME;
2064#ifdef PMAP_DIAGNOSTIC
2065	if (va > VM_MAX_KERNEL_ADDRESS)
2066		panic("pmap_enter: toobig");
2067	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
2068		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
2069#endif
2070
2071	mpte = NULL;
2072	/*
2073	 * In the case that a page table page is not
2074	 * resident, we are creating it here.
2075	 */
2076	if (va < UPT_MIN_ADDRESS) {
2077		mpte = pmap_allocpte(pmap, va);
2078	}
2079#if 0 && defined(PMAP_DIAGNOSTIC)
2080	else {
2081		vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va);
2082		if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) {
2083			panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n",
2084				pmap->pm_pdir[PTDPTDI], origpte, va);
2085		}
2086		if (smp_active) {
2087			pdeaddr = (vm_offset_t *) IdlePTDS[cpuid];
2088			if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) {
2089				if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr))
2090					printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr);
2091				printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr);
2092				panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n",
2093					pmap->pm_pdir[PTDPTDI], newpte, origpte, va);
2094			}
2095		}
2096	}
2097#endif
2098
2099	pte = pmap_pte(pmap, va);
2100	/*
2101	 * Page Directory table entry not valid, we need a new PT page
2102	 */
2103	if (pte == NULL) {
2104		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%lx\n",
2105			pmap->pm_pdir[PTDPTDI], va);
2106	}
2107
2108	origpte = *(vm_offset_t *)pte;
2109	pa &= PG_FRAME;
2110	opa = origpte & PG_FRAME;
2111	if (origpte & PG_PS)
2112		panic("pmap_enter: attempted pmap_enter on 4MB page");
2113
2114	/*
2115	 * Mapping has not changed, must be protection or wiring change.
2116	 */
2117	if (origpte && (opa == pa)) {
2118		/*
2119		 * Wiring change, just update stats. We don't worry about
2120		 * wiring PT pages as they remain resident as long as there
2121		 * are valid mappings in them. Hence, if a user page is wired,
2122		 * the PT page will be also.
2123		 */
2124		if (wired && ((origpte & PG_W) == 0))
2125			pmap->pm_stats.wired_count++;
2126		else if (!wired && (origpte & PG_W))
2127			pmap->pm_stats.wired_count--;
2128
2129#if defined(PMAP_DIAGNOSTIC)
2130		if (pmap_nw_modified((pt_entry_t) origpte)) {
2131			printf("pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, origpte);
2132		}
2133#endif
2134
2135		/*
2136		 * We might be turning off write access to the page,
2137		 * so we go ahead and sense modify status.
2138		 */
2139		if (origpte & PG_MANAGED) {
2140			vm_page_t m;
2141			if (origpte & PG_M) {
2142				if (pmap_track_modified(va)) {
2143					m = PHYS_TO_VM_PAGE(pa);
2144					m->dirty = VM_PAGE_BITS_ALL;
2145				}
2146			}
2147			pa |= PG_MANAGED;
2148		}
2149
2150		if (mpte)
2151			--mpte->hold_count;
2152
2153		goto validate;
2154	}
2155	/*
2156	 * Mapping has changed, invalidate old range and fall through to
2157	 * handle validating new mapping.
2158	 */
2159	if (opa) {
2160		int err;
2161		err = pmap_remove_pte(pmap, pte, va);
2162		if (err)
2163			panic("pmap_enter: pte vanished, va: 0x%x", va);
2164	}
2165
2166	/*
2167	 * Enter on the PV list if the page is part of our managed memory.
2168	 * Note that we raise IPL while manipulating pv_table, since pmap_enter
2169	 * can be called at interrupt time.
2170	 */
2171	if (pmap_is_managed(pa)) {
2172		pmap_insert_entry(pmap, va, mpte, pa);
2173		pa |= PG_MANAGED;
2174	}
2175
2176	/*
2177	 * Increment counters
2178	 */
2179	pmap->pm_stats.resident_count++;
2180	if (wired)
2181		pmap->pm_stats.wired_count++;
2182
2183validate:
2184	/*
2185	 * Now validate mapping with desired protection/wiring.
2186	 */
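	/*
	 * The new pte combines the physical frame (and PG_MANAGED, if set
	 * above) with the i386 control bits: PG_V (valid), the protection
	 * bits from pte_prot(), PG_W (the software "wired" bit), PG_U for
	 * user addresses, and pgeflag (PG_G, global) for kernel mappings
	 * on processors that support PGE.
	 */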
2187	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2188
2189	if (wired)
2190		newpte |= PG_W;
2191	if (va < UPT_MIN_ADDRESS)
2192		newpte |= PG_U;
2193	if (pmap == kernel_pmap)
2194		newpte |= pgeflag;
2195
2196	/*
2197	 * If the mapping or permission bits differ from the old pte
2198	 * (ignoring the hardware-maintained PG_M/PG_A bits), update the pte.
2199	 */
2200	if ((origpte & ~(PG_M|PG_A)) != newpte) {
2201		*pte = newpte;
2202		if (origpte)
2203			invltlb_1pg(va);
2204	}
2205}
2206
2207/*
2208 * this code makes some *MAJOR* assumptions:
2209 * 1. The pmap is the current pmap and it exists.
2210 * 2. Not wired.
2211 * 3. Read access.
2212 * 4. No page table pages.
2213 * 5. The TLB flush is deferred to the calling procedure.
2214 * 6. Page IS managed.
2215 * but is *MUCH* faster than pmap_enter...
2216 */
2217
2218static vm_page_t
2219pmap_enter_quick(pmap, va, pa, mpte)
2220	register pmap_t pmap;
2221	vm_offset_t va;
2222	register vm_offset_t pa;
2223	vm_page_t mpte;
2224{
2225	register unsigned *pte;
2226
2227	/*
2228	 * In the case that a page table page is not
2229	 * resident, we are creating it here.
2230	 */
2231	if (va < UPT_MIN_ADDRESS) {
2232		unsigned ptepindex;
2233		vm_offset_t ptepa;
2234
2235		/*
2236		 * Calculate pagetable page index
2237		 * Calculate the page table page index (one PT page per 4MB of VA).
2238		ptepindex = va >> PDRSHIFT;
2239		if (mpte && (mpte->pindex == ptepindex)) {
2240			++mpte->hold_count;
2241		} else {
2242retry:
2243			/*
2244			 * Get the page directory entry
2245			 */
2246			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2247
2248			/*
2249			 * If the page table page is mapped, we just increment
2250			 * the hold count, and activate it.
2251			 */
2252			if (ptepa) {
2253				if (ptepa & PG_PS)
2254					panic("pmap_enter_quick: unexpected mapping into 4MB page");
2255#if defined(PTPHINT)
2256				if (pmap->pm_ptphint &&
2257					(pmap->pm_ptphint->pindex == ptepindex)) {
2258					mpte = pmap->pm_ptphint;
2259				} else {
2260					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2261					pmap->pm_ptphint = mpte;
2262				}
2263#else
2264				mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2265#endif
2266				if (mpte == NULL)
2267					goto retry;
2268				++mpte->hold_count;
2269			} else {
2270				mpte = _pmap_allocpte(pmap, ptepindex);
2271			}
2272		}
2273	} else {
2274		mpte = NULL;
2275	}
2276
2277	/*
2278	 * This call to vtopte makes the assumption that we are
2279	 * entering the page into the current pmap.  In order to support
2280	 * quick entry into any pmap, one would likely use pmap_pte_quick.
2281	 * But that isn't as quick as vtopte.
2282	 */
2283	pte = (unsigned *)vtopte(va);
2284	if (*pte) {
2285		if (mpte)
2286			pmap_unwire_pte_hold(pmap, mpte);
2287		return NULL;
2288	}
2289
2290	/*
2291	 * Enter on the PV list; these pages are always part of our managed
2292	 * memory.  Note that we raise IPL while manipulating pv_table, since
2293	 * this can be called at interrupt time.
2294	 */
2295	pmap_insert_entry(pmap, va, mpte, pa);
2296
2297	/*
2298	 * Increment counters
2299	 */
2300	pmap->pm_stats.resident_count++;
2301
2302	/*
2303	 * Now validate mapping with RO protection
2304	 */
2305	*pte = pa | PG_V | PG_U | PG_MANAGED;
2306
2307	return mpte;
2308}
2309
2310#define MAX_INIT_PT (96)	/* limit on pages preloaded by pmap_object_init_pt */
2311/*
2312 * pmap_object_init_pt preloads the ptes for a given object
2313 * into the specified pmap.  This eliminates the blast of soft
2314 * faults on process startup and immediately after an mmap.
2315 */
2316void
2317pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
2318	pmap_t pmap;
2319	vm_offset_t addr;
2320	vm_object_t object;
2321	vm_pindex_t pindex;
2322	vm_size_t size;
2323	int limit;
2324{
2325	vm_offset_t tmpidx;
2326	int psize;
2327	vm_page_t p, mpte;
2328	int objpgs;
2329
2330	if (!pmap)
2331		return;
2332
2333	/*
2334	 * This code maps large physical mmap regions (device memory) into
2335	 * the processor address space using 4MB page directory entries.
2336	 * Some shortcuts are taken, but the code works.
2337	 */
2338	if (pseflag &&
2339		(object->type == OBJT_DEVICE) &&
2340		((addr & (NBPDR - 1)) == 0) &&
2341		((size & (NBPDR - 1)) == 0) ) {
2342		int i;
2343		int s;
2344		vm_page_t m[1];
2345		unsigned int ptepindex;
2346		int npdes;
2347		vm_offset_t ptepa;
2348
2349		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
2350			return;
2351
2352		s = splhigh();
2353retry:
2354		p = vm_page_lookup(object, pindex);
2355		if (p && (p->flags & PG_BUSY)) {
2356			tsleep(p, PVM, "init4p", 0);
2357			goto retry;
2358		}
2359		splx(s);
2360
2361		if (p == NULL) {
2362			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
2363			if (p == NULL)
2364				return;
2365			m[0] = p;
2366
2367			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
2368				PAGE_WAKEUP(p);
2369				vm_page_free(p);
2370				return;
2371			}
2372
2373			p = vm_page_lookup(object, pindex);
2374			PAGE_WAKEUP(p);
2375		}
2376
2377		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
2378		if (ptepa & (NBPDR - 1)) {
2379			return;
2380		}
2381
2382		p->valid = VM_PAGE_BITS_ALL;
2383
2384		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
2385		npdes = size >> PDRSHIFT;
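		/*
		 * Install one 4MB (PG_PS) page directory entry for each
		 * NBPDR-sized chunk of the region; no page table pages
		 * are needed for these superpage mappings.
		 */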
2386		for(i=0;i<npdes;i++) {
2387			pmap->pm_pdir[ptepindex] =
2388				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
2389			ptepa += NBPDR;
2390			ptepindex += 1;
2391		}
2392		p->flags |= PG_MAPPED;
2393		invltlb();
2394		return;
2395	}
2396
2397	psize = i386_btop(size);
2398
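	/*
	 * Only vnode-backed objects are preloaded here; when a limit is
	 * requested, regions larger than MAX_INIT_PT pages backed by
	 * objects with more than MAX_INIT_PT resident pages are skipped.
	 */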
2399	if ((object->type != OBJT_VNODE) ||
2400		(limit && (psize > MAX_INIT_PT) &&
2401			(object->resident_page_count > MAX_INIT_PT))) {
2402		return;
2403	}
2404
2405	if (psize + pindex > object->size)
2406		psize = object->size - pindex;
2407
2408	mpte = NULL;
2409	/*
2410	 * if we are processing a major portion of the object, then scan the
2411	 * entire thing.
2412	 */
2413	if (psize > (object->size >> 2)) {
2414		objpgs = psize;
2415
2416		for (p = TAILQ_FIRST(&object->memq);
2417		    ((objpgs > 0) && (p != NULL));
2418		    p = TAILQ_NEXT(p, listq)) {
2419
2420			tmpidx = p->pindex;
2421			if (tmpidx < pindex) {
2422				continue;
2423			}
2424			tmpidx -= pindex;
2425			if (tmpidx >= psize) {
2426				continue;
2427			}
2428			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2429			    (p->busy == 0) &&
2430			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2431				if ((p->queue - p->pc) == PQ_CACHE)
2432					vm_page_deactivate(p);
2433				p->flags |= PG_BUSY;
2434				mpte = pmap_enter_quick(pmap,
2435					addr + i386_ptob(tmpidx),
2436					VM_PAGE_TO_PHYS(p), mpte);
2437				p->flags |= PG_MAPPED;
2438				PAGE_WAKEUP(p);
2439			}
2440			objpgs -= 1;
2441		}
2442	} else {
2443		/*
2444		 * else lookup the pages one-by-one.
2445		 */
2446		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
2447			p = vm_page_lookup(object, tmpidx + pindex);
2448			if (p &&
2449			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2450			    (p->busy == 0) &&
2451			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2452				if ((p->queue - p->pc) == PQ_CACHE)
2453					vm_page_deactivate(p);
2454				p->flags |= PG_BUSY;
2455				mpte = pmap_enter_quick(pmap,
2456					addr + i386_ptob(tmpidx),
2457					VM_PAGE_TO_PHYS(p), mpte);
2458				p->flags |= PG_MAPPED;
2459				PAGE_WAKEUP(p);
2460			}
2461		}
2462	}
2463	return;
2464}
2465
2466/*
2467 * pmap_prefault provides a quick way of clustering
2468 * page faults into a process's address space.  It is a "cousin"
2469 * of pmap_object_init_pt, except it runs at page fault time instead
2470 * of mmap time.
2471 */
2472#define PFBAK 2
2473#define PFFOR 2
2474#define PAGEORDER_SIZE (PFBAK+PFFOR)
2475
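/*
 * Virtual-address offsets, relative to the faulting address, that
 * pmap_prefault examines: the nearest pages first, up to PFBAK pages
 * behind and PFFOR pages ahead.
 */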
2476static int pmap_prefault_pageorder[] = {
2477	-PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE
2478};
2479
2480void
2481pmap_prefault(pmap, addra, entry, object)
2482	pmap_t pmap;
2483	vm_offset_t addra;
2484	vm_map_entry_t entry;
2485	vm_object_t object;
2486{
2487	int i;
2488	vm_offset_t starta;
2489	vm_offset_t addr;
2490	vm_pindex_t pindex;
2491	vm_page_t m, mpte;
2492
2493	if (entry->object.vm_object != object)
2494		return;
2495
2496	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap))
2497		return;
2498
2499	starta = addra - PFBAK * PAGE_SIZE;
2500	if (starta < entry->start) {
2501		starta = entry->start;
2502	} else if (starta > addra) {
2503		starta = 0;
2504	}
2505
2506	mpte = NULL;
2507	for (i = 0; i < PAGEORDER_SIZE; i++) {
2508		vm_object_t lobject;
2509		unsigned *pte;
2510
2511		addr = addra + pmap_prefault_pageorder[i];
2512		if (addr < starta || addr >= entry->end)
2513			continue;
2514
2515		if ((*pmap_pde(pmap, addr)) == 0)	/* no page table page */
2516			continue;
2517
2518		pte = (unsigned *) vtopte(addr);
2519		if (*pte)
2520			continue;
2521
2522		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2523		lobject = object;
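		/*
		 * Walk down the backing object chain looking for the page,
		 * adjusting pindex by each object's backing offset.
		 */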
2524		for (m = vm_page_lookup(lobject, pindex);
2525		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2526		    lobject = lobject->backing_object) {
2527			if (lobject->backing_object_offset & PAGE_MASK)
2528				break;
2529			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2530			m = vm_page_lookup(lobject->backing_object, pindex);
2531		}
2532
2533		/*
2534		 * Give up when a page is not in memory.
2535		 */
2536		if (m == NULL)
2537			break;
2538
2539		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2540		    (m->busy == 0) &&
2541		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2542
2543			if ((m->queue - m->pc) == PQ_CACHE) {
2544				vm_page_deactivate(m);
2545			}
2546			m->flags |= PG_BUSY;
2547			mpte = pmap_enter_quick(pmap, addr,
2548				VM_PAGE_TO_PHYS(m), mpte);
2549			m->flags |= PG_MAPPED;
2550			PAGE_WAKEUP(m);
2551		}
2552	}
2553}
2554
2555/*
2556 *	Routine:	pmap_change_wiring
2557 *	Function:	Change the wiring attribute for a map/virtual-address
2558 *			pair.
2559 *	In/out conditions:
2560 *			The mapping must already exist in the pmap.
2561 */
2562void
2563pmap_change_wiring(pmap, va, wired)
2564	register pmap_t pmap;
2565	vm_offset_t va;
2566	boolean_t wired;
2567{
2568	register unsigned *pte;
2569
2570	if (pmap == NULL)
2571		return;
2572
2573	pte = pmap_pte(pmap, va);
2574
2575	if (wired && !pmap_pte_w(pte))
2576		pmap->pm_stats.wired_count++;
2577	else if (!wired && pmap_pte_w(pte))
2578		pmap->pm_stats.wired_count--;
2579
2580	/*
2581	 * Wiring is not a hardware characteristic so there is no need to
2582	 * invalidate TLB.
2583	 */
2584	pmap_pte_set_w(pte, wired);
2585}
2586
2587
2588
2589/*
2590 *	Copy the range specified by src_addr/len
2591 *	from the source map to the range dst_addr/len
2592 *	in the destination map.
2593 *
2594 *	This routine is only advisory and need not do anything.
2595 */
2596
2597void
2598pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2599	pmap_t dst_pmap, src_pmap;
2600	vm_offset_t dst_addr;
2601	vm_size_t len;
2602	vm_offset_t src_addr;
2603{
2604	vm_offset_t addr;
2605	vm_offset_t end_addr = src_addr + len;
2606	vm_offset_t pdnxt;
2607	unsigned src_frame, dst_frame;
2608
2609	if (dst_addr != src_addr)
2610		return;
2611
2612	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2613	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
2614		return;
2615	}
2616
2617	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2618	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
2619		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
2620		invltlb();
2621	}
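	/*
	 * The source pmap must be the current one, so its page tables are
	 * reachable through the normal recursive mapping (vtopte); the
	 * destination's are reachable through the alternate mapping
	 * (avtopte) now that APTDpde points at its page directory.
	 */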
2622
2623	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
2624		unsigned *src_pte, *dst_pte;
2625		vm_page_t dstmpte, srcmpte;
2626		vm_offset_t srcptepaddr;
2627		unsigned ptepindex;
2628
2629		if (addr >= UPT_MIN_ADDRESS)
2630			panic("pmap_copy: invalid to pmap_copy page tables\n");
2631
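		/*
		 * pdnxt is the first address of the next page-directory-
		 * sized (4MB) region; ptepindex selects the pde that
		 * covers addr.
		 */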
2632		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2633		ptepindex = addr >> PDRSHIFT;
2634
2635		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2636		if (srcptepaddr == 0)
2637			continue;
2638
2639		if (srcptepaddr & PG_PS) {
2640			if (dst_pmap->pm_pdir[ptepindex] == 0) {
2641				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2642				dst_pmap->pm_stats.resident_count += NBPDR;
2643			}
2644			continue;
2645		}
2646
2647		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
2648		if ((srcmpte == NULL) ||
2649			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
2650			continue;
2651
2652		if (pdnxt > end_addr)
2653			pdnxt = end_addr;
2654
2655		src_pte = (unsigned *) vtopte(addr);
2656		dst_pte = (unsigned *) avtopte(addr);
2657		while (addr < pdnxt) {
2658			unsigned ptetemp;
2659			ptetemp = *src_pte;
2660			/*
2661			 * We only virtual-copy managed page mappings.
2662			 */
2663			if ((ptetemp & PG_MANAGED) != 0) {
2664				/*
2665				 * We have to re-check the pte after
2666				 * pmap_allocpte, since pmap_allocpte can
2667				 * block and the pte may have changed.
2668				 */
2669				dstmpte = pmap_allocpte(dst_pmap, addr);
2670				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
2671					/*
2672					 * Clear the modified and
2673					 * accessed (referenced) bits
2674					 * during the copy.
2675					 */
2676					*dst_pte = ptetemp & ~(PG_M|PG_A);
2677					dst_pmap->pm_stats.resident_count++;
2678					pmap_insert_entry(dst_pmap, addr,
2679						dstmpte,
2680						(ptetemp & PG_FRAME));
2681				} else {
2682					pmap_unwire_pte_hold(dst_pmap, dstmpte);
2683				}
2684				if (dstmpte->hold_count >= srcmpte->hold_count)
2685					break;
2686			}
2687			addr += PAGE_SIZE;
2688			++src_pte;
2689			++dst_pte;
2690		}
2691	}
2692}
2693
2694/*
2695 *	Routine:	pmap_kernel
2696 *	Function:
2697 *		Returns the physical map handle for the kernel.
2698 */
2699pmap_t
2700pmap_kernel()
2701{
2702	return (kernel_pmap);
2703}
2704
2705/*
2706 *	pmap_zero_page zeros the specified (machine independent)
2707 *	page by mapping the page into virtual memory and using
2708 *	bzero to clear its contents, one machine dependent page
2709 *	at a time.
2710 */
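/*
 * CMAP2/CADDR2 (or the per-cpu prv_CMAP3/prv_CPAGE3 in the SMP case)
 * are a reserved kernel pte slot and its virtual address, used as a
 * scratch window for temporarily mapping the physical page.
 */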
2711void
2712pmap_zero_page(phys)
2713	vm_offset_t phys;
2714{
2715#ifdef SMP
2716	if (*(int *) prv_CMAP3)
2717		panic("pmap_zero_page: prv_CMAP3 busy");
2718
2719	*(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME);
2720	invltlb_1pg((vm_offset_t) &prv_CPAGE3);
2721
2722	bzero(&prv_CPAGE3, PAGE_SIZE);
2723
2724	*(int *) prv_CMAP3 = 0;
2725	invltlb_1pg((vm_offset_t) &prv_CPAGE3);
2726#else
2727	if (*(int *) CMAP2)
2728		panic("pmap_zero_page: CMAP busy");
2729
2730	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME);
2731	bzero(CADDR2, PAGE_SIZE);
2732	*(int *) CMAP2 = 0;
2733	invltlb_1pg((vm_offset_t) CADDR2);
2734#endif
2735}
2736
2737/*
2738 *	pmap_copy_page copies the specified (machine independent)
2739 *	page by mapping the page into virtual memory and using
2740 *	bcopy to copy the page, one machine dependent page at a
2741 *	time.
2742 */
2743void
2744pmap_copy_page(src, dst)
2745	vm_offset_t src;
2746	vm_offset_t dst;
2747{
2748#ifdef SMP
2749	if (*(int *) prv_CMAP1)
2750		panic("pmap_copy_page: prv_CMAP1 busy");
2751	if (*(int *) prv_CMAP2)
2752		panic("pmap_copy_page: prv_CMAP2 busy");
2753
2754	*(int *) prv_CMAP1 = PG_V | PG_RW | (src & PG_FRAME);
2755	*(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME);
2756
2757	invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2);
2758
2759	bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE);
2760
2761	*(int *) prv_CMAP1 = 0;
2762	*(int *) prv_CMAP2 = 0;
2763	invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2);
2764#else
2765	if (*(int *) CMAP1 || *(int *) CMAP2)
2766		panic("pmap_copy_page: CMAP busy");
2767
2768	*(int *) CMAP1 = PG_V | PG_RW | (src & PG_FRAME);
2769	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME);
2770
2771	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2772
2773	*(int *) CMAP1 = 0;
2774	*(int *) CMAP2 = 0;
2775	invltlb_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2);
2776#endif
2777}
2778
2779
2780/*
2781 *	Routine:	pmap_pageable
2782 *	Function:
2783 *		Make the specified pages (by pmap, offset)
2784 *		pageable (or not) as requested.
2785 *
2786 *		A page which is not pageable may not take
2787 *		a fault; therefore, its page table entry
2788 *		must remain valid for the duration.
2789 *
2790 *		This routine is merely advisory; pmap_enter
2791 *		will specify that these pages are to be wired
2792 *		down (or not) as appropriate.
2793 */
2794void
2795pmap_pageable(pmap, sva, eva, pageable)
2796	pmap_t pmap;
2797	vm_offset_t sva, eva;
2798	boolean_t pageable;
2799{
2800}
2801
2802/*
2803 * this routine returns true if a physical page resides
2804 * this routine returns true if the given physical page has at least
2805 * one mapping in the given pmap.
2806boolean_t
2807pmap_page_exists(pmap, pa)
2808	pmap_t pmap;
2809	vm_offset_t pa;
2810{
2811	register pv_entry_t pv;
2812	pv_table_t *ppv;
2813	int s;
2814
2815	if (!pmap_is_managed(pa))
2816		return FALSE;
2817
2818	s = splvm();
2819
2820	ppv = pa_to_pvh(pa);
2821	/*
2822	 * Check current mappings, returning immediately if one is found.
2823	 */
2824	for (pv = TAILQ_FIRST(&ppv->pv_list);
2825		pv;
2826		pv = TAILQ_NEXT(pv, pv_list)) {
2827		if (pv->pv_pmap == pmap) {
2828			splx(s);
2829			return TRUE;
2830		}
2831	}
2832	splx(s);
2833	return (FALSE);
2834}
2835
2836#define PMAP_REMOVE_PAGES_CURPROC_ONLY	/* assume the pmap is curproc's, so vtopte() can be used */
2837/*
2838 * Remove all pages from the specified address space;
2839 * this aids process exit speeds.  Also, this code
2840 * is special cased for current process only, but
2841 * can have the more generic (and slightly slower)
2842 * mode enabled.  This is much faster than pmap_remove
2843 * in the case of running down an entire address space.
2844 */
2845void
2846pmap_remove_pages(pmap, sva, eva)
2847	pmap_t pmap;
2848	vm_offset_t sva, eva;
2849{
2850	unsigned *pte, tpte;
2851	pv_table_t *ppv;
2852	pv_entry_t pv, npv;
2853	int s;
2854
2855#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2856	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) {
2857		printf("warning: pmap_remove_pages called with non-current pmap\n");
2858		return;
2859	}
2860#endif
2861
2862	s = splvm();
2863	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
2864		pv;
2865		pv = npv) {
2866
2867		if (pv->pv_va >= eva || pv->pv_va < sva) {
2868			npv = TAILQ_NEXT(pv, pv_plist);
2869			continue;
2870		}
2871
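		/*
		 * vtopte() is only valid for the current pmap, which the
		 * CURPROC_ONLY case guarantees; the general path has to
		 * use the slower pmap_pte_quick().
		 */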
2872#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2873		pte = (unsigned *)vtopte(pv->pv_va);
2874#else
2875		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2876#endif
2877		tpte = *pte;
2878
2879/*
2880 * We cannot remove wired pages from a process' mapping at this time
2881 */
2882		if (tpte & PG_W) {
2883			npv = TAILQ_NEXT(pv, pv_plist);
2884			continue;
2885		}
2886		*pte = 0;
2887
2888		ppv = pa_to_pvh(tpte);
2889
2890		pv->pv_pmap->pm_stats.resident_count--;
2891
2892		/*
2893		 * Update the vm_page_t clean and reference bits.
2894		 */
2895		if (tpte & PG_M) {
2896			ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2897		}
2898
2899
2900		npv = TAILQ_NEXT(pv, pv_plist);
2901		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2902
2903		--ppv->pv_list_count;
2904		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
2905		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
2906			ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
2907		}
2908
2909		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2910		free_pv_entry(pv);
2911	}
2912	splx(s);
2913	invltlb();
2914}
2915
2916/*
2917 * pmap_testbit tests bits in ptes.  Note that the testbit/changebit
2918 * routines are effectively inline, so a lot of things can be
2919 * evaluated at compile time.
2920 */
2921static boolean_t
2922pmap_testbit(pa, bit)
2923	register vm_offset_t pa;
2924	int bit;
2925{
2926	register pv_entry_t pv;
2927	pv_table_t *ppv;
2928	unsigned *pte;
2929	int s;
2930
2931	if (!pmap_is_managed(pa))
2932		return FALSE;
2933
2934	ppv = pa_to_pvh(pa);
2935	if (TAILQ_FIRST(&ppv->pv_list) == NULL)
2936		return FALSE;
2937
2938	s = splvm();
2939
2940	for (pv = TAILQ_FIRST(&ppv->pv_list);
2941		pv;
2942		pv = TAILQ_NEXT(pv, pv_list)) {
2943
2944		/*
2945		 * If the bit being tested is the modified or accessed bit,
2946		 * skip mappings in ranges we do not track (such as the
2947		 * clean_map range used by the pagers).
2948		 */
2949		if (bit & (PG_A|PG_M)) {
2950			if (!pmap_track_modified(pv->pv_va))
2951				continue;
2952		}
2953
2954#if defined(PMAP_DIAGNOSTIC)
2955		if (!pv->pv_pmap) {
2956			printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va);
2957			continue;
2958		}
2959#endif
2960		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2961		if (*pte & bit) {
2962			splx(s);
2963			return TRUE;
2964		}
2965	}
2966	splx(s);
2967	return (FALSE);
2968}
2969
2970/*
2971 * this routine is used to modify bits in ptes
2972 */
2973static void
2974pmap_changebit(pa, bit, setem)
2975	vm_offset_t pa;
2976	int bit;
2977	boolean_t setem;
2978{
2979	register pv_entry_t pv;
2980	pv_table_t *ppv;
2981	register unsigned *pte;
2982	int changed;
2983	int s;
2984
2985	if (!pmap_is_managed(pa))
2986		return;
2987
2988	s = splvm();
2989	changed = 0;
2990	ppv = pa_to_pvh(pa);
2991
2992	/*
2993	 * Loop over all current mappings, setting or clearing as appropriate.
2994	 * (If setting RO, do we need to clear the VAC?)
2995	 */
2996	for (pv = TAILQ_FIRST(&ppv->pv_list);
2997		pv;
2998		pv = TAILQ_NEXT(pv, pv_list)) {
2999
3000		/*
3001		 * don't write protect pager mappings
3002		 */
3003		if (!setem && (bit == PG_RW)) {
3004			if (!pmap_track_modified(pv->pv_va))
3005				continue;
3006		}
3007
3008#if defined(PMAP_DIAGNOSTIC)
3009		if (!pv->pv_pmap) {
3010			printf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va);
3011			continue;
3012		}
3013#endif
3014
3015		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3016
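		/*
		 * When clearing PG_RW, fold the hardware modified bit into
		 * the vm_page's dirty field before the pte loses it.
		 */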
3017		if (setem) {
3018			*(int *)pte |= bit;
3019			changed = 1;
3020		} else {
3021			vm_offset_t pbits = *(vm_offset_t *)pte;
3022			if (pbits & bit) {
3023				changed = 1;
3024				if (bit == PG_RW) {
3025					if (pbits & PG_M) {
3026						ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
3027					}
3028					*(int *)pte = pbits & ~(PG_M|PG_RW);
3029				} else {
3030					*(int *)pte = pbits & ~bit;
3031				}
3032			}
3033		}
3034	}
3035	splx(s);
3036	if (changed)
3037		invltlb();
3038}
3039
3040/*
3041 *      pmap_page_protect:
3042 *
3043 *      Lower the permission for all mappings to a given page.
3044 */
3045void
3046pmap_page_protect(vm_offset_t phys, vm_prot_t prot)
3047{
3048	if ((prot & VM_PROT_WRITE) == 0) {
3049		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
3050			pmap_changebit(phys, PG_RW, FALSE);
3051		} else {
3052			pmap_remove_all(phys);
3053		}
3054	}
3055}
3056
3057vm_offset_t
3058pmap_phys_address(ppn)
3059	int ppn;
3060{
3061	return (i386_ptob(ppn));
3062}
3063
3064/*
3065 *	pmap_ts_referenced:
3066 *
3067 *	Return the count of reference bits for a page, clearing all of them.
3068 *
3069 */
3070int
3071pmap_ts_referenced(vm_offset_t pa)
3072{
3073	register pv_entry_t pv;
3074	pv_table_t *ppv;
3075	unsigned *pte;
3076	int s;
3077	int rtval = 0;
3078
3079	if (!pmap_is_managed(pa))
3080		return 0;
3081
3082	s = splvm();
3083
3084	ppv = pa_to_pvh(pa);
3085
3086	if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
3087		splx(s);
3088		return 0;
3089	}
3090
3091	/*
3092	 * Loop over the current mappings, counting and clearing PG_A.
3093	 */
3094	for (pv = TAILQ_FIRST(&ppv->pv_list);
3095		pv;
3096		pv = TAILQ_NEXT(pv, pv_list)) {
3097		/*
3098		 * Skip mappings whose reference/modify status we do not
3099		 * track, such as the kernel clean_map range used by the
3100		 * pagers.
3101		 */
3102		if (!pmap_track_modified(pv->pv_va))
3103			continue;
3104
3105		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3106		if (pte == NULL) {
3107			continue;
3108		}
3109		if (*pte & PG_A) {
3110			rtval++;
3111			*pte &= ~PG_A;
3112		}
3113	}
3114	splx(s);
3115	if (rtval) {
3116		invltlb();
3117	}
3118	return (rtval);
3119}
3120
3121/*
3122 *	pmap_is_modified:
3123 *
3124 *	Return whether or not the specified physical page was modified
3125 *	in any physical maps.
3126 */
3127boolean_t
3128pmap_is_modified(vm_offset_t pa)
3129{
3130	return pmap_testbit((pa), PG_M);
3131}
3132
3133/*
3134 *	Clear the modify bits on the specified physical page.
3135 */
3136void
3137pmap_clear_modify(vm_offset_t pa)
3138{
3139	pmap_changebit((pa), PG_M, FALSE);
3140}
3141
3142/*
3143 *	pmap_clear_reference:
3144 *
3145 *	Clear the reference bit on the specified physical page.
3146 */
3147void
3148pmap_clear_reference(vm_offset_t pa)
3149{
3150	pmap_changebit((pa), PG_A, FALSE);
3151}
3152
3153/*
3154 * Miscellaneous support routines follow
3155 */
3156
3157static void
3158i386_protection_init()
3159{
3160	register int *kp, prot;
3161
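	/*
	 * Build the table mapping each VM_PROT_* combination to i386 pte
	 * protection bits.  The 386 pte has no execute bit and read
	 * access cannot be denied, so only PG_RW is ever set here.
	 */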
3162	kp = protection_codes;
3163	for (prot = 0; prot < 8; prot++) {
3164		switch (prot) {
3165		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3166			/*
3167			 * Read access is also 0. There isn't any execute bit,
3168			 * so just make it readable.
3169			 */
3170		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3171		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3172		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3173			*kp++ = 0;
3174			break;
3175		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3176		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3177		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3178		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3179			*kp++ = PG_RW;
3180			break;
3181		}
3182	}
3183}
3184
3185/*
3186 * Map a set of physical memory pages into the kernel virtual
3187 * address space. Return a pointer to where it is mapped. This
3188 * routine is intended to be used for mapping device memory,
3189 * NOT real memory.
3190 */
3191void *
3192pmap_mapdev(pa, size)
3193	vm_offset_t pa;
3194	vm_size_t size;
3195{
3196	vm_offset_t va, tmpva;
3197	unsigned *pte;
3198
3199	size = roundup(size, PAGE_SIZE);
3200
3201	va = kmem_alloc_pageable(kernel_map, size);
3202	if (!va)
3203		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3204
3205	pa = pa & PG_FRAME;
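	/*
	 * Enter a writable kernel pte for each page of the region;
	 * pgeflag (PG_G) keeps the mappings global on CPUs with PGE.
	 */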
3206	for (tmpva = va; size > 0;) {
3207		pte = (unsigned *)vtopte(tmpva);
3208		*pte = pa | PG_RW | PG_V | pgeflag;
3209		size -= PAGE_SIZE;
3210		tmpva += PAGE_SIZE;
3211		pa += PAGE_SIZE;
3212	}
3213	invltlb();
3214
3215	return ((void *) va);
3216}
3217
3218/*
3219 * perform the pmap work for mincore
3220 */
3221int
3222pmap_mincore(pmap, addr)
3223	pmap_t pmap;
3224	vm_offset_t addr;
3225{
3226
3227	unsigned *ptep, pte;
3228	int val = 0;
3229
3230	ptep = pmap_pte(pmap, addr);
3231	if (ptep == 0) {
3232		return 0;
3233	}
3234
3235	if ((pte = *ptep) != 0) {
3236		vm_offset_t pa;
3237		val = MINCORE_INCORE;
3238		pa = pte & PG_FRAME;
3239
3240		/*
3241		 * Modified by us
3242		 */
3243		if (pte & PG_M)
3244			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3245		/*
3246		 * Modified by someone
3247		 */
3248		else if (PHYS_TO_VM_PAGE(pa)->dirty ||
3249			pmap_is_modified(pa))
3250			val |= MINCORE_MODIFIED_OTHER;
3251		/*
3252		 * Referenced by us
3253		 */
3254		if (pte & PG_U)
3255			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3256
3257		/*
3258		 * Referenced by someone
3259		 */
3260		else if ((PHYS_TO_VM_PAGE(pa)->flags & PG_REFERENCED) ||
3261			pmap_ts_referenced(pa)) {
3262			val |= MINCORE_REFERENCED_OTHER;
3263			PHYS_TO_VM_PAGE(pa)->flags |= PG_REFERENCED;
3264		}
3265	}
3266	return val;
3267}
3268
3269void
3270pmap_activate(struct proc *p)
3271{
3272#if defined(SWTCH_OPTIM_STATS)
3273	++tlb_flush_count;
3274#endif
3275	load_cr3(p->p_addr->u_pcb.pcb_cr3 =
3276		vtophys(p->p_vmspace->vm_pmap.pm_pdir));
3277}
3278
3279vm_offset_t
3280pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) {
3281
3282	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3283		return addr;
3284	}
3285
3286	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3287	return addr;
3288}
3289
3290
3291#if defined(PMAP_DEBUG)
3292pmap_pid_dump(int pid) {
3293	pmap_t pmap;
3294	struct proc *p;
3295	int npte = 0;
3296	int index;
3297	for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) {
3298		if (p->p_pid != pid)
3299			continue;
3300
3301		if (p->p_vmspace) {
3302			int i,j;
3303			index = 0;
3304			pmap = &p->p_vmspace->vm_pmap;
3305			for(i=0;i<1024;i++) {
3306				pd_entry_t *pde;
3307				unsigned *pte;
3308				unsigned base = i << PDRSHIFT;
3309
3310				pde = &pmap->pm_pdir[i];
3311				if (pde && pmap_pde_v(pde)) {
3312					for(j=0;j<1024;j++) {
3313						unsigned va = base + (j << PAGE_SHIFT);
3314						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
3315							if (index) {
3316								index = 0;
3317								printf("\n");
3318							}
3319							return npte;
3320						}
3321						pte = pmap_pte_quick( pmap, va);
3322						if (pte && pmap_pte_v(pte)) {
3323							vm_offset_t pa;
3324							vm_page_t m;
3325							pa = *(int *)pte;
3326							m = PHYS_TO_VM_PAGE((pa & PG_FRAME));
3327							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
3328								va, pa, m->hold_count, m->wire_count, m->flags);
3329							npte++;
3330							index++;
3331							if (index >= 2) {
3332								index = 0;
3333								printf("\n");
3334							} else {
3335								printf(" ");
3336							}
3337						}
3338					}
3339				}
3340			}
3341		}
3342	}
3343	return npte;
3344}
3345#endif
3346
3347#if defined(DEBUG)
3348
3349static void	pads __P((pmap_t pm));
3350static void	pmap_pvdump __P((vm_offset_t pa));
3351
3352/* print the address space of a pmap */
3353static void
3354pads(pm)
3355	pmap_t pm;
3356{
3357	unsigned va, i, j;
3358	unsigned *ptep;
3359
3360	if (pm == kernel_pmap)
3361		return;
3362	for (i = 0; i < 1024; i++)
3363		if (pm->pm_pdir[i])
3364			for (j = 0; j < 1024; j++) {
3365				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3366				if (pm == kernel_pmap && va < KERNBASE)
3367					continue;
3368				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
3369					continue;
3370				ptep = pmap_pte_quick(pm, va);
3371				if (pmap_pte_v(ptep))
3372					printf("%x:%x ", va, *(int *) ptep);
3373			}
3374
3375}
3376
3377static void
3378pmap_pvdump(pa)
3379	vm_offset_t pa;
3380{
3381	pv_table_t *ppv;
3382	register pv_entry_t pv;
3383
3384	printf("pa %x", pa);
3385	ppv = pa_to_pvh(pa);
3386	for (pv = TAILQ_FIRST(&ppv->pv_list);
3387		pv;
3388		pv = TAILQ_NEXT(pv, pv_list)) {
3389#ifdef used_to_be
3390		printf(" -> pmap %x, va %x, flags %x",
3391		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
3392#endif
3393		printf(" -> pmap %x, va %x",
3394		    pv->pv_pmap, pv->pv_va);
3395		pads(pv->pv_pmap);
3396	}
3397	printf(" ");
3398}
3399#endif
3400