pmap.c revision 30309
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42 *	$Id: pmap.c,v 1.162 1997/09/21 05:50:02 dyson Exp $
43 */
44
45/*
46 *	Manages physical address maps.
47 *
48 *	In addition to hardware address maps, this
49 *	module is called upon to provide software-use-only
50 *	maps which may or may not be stored in the same
51 *	form as hardware maps.  These pseudo-maps are
52 *	used to store intermediate results from copy
53 *	operations to and from address spaces.
54 *
55 *	Since the information managed by this module is
56 *	also stored by the logical address mapping module,
57 *	this module may throw away valid virtual-to-physical
58 *	mappings at almost any time.  However, invalidations
59 *	of virtual-to-physical mappings must be done as
60 *	requested.
61 *
62 *	In order to cope with hardware architectures which
63 *	make virtual-to-physical map invalidates expensive,
64 *	this module may delay invalidate or reduce-protection
65 *	operations until such time as they are actually
66 *	necessary.  This module is given full information as
67 *	to which processors are currently using which maps,
68 *	and to when physical maps must be made correct.
69 */
70
71#include "opt_cpu.h"
72
73#include <sys/param.h>
74#include <sys/systm.h>
75#include <sys/proc.h>
76#include <sys/malloc.h>
77#include <sys/msgbuf.h>
78#include <sys/vmmeter.h>
79#include <sys/mman.h>
80
81#include <vm/vm.h>
82#include <vm/vm_param.h>
83#include <vm/vm_prot.h>
84#include <sys/lock.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_page.h>
87#include <vm/vm_map.h>
88#include <vm/vm_object.h>
89#include <vm/vm_extern.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_pager.h>
92#include <vm/vm_zone.h>
93
94#include <sys/user.h>
95
96#include <machine/cpu.h>
97#include <machine/cputypes.h>
98#include <machine/md_var.h>
99#include <machine/specialreg.h>
100#if defined(SMP) || defined(APIC_IO)
101#include <machine/smp.h>
102#include <machine/apic.h>
103#endif /* SMP || APIC_IO */
104
105#define PMAP_KEEP_PDIRS
106#ifndef PMAP_SHPGPERPROC
107#define PMAP_SHPGPERPROC 200
108#endif
109
110#if defined(DIAGNOSTIC)
111#define PMAP_DIAGNOSTIC
112#endif
113
114#if !defined(PMAP_DIAGNOSTIC)
115#define PMAP_INLINE __inline
116#else
117#define PMAP_INLINE
118#endif
119
120#define PTPHINT
121
122/*
123 * Get PDEs and PTEs for user/kernel address space
124 */
125#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
126#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
127
128#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
129#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
130#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
131#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
132#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
133
134#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
135#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
136
137/*
138 * Given a map and a machine-independent protection code,
139 * convert to an i386 protection code.
140 */
141#define pte_prot(m, p)	(protection_codes[p])
142static int protection_codes[8];
143
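/*
 * Convert a physical address to an index into pv_table, and from there
 * to the pv_table entry itself (vm_first_phys is the base of the range
 * covered by pv_table).
 */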
144#define	pa_index(pa)		atop((pa) - vm_first_phys)
145#define	pa_to_pvh(pa)		(&pv_table[pa_index(pa)])
146
147static struct pmap kernel_pmap_store;
148pmap_t kernel_pmap;
149extern pd_entry_t my_idlePTD;
150
151vm_offset_t avail_start;	/* PA of first available physical page */
152vm_offset_t avail_end;		/* PA of last available physical page */
153vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
154vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
155static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
156static vm_offset_t vm_first_phys;
157int pgeflag;		/* PG_G or-in */
158int pseflag;		/* PG_PS or-in */
159int pv_npg;
160
161int nkpt;
162vm_offset_t kernel_vm_end;
163
164extern vm_offset_t clean_sva, clean_eva;
165
166/*
167 * Data for the pv entry allocation mechanism
168 */
169vm_zone_t pvzone;
170struct vm_zone pvzone_store;
171struct vm_object pvzone_obj;
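/*
 * Static pool used to bootstrap the pv_entry zone before the zone
 * allocator is fully initialized (see zbootinit() in pmap_init()).
 */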
172#define NPVINIT 8192
173struct pv_entry pvinit[NPVINIT];
174
175/*
176 * All those kernel PT submaps that BSD is so fond of
177 */
178pt_entry_t *CMAP1 = 0;
179static pt_entry_t *CMAP2, *ptmmap;
180static pv_table_t *pv_table;
181caddr_t CADDR1 = 0, ptvmmap = 0;
182static caddr_t CADDR2;
183static pt_entry_t *msgbufmap;
184struct msgbuf *msgbufp=0;
185
186#ifdef SMP
187extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[];
188extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3;
189extern pd_entry_t *IdlePTDS[];
190extern pt_entry_t SMP_prvpt[];
191#endif
192
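/*
 * PMAP1/PADDR1 are a reserved kernel pte and its va, used by
 * pmap_pte_quick() to map one page table page at a time.
 */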
193pt_entry_t *PMAP1 = 0;
194unsigned *PADDR1 = 0;
195
196static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
197static unsigned * get_ptbase __P((pmap_t pmap));
198static pv_entry_t get_pv_entry __P((void));
199static void	i386_protection_init __P((void));
200static void	pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem));
201
202static PMAP_INLINE int	pmap_is_managed __P((vm_offset_t pa));
203static void	pmap_remove_all __P((vm_offset_t pa));
204static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
205				      vm_offset_t pa, vm_page_t mpte));
206static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
207					vm_offset_t sva));
208static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
209static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv,
210					vm_offset_t va));
211static boolean_t pmap_testbit __P((vm_offset_t pa, int bit));
212static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
213		vm_page_t mpte, vm_offset_t pa));
214
215static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
216
217static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
218static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
219static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
220static vm_page_t pmap_page_alloc __P((vm_object_t object, vm_pindex_t pindex));
221static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
222static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
223vm_offset_t pmap_kmem_choose(vm_offset_t addr);
224
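/*
 * A small stack of free page directory (kernel) va's, recycled between
 * pmap_release() and pmap_pinit() to avoid repeated kmem allocations.
 */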
225#define PDSTACKMAX 6
226static vm_offset_t pdstack[PDSTACKMAX];
227static int pdstackptr;
228unsigned pdir4mb;
229
230/*
231 *	Routine:	pmap_pte
232 *	Function:
233 *		Extract the page table entry associated
234 *		with the given map/virtual_address pair.
235 */
236
237PMAP_INLINE unsigned *
238pmap_pte(pmap, va)
239	register pmap_t pmap;
240	vm_offset_t va;
241{
242	unsigned *pdeaddr;
243
244	if (pmap) {
245		pdeaddr = (unsigned *) pmap_pde(pmap, va);
246		if (*pdeaddr & PG_PS)
247			return pdeaddr;
248		if (*pdeaddr) {
249			return get_ptbase(pmap) + i386_btop(va);
250		}
251	}
252	return (0);
253}
254
255/*
256 * Move the kernel virtual free pointer to the next
257 * 4MB.  This is used to help improve performance
258 * by using a large (4MB) page for much of the kernel
259 * (.text, .data, .bss)
260 */
261vm_offset_t
262pmap_kmem_choose(vm_offset_t addr) {
263	vm_offset_t newaddr = addr;
264#ifndef DISABLE_PSE
265	if (cpu_feature & CPUID_PSE) {
266		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
267	}
268#endif
269	return newaddr;
270}
271
272/*
273 *	Bootstrap the system enough to run with virtual memory.
274 *
275 *	On the i386 this is called after mapping has already been enabled
276 *	and just syncs the pmap module with what has already been done.
277 *	[We can't call it easily with mapping off since the kernel is not
278 *	mapped with PA == VA, hence we would have to relocate every address
279 *	from the linked base (virtual) address "KERNBASE" to the actual
280 *	(physical) address starting relative to 0]
281 */
282void
283pmap_bootstrap(firstaddr, loadaddr)
284	vm_offset_t firstaddr;
285	vm_offset_t loadaddr;
286{
287	vm_offset_t va;
288	pt_entry_t *pte;
289	int i, j;
290
291	avail_start = firstaddr;
292
293	/*
294	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
295	 * large. It should instead be correctly calculated in locore.s and
296	 * not based on 'first' (which is a physical address, not a virtual
297	 * address, for the start of unused physical memory). The kernel
298	 * page tables are NOT double mapped and thus should not be included
299	 * in this calculation.
300	 */
301	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
302	virtual_avail = pmap_kmem_choose(virtual_avail);
303
304	virtual_end = VM_MAX_KERNEL_ADDRESS;
305
306	/*
307	 * Initialize protection array.
308	 */
309	i386_protection_init();
310
311	/*
312	 * The kernel's pmap is statically allocated so we don't have to use
313	 * pmap_create, which is unlikely to work correctly at this part of
314	 * the boot sequence (XXX and which no longer exists).
315	 */
316	kernel_pmap = &kernel_pmap_store;
317
318	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
319
320	kernel_pmap->pm_count = 1;
321	TAILQ_INIT(&kernel_pmap->pm_pvlist);
322	nkpt = NKPT;
323
324	/*
325	 * Reserve some special page table entries/VA space for temporary
326	 * mapping of pages.
327	 */
328#define	SYSMAP(c, p, v, n)	\
329	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
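/*
 * SYSMAP(c, p, v, n): carve n pages worth of va out of the 'va' cursor
 * (cast to type c and stored in v) and advance the 'pte' cursor, leaving
 * p pointing at the first of the corresponding pte(s).
 */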
330
331	va = virtual_avail;
332	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
333
334	/*
335	 * CMAP1/CMAP2 are used for zeroing and copying pages.
336	 */
337	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
338	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
339
340	/*
341	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
342	 * XXX ptmmap is not used.
343	 */
344	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
345
346	/*
347	 * msgbufp is used to map the system message buffer.
348	 * XXX msgbufmap is not used.
349	 */
350	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
351	       atop(round_page(sizeof(struct msgbuf))))
352
353	/*
354	 * ptemap is used for pmap_pte_quick
355	 */
356	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
357
358	virtual_avail = va;
359
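	/*
	 * Invalidate the temporary CMAP mappings and clear the first page
	 * directory entry (the now-unneeded low mapping left over from boot).
	 */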
360	*(int *) CMAP1 = *(int *) CMAP2 = 0;
361	*(int *) PTD = 0;
362
363
364	pgeflag = 0;
365#if !defined(SMP)
366	if (cpu_feature & CPUID_PGE) {
367		pgeflag = PG_G;
368	}
369#endif
370
371/*
372 * Initialize the 4MB page size flag
373 */
374	pseflag = 0;
375/*
376 * The 4MB page version of the initial
377 * kernel page mapping.
378 */
379	pdir4mb = 0;
380
381#if !defined(DISABLE_PSE)
382	if (cpu_feature & CPUID_PSE) {
383		unsigned ptditmp;
384		/*
385		 * Enable the PSE mode
386		 */
387		load_cr4(rcr4() | CR4_PSE);
388
389		/*
390		 * Note that we have enabled PSE mode
391		 */
392		pseflag = PG_PS;
393		ptditmp = (unsigned) kernel_pmap->pm_pdir[KPTDI];
394		ptditmp &= ~(NBPDR - 1);
395		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
396		pdir4mb = ptditmp;
397		/*
398		 * We can do the mapping here for the single processor
399		 * case.  We simply ignore the old page table page from
400		 * now on.
401		 */
402#if !defined(SMP)
403		PTD[KPTDI] = (pd_entry_t) ptditmp;
404		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
405		invltlb();
406#endif
407	}
408#endif
409
410#ifdef SMP
411	if (cpu_apic_address == 0)
412		panic("pmap_bootstrap: no local apic!");
413
414	/* 0 = private page */
415	/* 1 = page table page */
416	/* 2 = local apic */
417	/* 16-31 = io apics */
418	SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME));
419
420	for (i = 0; i < mp_napics; i++) {
421		for (j = 0; j < 16; j++) {
422			/* same page frame as a previous IO apic? */
423			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) ==
424			    ((u_long)io_apic_address[0] & PG_FRAME)) {
425				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
426				break;
427			}
428			/* use this slot if available */
429			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) == 0) {
430				SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW | pgeflag |
431				    ((u_long)io_apic_address[i] & PG_FRAME));
432				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
433				break;
434			}
435		}
436		if (j == 16)
437			panic("no space to map IO apic %d!", i);
438	}
439
440	/* BSP does this itself, AP's get it pre-set */
441	prv_CMAP1 = (pt_entry_t *)&SMP_prvpt[5];
442	prv_CMAP2 = (pt_entry_t *)&SMP_prvpt[6];
443	prv_CMAP3 = (pt_entry_t *)&SMP_prvpt[7];
444#endif
445
446	invltlb();
447
448}
449
450/*
451 * Set 4mb pdir for mp startup, and global flags
452 */
453void
454pmap_set_opt(unsigned *pdir) {
455	int i;
456
457	if (pseflag && (cpu_feature & CPUID_PSE)) {
458		load_cr4(rcr4() | CR4_PSE);
459		if (pdir4mb) {
460			(unsigned) pdir[KPTDI] = pdir4mb;
461		}
462	}
463
464	if (pgeflag && (cpu_feature & CPUID_PGE)) {
465		load_cr4(rcr4() | CR4_PGE);
466		for(i = KPTDI; i < KPTDI + nkpt; i++) {
467			if (pdir[i]) {
468				pdir[i] |= PG_G;
469			}
470		}
471	}
472}
473
474/*
475 * Setup the PTD for the boot processor
476 */
477void
478pmap_set_opt_bsp(void)
479{
480	pmap_set_opt((unsigned *)kernel_pmap->pm_pdir);
481	pmap_set_opt((unsigned *)PTD);
482	invltlb();
483}
484
485/*
486 *	Initialize the pmap module.
487 *	Called by vm_init, to initialize any structures that the pmap
488 *	system needs to map virtual memory.
489 *	pmap_init has been enhanced to support, in a fairly consistent
490 *	way, discontiguous physical memory.
491 */
492void
493pmap_init(phys_start, phys_end)
494	vm_offset_t phys_start, phys_end;
495{
496	vm_offset_t addr;
497	vm_size_t s;
498	int i;
499
500	/*
501	 * calculate the number of pv_entries needed
502	 */
503	vm_first_phys = phys_avail[0];
504	for (i = 0; phys_avail[i + 1]; i += 2);
505	pv_npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE;
506
507	/*
508	 * Allocate memory for random pmap data structures.  Includes the
509	 * pv_head_table.
510	 */
511	s = (vm_size_t) (sizeof(pv_table_t) * pv_npg);
512	s = round_page(s);
513
514	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
515	pv_table = (pv_table_t *) addr;
516	for(i = 0; i < pv_npg; i++) {
517		vm_offset_t pa;
518		TAILQ_INIT(&pv_table[i].pv_list);
519		pv_table[i].pv_list_count = 0;
520		pa = vm_first_phys + i * PAGE_SIZE;
521		pv_table[i].pv_vm_page = PHYS_TO_VM_PAGE(pa);
522	}
523
524	/*
525	 * init the pv free list
526	 */
527	pvzone = &pvzone_store;
528	zbootinit(pvzone, "PV ENTRY", sizeof(pvinit[0]), pvinit, NPVINIT);
529
530	/*
531	 * Now it is safe to enable pv_table recording.
532	 */
533	pmap_initialized = TRUE;
534}
535
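/*
 * Finish initializing the pv_entry zone:  back it with its own object
 * and size it from maxproc and the number of managed pages (pv_npg).
 */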
536void
537pmap_init2() {
538	zinitna(pvzone, &pvzone_obj, NULL, 0,
539		PMAP_SHPGPERPROC * maxproc + pv_npg, ZONE_INTERRUPT, 4);
540}
541
542/*
543 *	Used to map a range of physical addresses into kernel
544 *	virtual address space.
545 *
546 *	For now, VM is already on; we only need to map the
547 *	specified memory.
548 */
549vm_offset_t
550pmap_map(virt, start, end, prot)
551	vm_offset_t virt;
552	vm_offset_t start;
553	vm_offset_t end;
554	int prot;
555{
556	while (start < end) {
557		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
558		virt += PAGE_SIZE;
559		start += PAGE_SIZE;
560	}
561	return (virt);
562}
563
564
565/***************************************************
566 * Low level helper routines.....
567 ***************************************************/
568
569#if defined(PMAP_DIAGNOSTIC)
570
571/*
572 * This code checks for pages that are marked modified but not
573 * writable, which should be an invalid condition.
574 */
575static int
576pmap_nw_modified(pt_entry_t ptea) {
577	int pte;
578
579	pte = (int) ptea;
580
581	if ((pte & (PG_M|PG_RW)) == PG_M)
582		return 1;
583	else
584		return 0;
585}
586#endif
587
588
589/*
590 * this routine determines whether the modified bit should be tracked
591 * for a given va; addresses in [clean_sva, clean_eva) are not tracked.
592 */
593static PMAP_INLINE int
594pmap_track_modified( vm_offset_t va) {
595	if ((va < clean_sva) || (va >= clean_eva))
596		return 1;
597	else
598		return 0;
599}
600
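/*
 * Invalidate a single TLB entry, falling back to a full TLB flush on
 * the 386, which lacks the invlpg instruction.
 */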
601static PMAP_INLINE void
602invltlb_1pg( vm_offset_t va) {
603#if defined(I386_CPU)
604	if (cpu_class == CPUCLASS_386) {
605		invltlb();
606	} else
607#endif
608	{
609		invlpg(va);
610	}
611}
612
613static PMAP_INLINE void
614invltlb_2pg( vm_offset_t va1, vm_offset_t va2) {
615#if defined(I386_CPU)
616	if (cpu_class == CPUCLASS_386) {
617		invltlb();
618	} else
619#endif
620	{
621		invlpg(va1);
622		invlpg(va2);
623	}
624}
625
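/*
 * Return the recursively-mapped page table base for the given pmap:
 * PTmap if the pmap is the kernel's or the current one, otherwise the
 * alternate map (APTmap), loading APTDpde first if necessary.
 */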
626static unsigned *
627get_ptbase(pmap)
628	pmap_t pmap;
629{
630	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
631
632	/* are we current address space or kernel? */
633	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
634		return (unsigned *) PTmap;
635	}
636	/* otherwise, we are alternate address space */
637	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
638		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
639		invltlb();
640	}
641	return (unsigned *) APTmap;
642}
643
644/*
645 * Super fast pmap_pte routine best used when scanning
646 * the pv lists.  This eliminates many coarse-grained
647 * invltlb calls.  Note that many of the pv list
648 * scans are across different pmaps.  It is very wasteful
649 * to do an entire invltlb for checking a single mapping.
650 */
651
652static unsigned *
653pmap_pte_quick(pmap, va)
654	register pmap_t pmap;
655	vm_offset_t va;
656{
657	unsigned pde, newpf;
658	if (pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) {
659		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
660		unsigned index = i386_btop(va);
661		/* are we current address space or kernel? */
662		if ((pmap == kernel_pmap) ||
663			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
664			return (unsigned *) PTmap + index;
665		}
666		newpf = pde & PG_FRAME;
667		if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
668			* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
669			invltlb_1pg((vm_offset_t) PADDR1);
670		}
671		return PADDR1 + ((unsigned) index & (NPTEPG - 1));
672	}
673	return (0);
674}
675
676/*
677 *	Routine:	pmap_extract
678 *	Function:
679 *		Extract the physical page address associated
680 *		with the given map/virtual_address pair.
681 */
682vm_offset_t
683pmap_extract(pmap, va)
684	register pmap_t pmap;
685	vm_offset_t va;
686{
687	vm_offset_t rtval;
688	vm_offset_t pdirindex;
689	pdirindex = va >> PDRSHIFT;
690	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
691		unsigned *pte;
692		if ((rtval & PG_PS) != 0) {
693			rtval &= ~(NBPDR - 1);
694			rtval |= va & (NBPDR - 1);
695			return rtval;
696		}
697		pte = get_ptbase(pmap) + i386_btop(va);
698		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
699		return rtval;
700	}
701	return 0;
702
703}
704
705/*
706 * determine if a page is managed (memory vs. device)
707 */
708static PMAP_INLINE int
709pmap_is_managed(pa)
710	vm_offset_t pa;
711{
712	int i;
713
714	if (!pmap_initialized)
715		return 0;
716
717	for (i = 0; phys_avail[i + 1]; i += 2) {
718		if (pa < phys_avail[i + 1] && pa >= phys_avail[i])
719			return 1;
720	}
721	return 0;
722}
723
724
725/***************************************************
726 * Low level mapping routines.....
727 ***************************************************/
728
729/*
730 * Add a list of wired pages to the kva.
731 * This routine is only used for temporary
732 * kernel mappings that do not need to have
733 * page modification or references recorded.
734 * Note that old mappings are simply written
735 * over.  The page *must* be wired.
736 */
737void
738pmap_qenter(va, m, count)
739	vm_offset_t va;
740	vm_page_t *m;
741	int count;
742{
743	int i;
744	register unsigned *pte;
745
746	for (i = 0; i < count; i++) {
747		vm_offset_t tva = va + i * PAGE_SIZE;
748		unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | pgeflag;
749		unsigned opte;
750		pte = (unsigned *)vtopte(tva);
751		opte = *pte;
752		*pte = npte;
753		if (opte)
754			invltlb_1pg(tva);
755	}
756}
757
758/*
759 * this routine jerks page mappings from the
760 * kernel -- it is meant only for temporary mappings.
761 */
762void
763pmap_qremove(va, count)
764	vm_offset_t va;
765	int count;
766{
767	int i;
768	register unsigned *pte;
769
770	for (i = 0; i < count; i++) {
771		pte = (unsigned *)vtopte(va);
772		*pte = 0;
773		invltlb_1pg(va);
774		va += PAGE_SIZE;
775	}
776}
777
778/*
779 * add a wired page to the kva.
780 * note that in order for the mapping to take effect, you
781 * should do an invltlb after doing the pmap_kenter...
782 */
783PMAP_INLINE void
784pmap_kenter(va, pa)
785	vm_offset_t va;
786	register vm_offset_t pa;
787{
788	register unsigned *pte;
789	unsigned npte, opte;
790
791	npte = pa | PG_RW | PG_V | pgeflag;
792	pte = (unsigned *)vtopte(va);
793	opte = *pte;
794	*pte = npte;
795	if (opte)
796		invltlb_1pg(va);
797}
798
799/*
800 * remove a page from the kernel pagetables
801 */
802PMAP_INLINE void
803pmap_kremove(va)
804	vm_offset_t va;
805{
806	register unsigned *pte;
807
808	pte = (unsigned *)vtopte(va);
809	*pte = 0;
810	invltlb_1pg(va);
811}
812
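/*
 * Allocate a (preferably pre-zeroed) page for pmap metadata.  May
 * return NULL after waiting for memory; callers are expected to retry.
 */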
813static vm_page_t
814pmap_page_alloc(object, pindex)
815	vm_object_t object;
816	vm_pindex_t pindex;
817{
818	vm_page_t m;
819	m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO);
820	if (m == NULL) {
821		VM_WAIT;
822	}
823	return m;
824}
825
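/*
 * Look up a page in the given object, sleeping until it is no longer
 * busy before returning it.
 */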
826static vm_page_t
827pmap_page_lookup(object, pindex)
828	vm_object_t object;
829	vm_pindex_t pindex;
830{
831	vm_page_t m;
832retry:
833	m = vm_page_lookup(object, pindex);
834	if (m) {
835		if (m->flags & PG_BUSY) {
836			m->flags |= PG_WANTED;
837			tsleep(m, PVM, "pplookp", 0);
838			goto retry;
839		}
840	}
841
842	return m;
843}
844
845/*
846 * Create the UPAGES for a new process.
847 * This routine directly affects the fork performance of a process.
848 */
849void
850pmap_new_proc(p)
851	struct proc *p;
852{
853	int i;
854	vm_object_t upobj;
855	vm_page_t m;
856	struct user *up;
857	unsigned *ptek;
858
859	/*
860	 * allocate object for the upages
861	 */
862	upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
863	p->p_upages_obj = upobj;
864
865	/* get a kernel virtual address for the UPAGES for this proc */
866	up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * PAGE_SIZE);
867	if (up == NULL)
868		panic("pmap_new_proc: u_map allocation failed");
869
870	ptek = (unsigned *) vtopte((vm_offset_t) up);
871
872	for(i=0;i<UPAGES;i++) {
873		/*
874		 * Get a kernel stack page
875		 */
876		while ((m = vm_page_alloc(upobj,
877			i, VM_ALLOC_NORMAL)) == NULL) {
878			VM_WAIT;
879		}
880
881		/*
882		 * Wire the page
883		 */
884		m->wire_count++;
885		++cnt.v_wire_count;
886
887		/*
888		 * Enter the page into the kernel address space.
889		 */
890		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
891
892		m->flags &= ~(PG_ZERO|PG_BUSY);
893		m->flags |= PG_MAPPED|PG_WRITEABLE;
894		m->valid = VM_PAGE_BITS_ALL;
895	}
896
897	p->p_addr = up;
898}
899
900/*
901 * Dispose of the UPAGES for a process that has exited.
902 * This routine directly impacts the exit performance of a process.
903 */
904void
905pmap_dispose_proc(p)
906	struct proc *p;
907{
908	int i;
909	vm_object_t upobj;
910	vm_page_t m;
911	unsigned *ptek;
912
913	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
914
915	upobj = p->p_upages_obj;
916
917	for(i=0;i<UPAGES;i++) {
918		unsigned oldpte;
919		if ((m = vm_page_lookup(upobj, i)) == NULL)
920			panic("pmap_dispose_proc: upage already missing???");
921		oldpte = *(ptek + i);
922		*(ptek + i) = 0;
923		if (oldpte & PG_G)
924			invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
925		vm_page_unwire(m);
926		vm_page_free(m);
927	}
928
929	vm_object_deallocate(upobj);
930
931	kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
932}
933
934/*
935 * Allow the UPAGES for a process to be prejudicially paged out.
936 */
937void
938pmap_swapout_proc(p)
939	struct proc *p;
940{
941	int i;
942	vm_object_t upobj;
943	vm_page_t m;
944
945	upobj = p->p_upages_obj;
946	/*
947	 * let the upages be paged
948	 */
949	for(i=0;i<UPAGES;i++) {
950		if ((m = vm_page_lookup(upobj, i)) == NULL)
951			panic("pmap_swapout_proc: upage already missing???");
952		m->dirty = VM_PAGE_BITS_ALL;
953		vm_page_unwire(m);
954		vm_page_deactivate(m);
955		pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
956	}
957}
958
959/*
960 * Bring the UPAGES for a specified process back in.
961 */
962void
963pmap_swapin_proc(p)
964	struct proc *p;
965{
966	int i;
967	vm_object_t upobj;
968	vm_page_t m;
969	unsigned *pte;
970
971	upobj = p->p_upages_obj;
972	for(i=0;i<UPAGES;i++) {
973		int s;
974		s = splvm();
975retry:
976		if ((m = vm_page_lookup(upobj, i)) == NULL) {
977			if ((m = vm_page_alloc(upobj, i, VM_ALLOC_NORMAL)) == NULL) {
978				VM_WAIT;
979				goto retry;
980			}
981		} else {
982			if ((m->flags & PG_BUSY) || m->busy) {
983				m->flags |= PG_WANTED;
984				tsleep(m, PVM, "swinuw",0);
985				goto retry;
986			}
987			m->flags |= PG_BUSY;
988		}
989		vm_page_wire(m);
990		splx(s);
991
992		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
993			VM_PAGE_TO_PHYS(m));
994
995		if (m->valid != VM_PAGE_BITS_ALL) {
996			int rv;
997			rv = vm_pager_get_pages(upobj, &m, 1, 0);
998			if (rv != VM_PAGER_OK)
999				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
1000			m->valid = VM_PAGE_BITS_ALL;
1001		}
1002		PAGE_WAKEUP(m);
1003		m->flags |= PG_MAPPED|PG_WRITEABLE;
1004	}
1005}
1006
1007/***************************************************
1008 * Page table page management routines.....
1009 ***************************************************/
1010
1011/*
1012 * This routine unholds page table pages, and if the hold count
1013 * drops to zero, then it decrements the wire count.
1014 */
1015static int
1016_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1017	int s;
1018
1019	if (m->flags & PG_BUSY) {
1020		s = splvm();
1021		while (m->flags & PG_BUSY) {
1022			m->flags |= PG_WANTED;
1023			tsleep(m, PVM, "pmuwpt", 0);
1024		}
1025		splx(s);
1026	}
1027
1028	if (m->hold_count == 0) {
1029		vm_offset_t pteva;
1030		/*
1031		 * unmap the page table page
1032		 */
1033		pmap->pm_pdir[m->pindex] = 0;
1034		--pmap->pm_stats.resident_count;
1035		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1036			(((unsigned) PTDpde) & PG_FRAME)) {
1037			/*
1038			 * Do an invltlb to make the invalidated mapping
1039			 * take effect immediately.
1040			 */
1041			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
1042			invltlb_1pg(pteva);
1043		}
1044
1045#if defined(PTPHINT)
1046		if (pmap->pm_ptphint == m)
1047			pmap->pm_ptphint = NULL;
1048#endif
1049
1050		/*
1051		 * If the page is finally unwired, simply free it.
1052		 */
1053		--m->wire_count;
1054		if (m->wire_count == 0) {
1055
1056			if (m->flags & PG_WANTED) {
1057				m->flags &= ~PG_WANTED;
1058				wakeup(m);
1059			}
1060
1061			vm_page_free_zero(m);
1062			--cnt.v_wire_count;
1063		}
1064		return 1;
1065	}
1066	return 0;
1067}
1068
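/*
 * Drop one hold on a page table page; if that was the last hold, let
 * _pmap_unwire_pte_hold() unmap and possibly free the page.
 */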
1069__inline static int
1070pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1071	vm_page_unhold(m);
1072	if (m->hold_count == 0)
1073		return _pmap_unwire_pte_hold(pmap, m);
1074	else
1075		return 0;
1076}
1077
1078/*
1079 * After removing a page table entry, this routine is used to
1080 * conditionally free the page, and manage the hold/wire counts.
1081 */
1082static int
1083pmap_unuse_pt(pmap, va, mpte)
1084	pmap_t pmap;
1085	vm_offset_t va;
1086	vm_page_t mpte;
1087{
1088	unsigned ptepindex;
1089	if (va >= UPT_MIN_ADDRESS)
1090		return 0;
1091
1092	if (mpte == NULL) {
1093		ptepindex = (va >> PDRSHIFT);
1094#if defined(PTPHINT)
1095		if (pmap->pm_ptphint &&
1096			(pmap->pm_ptphint->pindex == ptepindex)) {
1097			mpte = pmap->pm_ptphint;
1098		} else {
1099			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1100			pmap->pm_ptphint = mpte;
1101		}
1102#else
1103		mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1104#endif
1105	}
1106
1107	return pmap_unwire_pte_hold(pmap, mpte);
1108}
1109
1110/*
1111 * Initialize a preallocated and zeroed pmap structure,
1112 * such as one in a vmspace structure.
1113 */
1114void
1115pmap_pinit(pmap)
1116	register struct pmap *pmap;
1117{
1118	vm_page_t ptdpg;
1119	/*
1120	 * No need to allocate page table space yet but we do need a valid
1121	 * page directory table.
1122	 */
1123
1124	if (pdstackptr > 0) {
1125		--pdstackptr;
1126		pmap->pm_pdir = (pd_entry_t *)pdstack[pdstackptr];
1127	} else {
1128		pmap->pm_pdir =
1129			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1130	}
1131
1132	/*
1133	 * allocate object for the ptes
1134	 */
1135	pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
1136
1137	/*
1138	 * allocate the page directory page
1139	 */
1140retry:
1141	ptdpg = pmap_page_alloc( pmap->pm_pteobj, PTDPTDI);
1142	if (ptdpg == NULL)
1143		goto retry;
1144
1145	ptdpg->wire_count = 1;
1146	++cnt.v_wire_count;
1147
1148	ptdpg->flags &= ~(PG_MAPPED|PG_BUSY);	/* not mapped normally */
1149	ptdpg->valid = VM_PAGE_BITS_ALL;
1150
1151	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
1152	if ((ptdpg->flags & PG_ZERO) == 0)
1153		bzero(pmap->pm_pdir, PAGE_SIZE);
1154
1155	/* wire in kernel global address entries */
1156	/* XXX copies current process, does not fill in MPPTDI */
1157	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
1158
1159	/* install self-referential address mapping entry */
1160	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1161		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW;
1162
1163	pmap->pm_flags = 0;
1164	pmap->pm_count = 1;
1165	pmap->pm_ptphint = NULL;
1166	TAILQ_INIT(&pmap->pm_pvlist);
1167}
1168
1169static int
1170pmap_release_free_page(pmap, p)
1171	struct pmap *pmap;
1172	vm_page_t p;
1173{
1174	int s;
1175	unsigned *pde = (unsigned *) pmap->pm_pdir;
1176	/*
1177	 * This code optimizes the case of freeing non-busy
1178	 * page-table pages.  Those pages are zero now, and
1179	 * might as well be placed directly into the zero queue.
1180	 */
1181	s = splvm();
1182	if (p->flags & PG_BUSY) {
1183		p->flags |= PG_WANTED;
1184		tsleep(p, PVM, "pmaprl", 0);
1185		splx(s);
1186		return 0;
1187	}
1188
1189	if (p->flags & PG_WANTED) {
1190		p->flags &= ~PG_WANTED;
1191		wakeup(p);
1192	}
1193
1194	/*
1195	 * Remove the page table page from the processes address space.
1196	 */
1197	pde[p->pindex] = 0;
1198	--pmap->pm_stats.resident_count;
1199
1200	if (p->hold_count)  {
1201		panic("pmap_release: freeing held page table page");
1202	}
1203	/*
1204	 * Page directory pages need to have the kernel
1205	 * stuff cleared, so they can go into the zero queue also.
1206	 */
1207	if (p->pindex == PTDPTDI) {
1208		bzero(pde + KPTDI, nkpt * PTESIZE);
1209#ifdef SMP
1210		pde[MPPTDI] = 0;
1211#endif
1212		pde[APTDPTDI] = 0;
1213		pmap_kremove((vm_offset_t) pmap->pm_pdir);
1214	}
1215
1216#if defined(PTPHINT)
1217	if (pmap->pm_ptphint &&
1218		(pmap->pm_ptphint->pindex == p->pindex))
1219		pmap->pm_ptphint = NULL;
1220#endif
1221
1222	vm_page_free_zero(p);
1223	splx(s);
1224	return 1;
1225}
1226
1227/*
1228 * this routine is called if the page table page is not
1229 * mapped correctly.
1230 */
1231static vm_page_t
1232_pmap_allocpte(pmap, ptepindex)
1233	pmap_t	pmap;
1234	unsigned ptepindex;
1235{
1236	vm_offset_t pteva, ptepa;
1237	vm_page_t m;
1238	int needszero = 0;
1239
1240	/*
1241	 * Find or fabricate a new pagetable page
1242	 */
1243retry:
1244	m = vm_page_lookup(pmap->pm_pteobj, ptepindex);
1245	if (m == NULL) {
1246		m = pmap_page_alloc(pmap->pm_pteobj, ptepindex);
1247		if (m == NULL)
1248			goto retry;
1249		if ((m->flags & PG_ZERO) == 0)
1250			needszero = 1;
1251		m->flags &= ~(PG_ZERO|PG_BUSY);
1252		m->valid = VM_PAGE_BITS_ALL;
1253	} else {
1254		if ((m->flags & PG_BUSY) || m->busy) {
1255			m->flags |= PG_WANTED;
1256			tsleep(m, PVM, "ptewai", 0);
1257			goto retry;
1258		}
1259	}
1260
1261	if (m->queue != PQ_NONE) {
1262		int s = splvm();
1263		vm_page_unqueue(m);
1264		splx(s);
1265	}
1266
1267	if (m->wire_count == 0)
1268		++cnt.v_wire_count;
1269	++m->wire_count;
1270
1271	/*
1272	 * Increment the hold count for the page table page
1273	 * (denoting a new mapping.)
1274	 */
1275	++m->hold_count;
1276
1277	/*
1278	 * Map the pagetable page into the process address space, if
1279	 * it isn't already there.
1280	 */
1281
1282	pmap->pm_stats.resident_count++;
1283
1284	ptepa = VM_PAGE_TO_PHYS(m);
1285	pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V);
1286
1287#if defined(PTPHINT)
1288	/*
1289	 * Set the page table hint
1290	 */
1291	pmap->pm_ptphint = m;
1292#endif
1293
1294	/*
1295	 * Try to use the new mapping, but if we cannot, then
1296	 * do it with the routine that maps the page explicitly.
1297	 */
1298	if (needszero) {
1299		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1300			(((unsigned) PTDpde) & PG_FRAME)) {
1301			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
1302			bzero((caddr_t) pteva, PAGE_SIZE);
1303		} else {
1304			pmap_zero_page(ptepa);
1305		}
1306	}
1307
1308	m->valid = VM_PAGE_BITS_ALL;
1309	m->flags |= PG_MAPPED;
1310
1311	return m;
1312}
1313
1314static vm_page_t
1315pmap_allocpte(pmap, va)
1316	pmap_t	pmap;
1317	vm_offset_t va;
1318{
1319	unsigned ptepindex;
1320	vm_offset_t ptepa;
1321	vm_page_t m;
1322
1323	/*
1324	 * Calculate pagetable page index
1325	 */
1326	ptepindex = va >> PDRSHIFT;
1327
1328	/*
1329	 * Get the page directory entry
1330	 */
1331	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1332
1333	/*
1334	 * This supports switching from a 4MB page to a
1335	 * normal 4K page.
1336	 */
1337	if (ptepa & PG_PS) {
1338		pmap->pm_pdir[ptepindex] = 0;
1339		ptepa = 0;
1340		invltlb();
1341	}
1342
1343	/*
1344	 * If the page table page is mapped, we just increment the
1345	 * hold count, and activate it.
1346	 */
1347	if (ptepa) {
1348#if defined(PTPHINT)
1349		/*
1350		 * In order to get the page table page, try the
1351		 * hint first.
1352		 */
1353		if (pmap->pm_ptphint &&
1354			(pmap->pm_ptphint->pindex == ptepindex)) {
1355			m = pmap->pm_ptphint;
1356		} else {
1357			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1358			pmap->pm_ptphint = m;
1359		}
1360#else
1361		m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1362#endif
1363		++m->hold_count;
1364		return m;
1365	}
1366	/*
1367	 * Here if the pte page isn't mapped, or if it has been deallocated.
1368	 */
1369	return _pmap_allocpte(pmap, ptepindex);
1370}
1371
1372
1373/***************************************************
1374* Pmap allocation/deallocation routines.
1375 ***************************************************/
1376
1377/*
1378 * Release any resources held by the given physical map.
1379 * Called when a pmap initialized by pmap_pinit is being released.
1380 * Should only be called if the map contains no valid mappings.
1381 */
1382void
1383pmap_release(pmap)
1384	register struct pmap *pmap;
1385{
1386	vm_page_t p,n,ptdpg;
1387	vm_object_t object = pmap->pm_pteobj;
1388
1389#if defined(DIAGNOSTIC)
1390	if (object->ref_count != 1)
1391		panic("pmap_release: pteobj reference count != 1");
1392#endif
1393
1394	ptdpg = NULL;
1395retry:
1396	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
1397		n = TAILQ_NEXT(p, listq);
1398		if (p->pindex == PTDPTDI) {
1399			ptdpg = p;
1400			continue;
1401		}
1402		if (!pmap_release_free_page(pmap, p))
1403			goto retry;
1404	}
1405
1406	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
1407		goto retry;
1408
1409	vm_object_deallocate(object);
1410	if (pdstackptr < PDSTACKMAX) {
1411		pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir;
1412		++pdstackptr;
1413	} else {
1414		int pdstmp = pdstackptr - 1;
1415		kmem_free(kernel_map, pdstack[pdstmp], PAGE_SIZE);
1416		pdstack[pdstmp] = (vm_offset_t) pmap->pm_pdir;
1417	}
1418	pmap->pm_pdir = 0;
1419}
1420
1421/*
1422 * grow the number of kernel page table entries, if needed
1423 */
1424void
1425pmap_growkernel(vm_offset_t addr)
1426{
1427	struct proc *p;
1428	struct pmap *pmap;
1429	int s;
1430	vm_offset_t ptpkva, ptppaddr;
1431	vm_page_t nkpg;
1432#ifdef SMP
1433	int i;
1434#endif
1435	pd_entry_t newpdir;
1436	vm_pindex_t ptpidx;
1437
1438	s = splhigh();
1439	if (kernel_vm_end == 0) {
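		/*
		 * First call:  scan the page directory to find where the
		 * bootstrap kernel page tables end (counting them in nkpt).
		 */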
1440		kernel_vm_end = KERNBASE;
1441		nkpt = 0;
1442		while (pdir_pde(PTD, kernel_vm_end)) {
1443			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1444			++nkpt;
1445		}
1446	}
1447	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1448	while (kernel_vm_end < addr) {
1449		if (pdir_pde(PTD, kernel_vm_end)) {
1450			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1451			continue;
1452		}
1453		++nkpt;
1454		ptpkva = (vm_offset_t) vtopte(addr);
1455		ptpidx = (ptpkva >> PAGE_SHIFT);
1456		/*
1457		 * This index is bogus, but out of the way
1458		 */
1459		nkpg = vm_page_alloc(kernel_object,
1460			ptpidx, VM_ALLOC_SYSTEM);
1461		if (!nkpg)
1462			panic("pmap_growkernel: no memory to grow kernel");
1463
1464		vm_page_wire(nkpg);
1465		vm_page_remove(nkpg);
1466		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1467		pmap_zero_page(ptppaddr);
1468		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW);
1469		pdir_pde(PTD, kernel_vm_end) = newpdir;
1470
1471#ifdef SMP
1472		for (i = 0; i < mp_ncpus; i++) {
1473			if (IdlePTDS[i])
1474				pdir_pde(IdlePTDS[i], kernel_vm_end) = newpdir;
1475		}
1476#endif
1477
1478		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1479			if (p->p_vmspace) {
1480				pmap = &p->p_vmspace->vm_pmap;
1481				*pmap_pde(pmap, kernel_vm_end) = newpdir;
1482			}
1483		}
1484		*pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
1485		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1486	}
1487	splx(s);
1488}
1489
1490/*
1491 *	Retire the given physical map from service.
1492 *	Should only be called if the map contains
1493 *	no valid mappings.
1494 */
1495void
1496pmap_destroy(pmap)
1497	register pmap_t pmap;
1498{
1499	int count;
1500
1501	if (pmap == NULL)
1502		return;
1503
1504	count = --pmap->pm_count;
1505	if (count == 0) {
1506		pmap_release(pmap);
1507		panic("destroying a pmap is not yet implemented");
1508		/* free((caddr_t) pmap, M_VMPMAP); */
1509	}
1510}
1511
1512/*
1513 *	Add a reference to the specified pmap.
1514 */
1515void
1516pmap_reference(pmap)
1517	pmap_t pmap;
1518{
1519	if (pmap != NULL) {
1520		pmap->pm_count++;
1521	}
1522}
1523
1524/***************************************************
1525* page management routines.
1526 ***************************************************/
1527
1528/*
1529 * free the pv_entry back to the free list
1530 */
1531static inline void
1532free_pv_entry(pv)
1533	pv_entry_t pv;
1534{
1535	zfreei(pvzone, pv);
1536}
1537
1538/*
1539 * get a new pv_entry, allocating a block from the system
1540 * when needed.
1541 * the memory allocation is performed bypassing the malloc code
1542 * because of the possibility of allocations at interrupt time.
1543 */
1544static inline pv_entry_t
1545get_pv_entry(void)
1546{
1547	return zalloci(pvzone);
1548}
1549
1550/*
1551 * Remove the pv entry for (pmap, va).  Search whichever list is
1552 * likely to be shorter (the page's pv_list or the pmap's own pv
1553 * list), unlink the matching entry from both lists, and free the
1554 * now unused entry.
1555 */
1556
1557static int
1558pmap_remove_entry(pmap, ppv, va)
1559	struct pmap *pmap;
1560	pv_table_t *ppv;
1561	vm_offset_t va;
1562{
1563	pv_entry_t pv;
1564	int rtval;
1565	int s;
1566
1567	s = splvm();
1568	if (ppv->pv_list_count < pmap->pm_stats.resident_count) {
1569		for (pv = TAILQ_FIRST(&ppv->pv_list);
1570			pv;
1571			pv = TAILQ_NEXT(pv, pv_list)) {
1572			if (pmap == pv->pv_pmap && va == pv->pv_va)
1573				break;
1574		}
1575	} else {
1576		for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1577			pv;
1578			pv = TAILQ_NEXT(pv, pv_plist)) {
1579			if (va == pv->pv_va)
1580				break;
1581		}
1582	}
1583
1584	rtval = 0;
1585	if (pv) {
1586		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1587		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1588		--ppv->pv_list_count;
1589		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
1590			ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
1591		}
1592
1593		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1594		free_pv_entry(pv);
1595	}
1596
1597	splx(s);
1598	return rtval;
1599}
1600
1601/*
1602 * Create a pv entry for page at pa for
1603 * (pmap, va).
1604 */
1605static void
1606pmap_insert_entry(pmap, va, mpte, pa)
1607	pmap_t pmap;
1608	vm_offset_t va;
1609	vm_page_t mpte;
1610	vm_offset_t pa;
1611{
1612
1613	int s;
1614	pv_entry_t pv;
1615	pv_table_t *ppv;
1616
1617	s = splvm();
1618	pv = get_pv_entry();
1619	pv->pv_va = va;
1620	pv->pv_pmap = pmap;
1621	pv->pv_ptem = mpte;
1622
1623	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1624
1625	ppv = pa_to_pvh(pa);
1626	TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
1627	++ppv->pv_list_count;
1628
1629	splx(s);
1630}
1631
1632/*
1633 * pmap_remove_pte: do the things to unmap a page in a process
1634 */
1635static int
1636pmap_remove_pte(pmap, ptq, va)
1637	struct pmap *pmap;
1638	unsigned *ptq;
1639	vm_offset_t va;
1640{
1641	unsigned oldpte;
1642	pv_table_t *ppv;
1643
1644	oldpte = *ptq;
1645	*ptq = 0;
1646	if (oldpte & PG_W)
1647		pmap->pm_stats.wired_count -= 1;
1648	/*
1649	 * Machines that don't support invlpg also don't support
1650	 * PG_G.
1651	 */
1652	if (oldpte & PG_G)
1653		invlpg(va);
1654	pmap->pm_stats.resident_count -= 1;
1655	if (oldpte & PG_MANAGED) {
1656		ppv = pa_to_pvh(oldpte);
1657		if (oldpte & PG_M) {
1658#if defined(PMAP_DIAGNOSTIC)
1659			if (pmap_nw_modified((pt_entry_t) oldpte)) {
1660				printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte);
1661			}
1662#endif
1663			if (pmap_track_modified(va))
1664				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1665		}
1666		return pmap_remove_entry(pmap, ppv, va);
1667	} else {
1668		return pmap_unuse_pt(pmap, va, NULL);
1669	}
1670
1671	return 0;
1672}
1673
1674/*
1675 * Remove a single page from a process address space
1676 */
1677static void
1678pmap_remove_page(pmap, va)
1679	struct pmap *pmap;
1680	register vm_offset_t va;
1681{
1682	register unsigned *ptq;
1683
1684	/*
1685	 * if there is no pte for this address, just skip it!!!
1686	 */
1687	if (*pmap_pde(pmap, va) == 0) {
1688		return;
1689	}
1690
1691	/*
1692	 * get a local va for mappings for this pmap.
1693	 */
1694	ptq = get_ptbase(pmap) + i386_btop(va);
1695	if (*ptq) {
1696		(void) pmap_remove_pte(pmap, ptq, va);
1697		invltlb_1pg(va);
1698	}
1699	return;
1700}
1701
1702/*
1703 *	Remove the given range of addresses from the specified map.
1704 *
1705 *	It is assumed that the start and end are properly
1706 *	rounded to the page size.
1707 */
1708void
1709pmap_remove(pmap, sva, eva)
1710	struct pmap *pmap;
1711	register vm_offset_t sva;
1712	register vm_offset_t eva;
1713{
1714	register unsigned *ptbase;
1715	vm_offset_t pdnxt;
1716	vm_offset_t ptpaddr;
1717	vm_offset_t sindex, eindex;
1718	int anyvalid;
1719
1720	if (pmap == NULL)
1721		return;
1722
1723	if (pmap->pm_stats.resident_count == 0)
1724		return;
1725
1726	/*
1727	 * special handling for removing a single page.  this is a very
1728	 * common operation for which we can short-circuit some
1729	 * of the code below.
1730	 */
1731	if (((sva + PAGE_SIZE) == eva) &&
1732		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1733		pmap_remove_page(pmap, sva);
1734		return;
1735	}
1736
1737	anyvalid = 0;
1738
1739	/*
1740	 * Get a local virtual address for the mappings that are being
1741	 * worked with.
1742	 */
1743	ptbase = get_ptbase(pmap);
1744
1745	sindex = i386_btop(sva);
1746	eindex = i386_btop(eva);
1747
1748	for (; sindex < eindex; sindex = pdnxt) {
1749		unsigned pdirindex;
1750
1751		/*
1752		 * Calculate index for next page table.
1753		 */
1754		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1755		if (pmap->pm_stats.resident_count == 0)
1756			break;
1757
1758		pdirindex = sindex / NPDEPG;
1759		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1760			pmap->pm_pdir[pdirindex] = 0;
1761			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1762			anyvalid++;
1763			continue;
1764		}
1765
1766		/*
1767		 * Weed out invalid mappings. Note: we assume that the page
1768		 * directory table is always allocated, and in kernel virtual.
1769		 */
1770		if (ptpaddr == 0)
1771			continue;
1772
1773		/*
1774		 * Limit our scan to either the end of the va represented
1775		 * by the current page table page, or to the end of the
1776		 * range being removed.
1777		 */
1778		if (pdnxt > eindex) {
1779			pdnxt = eindex;
1780		}
1781
1782		for ( ;sindex != pdnxt; sindex++) {
1783			vm_offset_t va;
1784			if (ptbase[sindex] == 0) {
1785				continue;
1786			}
1787			va = i386_ptob(sindex);
1788
1789			anyvalid++;
1790			if (pmap_remove_pte(pmap,
1791				ptbase + sindex, va))
1792				break;
1793		}
1794	}
1795
1796	if (anyvalid) {
1797		invltlb();
1798	}
1799}
1800
1801/*
1802 *	Routine:	pmap_remove_all
1803 *	Function:
1804 *		Removes this physical page from
1805 *		all physical maps in which it resides.
1806 *		Reflects back modify bits to the pager.
1807 *
1808 *	Notes:
1809 *		Original versions of this routine were very
1810 *		inefficient because they iteratively called
1811 *		pmap_remove (slow...)
1812 */
1813
1814static void
1815pmap_remove_all(pa)
1816	vm_offset_t pa;
1817{
1818	register pv_entry_t pv;
1819	pv_table_t *ppv;
1820	register unsigned *pte, tpte;
1821	int nmodify;
1822	int update_needed;
1823	int s;
1824
1825	nmodify = 0;
1826	update_needed = 0;
1827#if defined(PMAP_DIAGNOSTIC)
1828	/*
1829	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1830	 * pages!
1831	 */
1832	if (!pmap_is_managed(pa)) {
1833		panic("pmap_page_protect: illegal for unmanaged page, pa: 0x%lx", pa);
1834	}
1835#endif
1836
1837	s = splvm();
1838	ppv = pa_to_pvh(pa);
1839	while ((pv = TAILQ_FIRST(&ppv->pv_list)) != NULL) {
1840		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
1841
1842		pv->pv_pmap->pm_stats.resident_count--;
1843
1844		tpte = *pte;
1845		*pte = 0;
1846		if (tpte & PG_W)
1847			pv->pv_pmap->pm_stats.wired_count--;
1848		/*
1849		 * Update the vm_page_t clean and reference bits.
1850		 */
1851		if (tpte & PG_M) {
1852#if defined(PMAP_DIAGNOSTIC)
1853			if (pmap_nw_modified((pt_entry_t) tpte)) {
1854				printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", pv->pv_va, tpte);
1855			}
1856#endif
1857			if (pmap_track_modified(pv->pv_va))
1858				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1859		}
1860		if (!update_needed &&
1861			((!curproc || (&curproc->p_vmspace->vm_pmap == pv->pv_pmap)) ||
1862			(pv->pv_pmap == kernel_pmap))) {
1863			update_needed = 1;
1864		}
1865
1866		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1867		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1868		--ppv->pv_list_count;
1869		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1870		free_pv_entry(pv);
1871	}
1872	ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
1873
1874
1875	if (update_needed)
1876		invltlb();
1877	splx(s);
1878	return;
1879}
1880
1881/*
1882 *	Set the physical protection on the
1883 *	specified range of this map as requested.
1884 */
1885void
1886pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1887{
1888	register unsigned *ptbase;
1889	vm_offset_t pdnxt;
1890	vm_offset_t ptpaddr;
1891	vm_offset_t sindex, eindex;
1892	int anychanged;
1893
1894
1895	if (pmap == NULL)
1896		return;
1897
1898	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1899		pmap_remove(pmap, sva, eva);
1900		return;
1901	}
1902
1903	anychanged = 0;
1904
1905	ptbase = get_ptbase(pmap);
1906
1907	sindex = i386_btop(sva);
1908	eindex = i386_btop(eva);
1909
1910	for (; sindex < eindex; sindex = pdnxt) {
1911
1912		unsigned pdirindex;
1913
1914		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1915
1916		pdirindex = sindex / NPDEPG;
1917		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1918			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
1919			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1920			anychanged++;
1921			continue;
1922		}
1923
1924		/*
1925		 * Weed out invalid mappings. Note: we assume that the page
1926		 * directory table is always allocated, and in kernel virtual.
1927		 */
1928		if (ptpaddr == 0)
1929			continue;
1930
1931		if (pdnxt > eindex) {
1932			pdnxt = eindex;
1933		}
1934
1935		for (; sindex != pdnxt; sindex++) {
1936
1937			unsigned pbits = ptbase[sindex];
1938
1939			if (prot & VM_PROT_WRITE) {
1940				if ((pbits & (PG_RW|PG_V)) == PG_V) {
1941					if (pbits & PG_MANAGED) {
1942						vm_page_t m = PHYS_TO_VM_PAGE(pbits);
1943						m->flags |= PG_WRITEABLE;
1944						m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY;
1945					}
1946					ptbase[sindex] = pbits | PG_RW;
1947					anychanged = 1;
1948				}
1949			} else if (pbits & PG_RW) {
1950				if (pbits & PG_M) {
1951					vm_offset_t sva = i386_ptob(sindex);
1952					if ((pbits & PG_MANAGED) && pmap_track_modified(sva)) {
1953						vm_page_t m = PHYS_TO_VM_PAGE(pbits);
1954						m->dirty = VM_PAGE_BITS_ALL;
1955					}
1956				}
1957				ptbase[sindex] = pbits & ~(PG_M|PG_RW);
1958				anychanged = 1;
1959			}
1960		}
1961	}
1962	if (anychanged)
1963		invltlb();
1964}
1965
1966/*
1967 *	Insert the given physical page (p) at
1968 *	the specified virtual address (v) in the
1969 *	target physical map with the protection requested.
1970 *
1971 *	If specified, the page will be wired down, meaning
1972 *	that the related pte can not be reclaimed.
1973 *
1974 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1975 *	or lose information.  That is, this routine must actually
1976 *	insert this page into the given map NOW.
1977 */
1978void
1979pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot,
1980	   boolean_t wired)
1981{
1982	register unsigned *pte;
1983	vm_offset_t opa;
1984	vm_offset_t origpte, newpte;
1985	vm_page_t mpte;
1986
1987	if (pmap == NULL)
1988		return;
1989
1990	va &= PG_FRAME;
1991#ifdef PMAP_DIAGNOSTIC
1992	if (va > VM_MAX_KERNEL_ADDRESS)
1993		panic("pmap_enter: toobig");
1994	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
1995		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
1996#endif
1997
1998	mpte = NULL;
1999	/*
2000	 * In the case that a page table page is not
2001	 * resident, we are creating it here.
2002	 */
2003	if (va < UPT_MIN_ADDRESS) {
2004		mpte = pmap_allocpte(pmap, va);
2005	}
2006#if 0 && defined(PMAP_DIAGNOSTIC)
2007	else {
2008		vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va);
2009		if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) {
2010			panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n",
2011				pmap->pm_pdir[PTDPTDI], origpte, va);
2012		}
2013		if (smp_active) {
2014			pdeaddr = (vm_offset_t *) IdlePTDS[cpuid];
2015			if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) {
2016				if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr))
2017					printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr);
2018				printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr);
2019				panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n",
2020					pmap->pm_pdir[PTDPTDI], newpte, origpte, va);
2021			}
2022		}
2023	}
2024#endif
2025
2026	pte = pmap_pte(pmap, va);
2027	/*
2028	 * Page Directory table entry not valid, we need a new PT page
2029	 */
2030	if (pte == NULL) {
2031		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%lx\n",
2032			pmap->pm_pdir[PTDPTDI], va);
2033	}
2034
2035	origpte = *(vm_offset_t *)pte;
2036	pa &= PG_FRAME;
2037	opa = origpte & PG_FRAME;
2038	if (origpte & PG_PS)
2039		panic("pmap_enter: attempted pmap_enter on 4MB page");
2040
2041	/*
2042	 * Mapping has not changed, must be protection or wiring change.
2043	 */
2044	if (origpte && (opa == pa)) {
2045		/*
2046		 * Wiring change, just update stats. We don't worry about
2047		 * wiring PT pages as they remain resident as long as there
2048		 * are valid mappings in them. Hence, if a user page is wired,
2049		 * the PT page will be also.
2050		 */
2051		if (wired && ((origpte & PG_W) == 0))
2052			pmap->pm_stats.wired_count++;
2053		else if (!wired && (origpte & PG_W))
2054			pmap->pm_stats.wired_count--;
2055
2056#if defined(PMAP_DIAGNOSTIC)
2057		if (pmap_nw_modified((pt_entry_t) origpte)) {
2058			printf("pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, origpte);
2059		}
2060#endif
2061
2062		/*
2063		 * We might be turning off write access to the page,
2064		 * so we go ahead and sense modify status.
2065		 */
2066		if (origpte & PG_MANAGED) {
2067			vm_page_t m;
2068			if (origpte & PG_M) {
2069				if (pmap_track_modified(va)) {
2070					m = PHYS_TO_VM_PAGE(pa);
2071					m->dirty = VM_PAGE_BITS_ALL;
2072				}
2073			}
2074			pa |= PG_MANAGED;
2075		}
2076
2077		if (mpte)
2078			--mpte->hold_count;
2079
2080		goto validate;
2081	}
2082	/*
2083	 * Mapping has changed, invalidate old range and fall through to
2084	 * handle validating new mapping.
2085	 */
2086	if (opa) {
2087		int err;
2088		err = pmap_remove_pte(pmap, pte, va);
2089		if (err)
2090			panic("pmap_enter: pte vanished, va: 0x%x", va);
2091	}
2092
2093	/*
2094	 * Enter on the PV list if part of our managed memory.  Note that we
2095	 * raise IPL while manipulating pv_table since pmap_enter can be
2096	 * called at interrupt time.
2097	 */
2098	if (pmap_is_managed(pa)) {
2099		pmap_insert_entry(pmap, va, mpte, pa);
2100		pa |= PG_MANAGED;
2101	}
2102
2103	/*
2104	 * Increment counters
2105	 */
2106	pmap->pm_stats.resident_count++;
2107	if (wired)
2108		pmap->pm_stats.wired_count++;
2109
2110validate:
2111	/*
2112	 * Now validate mapping with desired protection/wiring.
2113	 */
2114	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2115
2116	if (wired)
2117		newpte |= PG_W;
2118	if (va < UPT_MIN_ADDRESS)
2119		newpte |= PG_U;
2120	if (pmap == kernel_pmap)
2121		newpte |= pgeflag;
2122
2123	/*
2124	 * if the mapping or permission bits are different, we need
2125	 * to update the pte.
2126	 */
2127	if ((origpte & ~(PG_M|PG_A)) != newpte) {
2128		*pte = newpte;
2129		if (origpte)
2130			invltlb_1pg(va);
2131	}
2132}
2133
2134/*
2135 * this code makes some *MAJOR* assumptions:
2136 * 1. The pmap being entered into is the current pmap and it exists.
2137 * 2. Not wired.
2138 * 3. Read access.
2139 * 4. No page table pages.
2140 * 5. The TLB flush is deferred to the calling procedure.
2141 * 6. Page IS managed.
2142 * but is *MUCH* faster than pmap_enter...
2143 */
2144
2145static vm_page_t
2146pmap_enter_quick(pmap, va, pa, mpte)
2147	register pmap_t pmap;
2148	vm_offset_t va;
2149	register vm_offset_t pa;
2150	vm_page_t mpte;
2151{
2152	register unsigned *pte;
2153
2154	/*
2155	 * In the case that a page table page is not
2156	 * resident, we are creating it here.
2157	 */
2158	if (va < UPT_MIN_ADDRESS) {
2159		unsigned ptepindex;
2160		vm_offset_t ptepa;
2161
2162		/*
2163		 * Calculate pagetable page index
2164		 */
2165		ptepindex = va >> PDRSHIFT;
2166		if (mpte && (mpte->pindex == ptepindex)) {
2167			++mpte->hold_count;
2168		} else {
2169retry:
2170			/*
2171			 * Get the page directory entry
2172			 */
2173			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2174
2175			/*
2176			 * If the page table page is mapped, we just increment
2177			 * the hold count, and activate it.
2178			 */
2179			if (ptepa) {
2180				if (ptepa & PG_PS)
2181					panic("pmap_enter_quick: unexpected mapping into 4MB page");
2182#if defined(PTPHINT)
2183				if (pmap->pm_ptphint &&
2184					(pmap->pm_ptphint->pindex == ptepindex)) {
2185					mpte = pmap->pm_ptphint;
2186				} else {
2187					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2188					pmap->pm_ptphint = mpte;
2189				}
2190#else
2191				mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2192#endif
2193				if (mpte == NULL)
2194					goto retry;
2195				++mpte->hold_count;
2196			} else {
2197				mpte = _pmap_allocpte(pmap, ptepindex);
2198			}
2199		}
2200	} else {
2201		mpte = NULL;
2202	}
2203
2204	/*
2205	 * This call to vtopte makes the assumption that we are
2206	 * entering the page into the current pmap.  In order to support
2207	 * quick entry into any pmap, one would likely use pmap_pte_quick.
2208	 * But that isn't as quick as vtopte.
2209	 */
2210	pte = (unsigned *)vtopte(va);
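	/*
	 * If a mapping already exists at this address, leave it alone:
	 * release the page table page hold (if any) and return.
	 */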
2211	if (*pte) {
2212		if (mpte)
2213			pmap_unwire_pte_hold(pmap, mpte);
2214		return NULL;
2215	}
2216
2217	/*
2218	 * Enter on the PV list if part of our managed memory.  Note that we
2219	 * raise IPL while manipulating pv_table since pmap_enter can be
2220	 * called at interrupt time.
2221	 */
2222	pmap_insert_entry(pmap, va, mpte, pa);
2223
2224	/*
2225	 * Increment counters
2226	 */
2227	pmap->pm_stats.resident_count++;
2228
2229	/*
2230	 * Now validate mapping with RO protection
2231	 */
2232	*pte = pa | PG_V | PG_U | PG_MANAGED;
2233
2234	return mpte;
2235}
2236
2237#define MAX_INIT_PT (96)
2238/*
2239 * pmap_object_init_pt preloads the ptes for a given object
2240 * into the specified pmap.  This eliminates the blast of soft
2241 * faults on process startup and immediately after an mmap.
2242 */
2243void
2244pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
2245	pmap_t pmap;
2246	vm_offset_t addr;
2247	vm_object_t object;
2248	vm_pindex_t pindex;
2249	vm_size_t size;
2250	int limit;
2251{
2252	vm_offset_t tmpidx;
2253	int psize;
2254	vm_page_t p, mpte;
2255	int objpgs;
2256
2257	if (!pmap)
2258		return;
2259
2260	/*
2261	 * This code maps large physical mmap regions into the
2262	 * processor address space.  Note that some shortcuts
2263	 * are taken, but the code works.
2264	 */
2265	if (pseflag &&
2266		(object->type == OBJT_DEVICE) &&
2267		((addr & (NBPDR - 1)) == 0) &&
2268		((size & (NBPDR - 1)) == 0) ) {
2269		int i;
2270		int s;
2271		vm_page_t m[1];
2272		unsigned int ptepindex;
2273		int npdes;
2274		vm_offset_t ptepa;
2275
2276		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
2277			return;
2278
2279		s = splhigh();
2280retry:
2281		p = vm_page_lookup(object, pindex);
2282		if (p && (p->flags & PG_BUSY)) {
2283			tsleep(p, PVM, "init4p", 0);
2284			goto retry;
2285		}
2286		splx(s);
2287
2288		if (p == NULL) {
2289			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
2290			if (p == NULL)
2291				return;
2292			m[0] = p;
2293
2294			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
2295				PAGE_WAKEUP(p);
2296				vm_page_free(p);
2297				return;
2298			}
2299
2300			p = vm_page_lookup(object, pindex);
2301			PAGE_WAKEUP(p);
2302		}
2303
2304		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
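		/*
		 * The physical address must be 4MB (NBPDR) aligned in order
		 * to be mapped with 4MB (PG_PS) page directory entries.
		 */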
2305		if (ptepa & (NBPDR - 1)) {
2306			return;
2307		}
2308
2309		p->valid = VM_PAGE_BITS_ALL;
2310
2311		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
2312		npdes = size >> PDRSHIFT;
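		/*
		 * Install one 4MB (PG_PS) page directory entry for each
		 * 4MB chunk of the region.
		 */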
2313		for (i = 0; i < npdes; i++) {
2314			pmap->pm_pdir[ptepindex] =
2315				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
2316			ptepa += NBPDR;
2317			ptepindex += 1;
2318		}
2319		p->flags |= PG_MAPPED;
2320		invltlb();
2321		return;
2322	}
2323
2324	psize = i386_btop(size);
2325
2326	if ((object->type != OBJT_VNODE) ||
2327		(limit && (psize > MAX_INIT_PT) &&
2328			(object->resident_page_count > MAX_INIT_PT))) {
2329		return;
2330	}
2331
2332	if (psize + pindex > object->size)
2333		psize = object->size - pindex;
2334
2335	mpte = NULL;
2336	/*
2337	 * if we are processing a major portion of the object, then scan the
2338	 * entire thing.
2339	 */
2340	if (psize > (object->size >> 2)) {
2341		objpgs = psize;
2342
2343		for (p = TAILQ_FIRST(&object->memq);
2344		    ((objpgs > 0) && (p != NULL));
2345		    p = TAILQ_NEXT(p, listq)) {
2346
2347			tmpidx = p->pindex;
2348			if (tmpidx < pindex) {
2349				continue;
2350			}
2351			tmpidx -= pindex;
2352			if (tmpidx >= psize) {
2353				continue;
2354			}
2355			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2356			    (p->busy == 0) &&
2357			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2358				if ((p->queue - p->pc) == PQ_CACHE)
2359					vm_page_deactivate(p);
2360				p->flags |= PG_BUSY;
2361				mpte = pmap_enter_quick(pmap,
2362					addr + i386_ptob(tmpidx),
2363					VM_PAGE_TO_PHYS(p), mpte);
2364				p->flags |= PG_MAPPED;
2365				PAGE_WAKEUP(p);
2366			}
2367			objpgs -= 1;
2368		}
2369	} else {
2370		/*
2371		 * else lookup the pages one-by-one.
2372		 * else look up the pages one-by-one.
2373		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
2374			p = vm_page_lookup(object, tmpidx + pindex);
2375			if (p &&
2376			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2377			    (p->busy == 0) &&
2378			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2379				if ((p->queue - p->pc) == PQ_CACHE)
2380					vm_page_deactivate(p);
2381				p->flags |= PG_BUSY;
2382				mpte = pmap_enter_quick(pmap,
2383					addr + i386_ptob(tmpidx),
2384					VM_PAGE_TO_PHYS(p), mpte);
2385				p->flags |= PG_MAPPED;
2386				PAGE_WAKEUP(p);
2387			}
2388		}
2389	}
2390	return;
2391}
2392
2393/*
2394 * pmap_prefault provides a quick way of clustering
2395 * page faults into a process's address space.  It is a "cousin"
2396 * of pmap_object_init_pt, except it runs at page fault time instead
2397 * of mmap time.
2398 */
2399#define PFBAK 2
2400#define PFFOR 2
2401#define PAGEORDER_SIZE (PFBAK+PFFOR)
2402
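/*
 * Offsets, relative to the faulting address, at which neighboring pages
 * are prefaulted: one page back, one page forward, then two pages back
 * and two pages forward.
 */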
2403static int pmap_prefault_pageorder[] = {
2404	-PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE
2405};
2406
2407void
2408pmap_prefault(pmap, addra, entry, object)
2409	pmap_t pmap;
2410	vm_offset_t addra;
2411	vm_map_entry_t entry;
2412	vm_object_t object;
2413{
2414	int i;
2415	vm_offset_t starta;
2416	vm_offset_t addr;
2417	vm_pindex_t pindex;
2418	vm_page_t m, mpte;
2419
2420	if (entry->object.vm_object != object)
2421		return;
2422
2423	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap))
2424		return;
2425
2426	starta = addra - PFBAK * PAGE_SIZE;
2427	if (starta < entry->start) {
2428		starta = entry->start;
2429	} else if (starta > addra) {
2430		starta = 0;
2431	}
2432
2433	mpte = NULL;
2434	for (i = 0; i < PAGEORDER_SIZE; i++) {
2435		vm_object_t lobject;
2436		unsigned *pte;
2437
2438		addr = addra + pmap_prefault_pageorder[i];
2439		if (addr < starta || addr >= entry->end)
2440			continue;
2441
2442		if ((*pmap_pde(pmap, addr)) == 0)
2443			continue;
2444
2445		pte = (unsigned *) vtopte(addr);
2446		if (*pte)
2447			continue;
2448
2449		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2450		lobject = object;
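		/*
		 * Walk down the backing object chain looking for the page,
		 * stopping at the first non-default object or at a backing
		 * object whose offset is not page aligned.
		 */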
2451		for (m = vm_page_lookup(lobject, pindex);
2452		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2453		    lobject = lobject->backing_object) {
2454			if (lobject->backing_object_offset & PAGE_MASK)
2455				break;
2456			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2457			m = vm_page_lookup(lobject->backing_object, pindex);
2458		}
2459
2460		/*
2461		 * give up when a page is not in memory
2462		 */
2463		if (m == NULL)
2464			break;
2465
2466		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2467		    (m->busy == 0) &&
2468		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2469
2470			if ((m->queue - m->pc) == PQ_CACHE) {
2471				vm_page_deactivate(m);
2472			}
2473			m->flags |= PG_BUSY;
2474			mpte = pmap_enter_quick(pmap, addr,
2475				VM_PAGE_TO_PHYS(m), mpte);
2476			m->flags |= PG_MAPPED;
2477			PAGE_WAKEUP(m);
2478		}
2479	}
2480}
2481
2482/*
2483 *	Routine:	pmap_change_wiring
2484 *	Function:	Change the wiring attribute for a map/virtual-address
2485 *			pair.
2486 *	In/out conditions:
2487 *			The mapping must already exist in the pmap.
2488 */
2489void
2490pmap_change_wiring(pmap, va, wired)
2491	register pmap_t pmap;
2492	vm_offset_t va;
2493	boolean_t wired;
2494{
2495	register unsigned *pte;
2496
2497	if (pmap == NULL)
2498		return;
2499
2500	pte = pmap_pte(pmap, va);
2501
2502	if (wired && !pmap_pte_w(pte))
2503		pmap->pm_stats.wired_count++;
2504	else if (!wired && pmap_pte_w(pte))
2505		pmap->pm_stats.wired_count--;
2506
2507	/*
2508	 * Wiring is not a hardware characteristic so there is no need to
2509	 * invalidate TLB.
2510	 */
2511	pmap_pte_set_w(pte, wired);
2512}
2513
2514
2515
2516/*
2517 *	Copy the range specified by src_addr/len
2518 *	from the source map to the range dst_addr/len
2519 *	in the destination map.
2520 *
2521 *	This routine is only advisory and need not do anything.
2522 */
2523
2524void
2525pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2526	pmap_t dst_pmap, src_pmap;
2527	vm_offset_t dst_addr;
2528	vm_size_t len;
2529	vm_offset_t src_addr;
2530{
2531	vm_offset_t addr;
2532	vm_offset_t end_addr = src_addr + len;
2533	vm_offset_t pdnxt;
2534	unsigned src_frame, dst_frame;
2535
2536	if (dst_addr != src_addr)
2537		return;
2538
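	/*
	 * The source pmap must be the one currently mapped through the
	 * recursive page table slot (PTD); the destination pmap's page
	 * tables are made addressable through the alternate slot (APTD).
	 */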
2539	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2540	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
2541		return;
2542	}
2543
2544	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2545	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
2546		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
2547		invltlb();
2548	}
2549
2550	for (addr = src_addr; addr < end_addr; addr = pdnxt) {
2551		unsigned *src_pte, *dst_pte;
2552		vm_page_t dstmpte, srcmpte;
2553		vm_offset_t srcptepaddr;
2554		unsigned ptepindex;
2555
2556		if (addr >= UPT_MIN_ADDRESS)
2557			panic("pmap_copy: invalid to pmap_copy page tables");
2558
2559		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2560		ptepindex = addr >> PDRSHIFT;
2561
2562		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2563		if (srcptepaddr == 0)
2564			continue;
2565
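		/*
		 * A 4MB (PG_PS) mapping has no page table page; copy the
		 * page directory entry itself if the destination slot is
		 * still empty.
		 */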
2566		if (srcptepaddr & PG_PS) {
2567			if (dst_pmap->pm_pdir[ptepindex] == 0) {
2568				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2569				dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2570			}
2571			continue;
2572		}
2573
2574		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
2575		if ((srcmpte == NULL) ||
2576			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
2577			continue;
2578
2579		if (pdnxt > end_addr)
2580			pdnxt = end_addr;
2581
2582		src_pte = (unsigned *) vtopte(addr);
2583		dst_pte = (unsigned *) avtopte(addr);
2584		while (addr < pdnxt) {
2585			unsigned ptetemp;
2586			ptetemp = *src_pte;
2587			/*
2588			 * we only make a virtual copy of managed pages
2589			 */
2590			if ((ptetemp & PG_MANAGED) != 0) {
2591				/*
2592				 * We have to check after allocpte for the
2593				 * pte still being around...  allocpte can
2594				 * block.
2595				 */
2596				dstmpte = pmap_allocpte(dst_pmap, addr);
2597				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
2598					/*
2599					 * Clear the modified and
2600					 * accessed (referenced) bits
2601					 * during the copy.
2602					 */
2603					*dst_pte = ptetemp & ~(PG_M|PG_A);
2604					dst_pmap->pm_stats.resident_count++;
2605					pmap_insert_entry(dst_pmap, addr,
2606						dstmpte,
2607						(ptetemp & PG_FRAME));
2608	 			} else {
2609					pmap_unwire_pte_hold(dst_pmap, dstmpte);
2610				}
2611				if (dstmpte->hold_count >= srcmpte->hold_count)
2612					break;
2613			}
2614			addr += PAGE_SIZE;
2615			++src_pte;
2616			++dst_pte;
2617		}
2618	}
2619}
2620
2621/*
2622 *	Routine:	pmap_kernel
2623 *	Function:
2624 *		Returns the physical map handle for the kernel.
2625 */
2626pmap_t
2627pmap_kernel()
2628{
2629	return (kernel_pmap);
2630}
2631
2632/*
2633 *	pmap_zero_page zeros the specified (machine independent)
2634 *	page by mapping the page into virtual memory and using
2635 *	bzero to clear its contents, one machine dependent page
2636 *	at a time.
2637 */
2638void
2639pmap_zero_page(phys)
2640	vm_offset_t phys;
2641{
2642#ifdef SMP
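	/*
	 * Use this CPU's private mapping window (prv_CMAP3/prv_CPAGE3)
	 * rather than the global CMAP/CADDR pages.
	 */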
2643	if (*(int *) prv_CMAP3)
2644		panic("pmap_zero_page: prv_CMAP3 busy");
2645
2646	*(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME);
2647	invltlb_1pg((vm_offset_t) &prv_CPAGE3);
2648
2649	bzero(&prv_CPAGE3, PAGE_SIZE);
2650
2651	*(int *) prv_CMAP3 = 0;
2652	invltlb_1pg((vm_offset_t) &prv_CPAGE3);
2653#else
2654	if (*(int *) CMAP2)
2655		panic("pmap_zero_page: CMAP2 busy");
2656
2657	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME);
2658	bzero(CADDR2, PAGE_SIZE);
2659	*(int *) CMAP2 = 0;
2660	invltlb_1pg((vm_offset_t) CADDR2);
2661#endif
2662}
2663
2664/*
2665 *	pmap_copy_page copies the specified (machine independent)
2666 *	page by mapping the page into virtual memory and using
2667 *	bcopy to copy the page, one machine dependent page at a
2668 *	time.
2669 */
2670void
2671pmap_copy_page(src, dst)
2672	vm_offset_t src;
2673	vm_offset_t dst;
2674{
2675#ifdef SMP
2676	if (*(int *) prv_CMAP1)
2677		panic("pmap_copy_page: prv_CMAP1 busy");
2678	if (*(int *) prv_CMAP2)
2679		panic("pmap_copy_page: prv_CMAP2 busy");
2680
2681	*(int *) prv_CMAP1 = PG_V | PG_RW | (src & PG_FRAME);
2682	*(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME);
2683
2684	invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2);
2685
2686	bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE);
2687
2688	*(int *) prv_CMAP1 = 0;
2689	*(int *) prv_CMAP2 = 0;
2690	invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2);
2691#else
2692	if (*(int *) CMAP1 || *(int *) CMAP2)
2693		panic("pmap_copy_page: CMAP busy");
2694
2695	*(int *) CMAP1 = PG_V | PG_RW | (src & PG_FRAME);
2696	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME);
2697
2698	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2699
2700	*(int *) CMAP1 = 0;
2701	*(int *) CMAP2 = 0;
2702	invltlb_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2);
2703#endif
2704}
2705
2706
2707/*
2708 *	Routine:	pmap_pageable
2709 *	Function:
2710 *		Make the specified pages (by pmap, offset)
2711 *		pageable (or not) as requested.
2712 *
2713 *		A page which is not pageable may not take
2714 *		a fault; therefore, its page table entry
2715 *		must remain valid for the duration.
2716 *
2717 *		This routine is merely advisory; pmap_enter
2718 *		will specify that these pages are to be wired
2719 *		down (or not) as appropriate.
2720 */
2721void
2722pmap_pageable(pmap, sva, eva, pageable)
2723	pmap_t pmap;
2724	vm_offset_t sva, eva;
2725	boolean_t pageable;
2726{
2727}
2728
2729/*
2730 * this routine returns true if a physical page resides
2731 * in the given pmap.
2732 */
2733boolean_t
2734pmap_page_exists(pmap, pa)
2735	pmap_t pmap;
2736	vm_offset_t pa;
2737{
2738	register pv_entry_t pv;
2739	pv_table_t *ppv;
2740	int s;
2741
2742	if (!pmap_is_managed(pa))
2743		return FALSE;
2744
2745	s = splvm();
2746
2747	ppv = pa_to_pvh(pa);
2748	/*
2749	 * Check current mappings, returning immediately if one is found.
2750	 */
2751	for (pv = TAILQ_FIRST(&ppv->pv_list);
2752		pv;
2753		pv = TAILQ_NEXT(pv, pv_list)) {
2754		if (pv->pv_pmap == pmap) {
2755			splx(s);
2756			return TRUE;
2757		}
2758	}
2759	splx(s);
2760	return (FALSE);
2761}
2762
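/*
 * When PMAP_REMOVE_PAGES_CURPROC_ONLY is defined, pmap_remove_pages()
 * only operates on the current process' pmap and can therefore use the
 * cheap vtopte() lookup instead of pmap_pte_quick().
 */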
2763#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2764/*
2765 * Remove all pages from the specified address space;
2766 * this aids process exit speeds.  Also, this code
2767 * is special-cased for the current process only, but
2768 * can have the more generic (and slightly slower)
2769 * mode enabled.  This is much faster than pmap_remove
2770 * in the case of running down an entire address space.
2771 */
2772void
2773pmap_remove_pages(pmap, sva, eva)
2774	pmap_t pmap;
2775	vm_offset_t sva, eva;
2776{
2777	unsigned *pte, tpte;
2778	pv_table_t *ppv;
2779	pv_entry_t pv, npv;
2780	int s;
2781
2782#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2783	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) {
2784		printf("warning: pmap_remove_pages called with non-current pmap\n");
2785		return;
2786	}
2787#endif
2788
2789	s = splvm();
2790	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2791		pv;
2792		pv = npv) {
2793
2794		if (pv->pv_va >= eva || pv->pv_va < sva) {
2795			npv = TAILQ_NEXT(pv, pv_plist);
2796			continue;
2797		}
2798
2799#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2800		pte = (unsigned *)vtopte(pv->pv_va);
2801#else
2802		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2803#endif
2804		tpte = *pte;
2805
2806		/*
2807		 * We cannot remove wired pages from a process' mapping at this time.
2808		 */
2809		if (tpte & PG_W) {
2810			npv = TAILQ_NEXT(pv, pv_plist);
2811			continue;
2812		}
2813		*pte = 0;
2814
2815		ppv = pa_to_pvh(tpte);
2816
2817		pv->pv_pmap->pm_stats.resident_count--;
2818
2819		/*
2820		 * Update the vm_page_t clean and reference bits.
2821		 */
2822		if (tpte & PG_M) {
2823			ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2824		}
2825
2826
2827		npv = TAILQ_NEXT(pv, pv_plist);
2828		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2829
2830		--ppv->pv_list_count;
2831		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
2832		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
2833			ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
2834		}
2835
2836		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2837		free_pv_entry(pv);
2838	}
2839	splx(s);
2840	invltlb();
2841}
2842
2843/*
2844 * pmap_testbit tests bits in pte's
2845 * pmap_testbit tests bits in PTEs.
2846 * Note that the testbit/changebit routines are inline,
2847 * so a lot of things are evaluated at compile time.
2848static boolean_t
2849pmap_testbit(pa, bit)
2850	register vm_offset_t pa;
2851	int bit;
2852{
2853	register pv_entry_t pv;
2854	pv_table_t *ppv;
2855	unsigned *pte;
2856	int s;
2857
2858	if (!pmap_is_managed(pa))
2859		return FALSE;
2860
2861	ppv = pa_to_pvh(pa);
2862	if (TAILQ_FIRST(&ppv->pv_list) == NULL)
2863		return FALSE;
2864
2865	s = splvm();
2866
2867	for (pv = TAILQ_FIRST(&ppv->pv_list);
2868		pv;
2869		pv = TAILQ_NEXT(pv, pv_list)) {
2870
2871		/*
2872		 * If the bit being tested is the accessed or modified
2873		 * bit, only consider mappings whose modified state we
2874		 * track (see pmap_track_modified()).
2875		 */
2876		if (bit & (PG_A|PG_M)) {
2877			if (!pmap_track_modified(pv->pv_va))
2878				continue;
2879		}
2880
2881#if defined(PMAP_DIAGNOSTIC)
2882		if (!pv->pv_pmap) {
2883			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2884			continue;
2885		}
2886#endif
2887		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2888		if (*pte & bit) {
2889			splx(s);
2890			return TRUE;
2891		}
2892	}
2893	splx(s);
2894	return (FALSE);
2895}
2896
2897/*
2898 * this routine is used to modify bits in ptes
2899 */
2900static void
2901pmap_changebit(pa, bit, setem)
2902	vm_offset_t pa;
2903	int bit;
2904	boolean_t setem;
2905{
2906	register pv_entry_t pv;
2907	pv_table_t *ppv;
2908	register unsigned *pte;
2909	int changed;
2910	int s;
2911
2912	if (!pmap_is_managed(pa))
2913		return;
2914
2915	s = splvm();
2916	changed = 0;
2917	ppv = pa_to_pvh(pa);
2918
2919	/*
2920	 * Loop over all current mappings, setting/clearing as appropriate.
2921	 * If setting RO, do we need to clear the VAC?
2922	 */
2923	for (pv = TAILQ_FIRST(&ppv->pv_list);
2924		pv;
2925		pv = TAILQ_NEXT(pv, pv_list)) {
2926
2927		/*
2928		 * don't write protect pager mappings
2929		 */
2930		if (!setem && (bit == PG_RW)) {
2931			if (!pmap_track_modified(pv->pv_va))
2932				continue;
2933		}
2934
2935#if defined(PMAP_DIAGNOSTIC)
2936		if (!pv->pv_pmap) {
2937			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2938			continue;
2939		}
2940#endif
2941
2942		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2943
2944		if (setem) {
2945			*(int *)pte |= bit;
2946			changed = 1;
2947		} else {
2948			vm_offset_t pbits = *(vm_offset_t *)pte;
2949			if (pbits & bit) {
2950				changed = 1;
2951				if (bit == PG_RW) {
2952					if (pbits & PG_M) {
2953						ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2954					}
2955					*(int *)pte = pbits & ~(PG_M|PG_RW);
2956				} else {
2957					*(int *)pte = pbits & ~bit;
2958				}
2959			}
2960		}
2961	}
2962	splx(s);
2963	if (changed)
2964		invltlb();
2965}
2966
2967/*
2968 *      pmap_page_protect:
2969 *
2970 *      Lower the permission for all mappings to a given page.
2971 */
2972void
2973pmap_page_protect(vm_offset_t phys, vm_prot_t prot)
2974{
2975	if ((prot & VM_PROT_WRITE) == 0) {
2976		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2977			pmap_changebit(phys, PG_RW, FALSE);
2978		} else {
2979			pmap_remove_all(phys);
2980		}
2981	}
2982}
2983
2984vm_offset_t
2985pmap_phys_address(ppn)
2986	int ppn;
2987{
2988	return (i386_ptob(ppn));
2989}
2990
2991/*
2992 *	pmap_ts_referenced:
2993 *
2994 *	Return the count of reference bits for a page, clearing all of them.
2995 *
2996 */
2997int
2998pmap_ts_referenced(vm_offset_t pa)
2999{
3000	register pv_entry_t pv;
3001	pv_table_t *ppv;
3002	unsigned *pte;
3003	int s;
3004	int rtval = 0;
3005
3006	if (!pmap_is_managed(pa))
3007		return FALSE;
3008
3009	s = splvm();
3010
3011	ppv = pa_to_pvh(pa);
3012
3013	if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
3014		splx(s);
3015		return 0;
3016	}
3017
3018	/*
3019	 * Loop over the current mappings, counting and clearing reference bits.
3020	 */
3021	for (pv = TAILQ_FIRST(&ppv->pv_list);
3022		pv;
3023		pv = TAILQ_NEXT(pv, pv_list)) {
3024		/*
3025		 * Only count and clear reference bits on mappings
3026		 * whose modified state we track
3027		 * (see pmap_track_modified()).
3028		 */
3029		if (!pmap_track_modified(pv->pv_va))
3030			continue;
3031
3032		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3033		if (pte == NULL) {
3034			continue;
3035		}
3036		if (*pte & PG_A) {
3037			rtval++;
3038			*pte &= ~PG_A;
3039		}
3040	}
3041	splx(s);
3042	if (rtval) {
3043		invltlb();
3044	}
3045	return (rtval);
3046}
3047
3048/*
3049 *	pmap_is_modified:
3050 *
3051 *	Return whether or not the specified physical page was modified
3052 *	in any physical maps.
3053 */
3054boolean_t
3055pmap_is_modified(vm_offset_t pa)
3056{
3057	return pmap_testbit((pa), PG_M);
3058}
3059
3060/*
3061 *	Clear the modify bits on the specified physical page.
3062 */
3063void
3064pmap_clear_modify(vm_offset_t pa)
3065{
3066	pmap_changebit((pa), PG_M, FALSE);
3067}
3068
3069/*
3070 *	pmap_clear_reference:
3071 *
3072 *	Clear the reference bit on the specified physical page.
3073 */
3074void
3075pmap_clear_reference(vm_offset_t pa)
3076{
3077	pmap_changebit((pa), PG_A, FALSE);
3078}
3079
3080/*
3081 * Miscellaneous support routines follow
3082 */
3083
3084static void
3085i386_protection_init()
3086{
3087	register int *kp, prot;
3088
3089	kp = protection_codes;
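	/*
	 * protection_codes[] is indexed by a VM_PROT_{READ,WRITE,EXECUTE}
	 * combination; the i386 has no execute bit, so the only choice is
	 * whether or not to grant PG_RW.
	 */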
3090	for (prot = 0; prot < 8; prot++) {
3091		switch (prot) {
3092		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3093			/*
3094			 * Read access is also 0. There isn't any execute bit,
3095			 * so just make it readable.
3096			 */
3097		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3098		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3099		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3100			*kp++ = 0;
3101			break;
3102		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3103		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3104		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3105		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3106			*kp++ = PG_RW;
3107			break;
3108		}
3109	}
3110}
3111
3112/*
3113 * Map a set of physical memory pages into the kernel virtual
3114 * address space. Return a pointer to where it is mapped. This
3115 * routine is intended to be used for mapping device memory,
3116 * NOT real memory.
3117 */
3118void *
3119pmap_mapdev(pa, size)
3120	vm_offset_t pa;
3121	vm_size_t size;
3122{
3123	vm_offset_t va, tmpva;
3124	unsigned *pte;
3125
3126	size = roundup(size, PAGE_SIZE);
3127
3128	va = kmem_alloc_pageable(kernel_map, size);
3129	if (!va)
3130		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3131
3132	pa = pa & PG_FRAME;
3133	for (tmpva = va; size > 0;) {
3134		pte = (unsigned *)vtopte(tmpva);
3135		*pte = pa | PG_RW | PG_V | pgeflag;
3136		size -= PAGE_SIZE;
3137		tmpva += PAGE_SIZE;
3138		pa += PAGE_SIZE;
3139	}
3140	invltlb();
3141
3142	return ((void *) va);
3143}
3144
3145/*
3146 * perform the pmap work for mincore
3147 */
3148int
3149pmap_mincore(pmap, addr)
3150	pmap_t pmap;
3151	vm_offset_t addr;
3152{
3153
3154	unsigned *ptep, pte;
3155	int val = 0;
3156
3157	ptep = pmap_pte(pmap, addr);
3158	if (ptep == 0) {
3159		return 0;
3160	}
3161
3162	if ((pte = *ptep) != 0) {
3163		vm_offset_t pa;
3164		val = MINCORE_INCORE;
3165		pa = pte & PG_FRAME;
3166
3167		/*
3168		 * Modified by us
3169		 */
3170		if (pte & PG_M)
3171			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3172		/*
3173		 * Modified by someone
3174		 */
3175		else if (PHYS_TO_VM_PAGE(pa)->dirty ||
3176			pmap_is_modified(pa))
3177			val |= MINCORE_MODIFIED_OTHER;
3178		/*
3179		 * Referenced by us
3180		 */
3181		if (pte & PG_U)
3182			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3183
3184		/*
3185		 * Referenced by someone
3186		 */
3187		else if ((PHYS_TO_VM_PAGE(pa)->flags & PG_REFERENCED) ||
3188			pmap_ts_referenced(pa)) {
3189			val |= MINCORE_REFERENCED_OTHER;
3190			PHYS_TO_VM_PAGE(pa)->flags |= PG_REFERENCED;
3191		}
3192	}
3193	return val;
3194}
3195
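/*
 * Make the given process' address space current by loading the physical
 * address of its page directory into %cr3 (also cached in the PCB).
 */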
3196void
3197pmap_activate(struct proc *p)
3198{
3199	load_cr3(p->p_addr->u_pcb.pcb_cr3 =
3200		vtophys(p->p_vmspace->vm_pmap.pm_pdir));
3201}
3202
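/*
 * Round the address hint for large device mappings up to a 4MB boundary
 * so that pmap_object_init_pt() can later map the object with 4MB
 * (PG_PS) page directory entries.
 */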
3203vm_offset_t
3204pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3205{
3206	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3207		return addr;
3208	}
3209
3210	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3211	return addr;
3212}
3213
3214
3215#if defined(PMAP_DEBUG)
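/*
 * Debugging aid: dump every valid user-space mapping belonging to the
 * process with the given pid, two mappings per line of output.
 */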
3216int pmap_pid_dump(int pid) {
3217	pmap_t pmap;
3218	struct proc *p;
3219	int npte = 0;
3220	int index;
3221	for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) {
3222		if (p->p_pid != pid)
3223			continue;
3224
3225		if (p->p_vmspace) {
3226			int i,j;
3227			index = 0;
3228			pmap = &p->p_vmspace->vm_pmap;
3229			for (i = 0; i < 1024; i++) {
3230				pd_entry_t *pde;
3231				unsigned *pte;
3232				unsigned base = i << PDRSHIFT;
3233
3234				pde = &pmap->pm_pdir[i];
3235				if (pde && pmap_pde_v(pde)) {
3236					for (j = 0; j < 1024; j++) {
3237						unsigned va = base + (j << PAGE_SHIFT);
3238						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
3239							if (index) {
3240								index = 0;
3241								printf("\n");
3242							}
3243							return npte;
3244						}
3245						pte = pmap_pte_quick( pmap, va);
3246						if (pte && pmap_pte_v(pte)) {
3247							vm_offset_t pa;
3248							vm_page_t m;
3249							pa = *(int *)pte;
3250							m = PHYS_TO_VM_PAGE((pa & PG_FRAME));
3251							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
3252								va, pa, m->hold_count, m->wire_count, m->flags);
3253							npte++;
3254							index++;
3255							if (index >= 2) {
3256								index = 0;
3257								printf("\n");
3258							} else {
3259								printf(" ");
3260							}
3261						}
3262					}
3263				}
3264			}
3265		}
3266	}
3267	return npte;
3268}
3269#endif
3270
3271#if defined(DEBUG)
3272
3273static void	pads __P((pmap_t pm));
3274static void	pmap_pvdump __P((vm_offset_t pa));
3275
3276/* print address space of pmap */
3277static void
3278pads(pm)
3279	pmap_t pm;
3280{
3281	unsigned va, i, j;
3282	unsigned *ptep;
3283
3284	if (pm == kernel_pmap)
3285		return;
3286	for (i = 0; i < 1024; i++)
3287		if (pm->pm_pdir[i])
3288			for (j = 0; j < 1024; j++) {
3289				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3290				if (pm == kernel_pmap && va < KERNBASE)
3291					continue;
3292				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
3293					continue;
3294				ptep = pmap_pte_quick(pm, va);
3295				if (pmap_pte_v(ptep))
3296					printf("%x:%x ", va, *(int *) ptep);
3297			}
3298
3299}
3300
3301static void
3302pmap_pvdump(pa)
3303	vm_offset_t pa;
3304{
3305	pv_table_t *ppv;
3306	register pv_entry_t pv;
3307
3308	printf("pa %x", pa);
3309	ppv = pa_to_pvh(pa);
3310	for (pv = TAILQ_FIRST(&ppv->pv_list);
3311		pv;
3312		pv = TAILQ_NEXT(pv, pv_list)) {
3313#ifdef used_to_be
3314		printf(" -> pmap %x, va %x, flags %x",
3315		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
3316#endif
3317		printf(" -> pmap %x, va %x",
3318		    pv->pv_pmap, pv->pv_va);
3319		pads(pv->pv_pmap);
3320	}
3321	printf(" ");
3322}
3323#endif
3324