1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42 *	$Id: pmap.c,v 1.149 1997/07/17 19:45:01 dyson Exp $
43 */
44
45/*
46 *	Manages physical address maps.
47 *
48 *	In addition to hardware address maps, this
49 *	module is called upon to provide software-use-only
50 *	maps which may or may not be stored in the same
51 *	form as hardware maps.  These pseudo-maps are
52 *	used to store intermediate results from copy
53 *	operations to and from address spaces.
54 *
55 *	Since the information managed by this module is
56 *	also stored by the logical address mapping module,
57 *	this module may throw away valid virtual-to-physical
58 *	mappings at almost any time.  However, invalidations
59 *	of virtual-to-physical mappings must be done as
60 *	requested.
61 *
62 *	In order to cope with hardware architectures which
63 *	make virtual-to-physical map invalidates expensive,
64 * this module may delay invalidation or protection-reduction
65 *	operations until such time as they are actually
66 *	necessary.  This module is given full information as
67 *	to which processors are currently using which maps,
68 *	and to when physical maps must be made correct.
69 */
70
71#include "opt_cpu.h"
72
73#define PMAP_LOCK 1
74#define PMAP_PVLIST 1
75
76#include <sys/param.h>
77#include <sys/systm.h>
78#include <sys/proc.h>
79#include <sys/malloc.h>
80#include <sys/msgbuf.h>
81#include <sys/vmmeter.h>
82#include <sys/mman.h>
83
84#include <vm/vm.h>
85#include <vm/vm_param.h>
86#include <vm/vm_prot.h>
87#include <sys/lock.h>
88#include <vm/vm_kern.h>
89#include <vm/vm_page.h>
90#include <vm/vm_map.h>
91#include <vm/vm_object.h>
92#include <vm/vm_extern.h>
93#include <vm/vm_pageout.h>
94#include <vm/vm_pager.h>
95
96#include <sys/user.h>
97
98#include <machine/cpu.h>
99#include <machine/cputypes.h>
100#include <machine/md_var.h>
101#include <machine/specialreg.h>
102#if defined(SMP) || defined(APIC_IO)
103#include <machine/smp.h>
104#include <machine/apic.h>
105#endif /* SMP || APIC_IO */
106
107#define PMAP_KEEP_PDIRS
108#ifndef PMAP_SHPGPERPROC
109#define PMAP_SHPGPERPROC 200
110#endif
111
112#if defined(DIAGNOSTIC)
113#define PMAP_DIAGNOSTIC
114#endif
115
116#if !defined(PMAP_DIAGNOSTIC)
117#define PMAP_INLINE __inline
118#else
119#define PMAP_INLINE
120#endif
121
122#define PTPHINT
123
124static void	init_pv_entries __P((int));
125
126/*
127 * Get PDEs and PTEs for user/kernel address space
128 */
129#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
130#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
131
132#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
133#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
134#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
135#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
136#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
137
138#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
139#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
140
141/*
142 * Given a map and a machine independent protection code,
143 * convert to an i386 protection code.
144 */
145#define pte_prot(m, p)	(protection_codes[p])
146static int protection_codes[8];
147
148#define	pa_index(pa)		atop((pa) - vm_first_phys)
149#define	pa_to_pvh(pa)		(&pv_table[pa_index(pa)])
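/*
 * Illustrative note: pv_table is a flat array with one pv_table_t per
 * physical page being managed, starting at vm_first_phys; so, e.g.,
 * pa_to_pvh(vm_first_phys + 3 * PAGE_SIZE) is simply &pv_table[3].
 */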
150
151static struct pmap kernel_pmap_store;
152pmap_t kernel_pmap;
153
154vm_offset_t avail_start;	/* PA of first available physical page */
155vm_offset_t avail_end;		/* PA of last available physical page */
156vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
157vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
158static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
159static vm_offset_t vm_first_phys;
160static int pgeflag;		/* PG_G or-in */
161static int pseflag;		/* PG_PS or-in */
162
163static int nkpt;
164static vm_page_t nkpg;
165vm_offset_t kernel_vm_end;
166
167extern vm_offset_t clean_sva, clean_eva;
168
169#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2)
170
171/*
172 * Data for the pv entry allocation mechanism
173 */
174static int pv_freelistcnt;
175TAILQ_HEAD (,pv_entry) pv_freelist = {0};
176static vm_offset_t pvva;
177static int npvvapg;
178
179/*
180 * All those kernel PT submaps that BSD is so fond of
181 */
182pt_entry_t *CMAP1 = 0;
183static pt_entry_t *CMAP2, *ptmmap;
184static pv_table_t *pv_table;
185caddr_t CADDR1 = 0, ptvmmap = 0;
186static caddr_t CADDR2;
187static pt_entry_t *msgbufmap;
188struct msgbuf *msgbufp=0;
189
190pt_entry_t *PMAP1 = 0;
191unsigned *PADDR1 = 0;
192
193static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
194static unsigned * get_ptbase __P((pmap_t pmap));
195static pv_entry_t get_pv_entry __P((void));
196static void	i386_protection_init __P((void));
197static void	pmap_alloc_pv_entry __P((void));
198static void	pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem));
199
200static PMAP_INLINE int	pmap_is_managed __P((vm_offset_t pa));
201static void	pmap_remove_all __P((vm_offset_t pa));
202static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
203				      vm_offset_t pa, vm_page_t mpte));
204static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
205					vm_offset_t sva));
206static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
207static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv,
208					vm_offset_t va));
209static boolean_t pmap_testbit __P((vm_offset_t pa, int bit));
210static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
211		vm_page_t mpte, vm_offset_t pa));
212
213static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
214
215static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
216static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
217static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
218static vm_page_t pmap_page_alloc __P((vm_object_t object, vm_pindex_t pindex));
219static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
220static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
221vm_offset_t pmap_kmem_choose(vm_offset_t addr) ;
222
223#define PDSTACKMAX 6
224static vm_offset_t pdstack[PDSTACKMAX];
225static int pdstackptr;
226unsigned pdir4mb;
227
228/*
229 *	Routine:	pmap_pte
230 *	Function:
231 *		Extract the page table entry associated
232 *		with the given map/virtual_address pair.
233 */
234
235PMAP_INLINE unsigned *
236pmap_pte(pmap, va)
237	register pmap_t pmap;
238	vm_offset_t va;
239{
240	if (pmap && *pmap_pde(pmap, va)) {
241		return get_ptbase(pmap) + i386_btop(va);
242	}
243	return (0);
244}
245
246/*
247 * Move the kernel virtual free pointer to the next
248 * 4MB.  This is used to help improve performance
249 * by using a large (4MB) page for much of the kernel
250 * (.text, .data, .bss)
251 */
252vm_offset_t
253pmap_kmem_choose(vm_offset_t addr) {
254	vm_offset_t newaddr = addr;
255	if (cpu_feature & CPUID_PSE) {
256		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
257	}
258	return newaddr;
259}
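/*
 * Worked example (assuming PSE is available and NBPDR is the usual 4MB):
 * an addr of 0xc0345000 is rounded up to 0xc0400000, so the kernel image
 * below the returned address can later be covered by whole 4MB mappings.
 */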
260
261/*
262 *	Bootstrap the system enough to run with virtual memory.
263 *
264 *	On the i386 this is called after mapping has already been enabled
265 *	and just syncs the pmap module with what has already been done.
266 *	[We can't call it easily with mapping off since the kernel is not
267 *	mapped with PA == VA, hence we would have to relocate every address
268 *	from the linked base (virtual) address "KERNBASE" to the actual
269 *	(physical) address starting relative to 0]
270 */
271void
272pmap_bootstrap(firstaddr, loadaddr)
273	vm_offset_t firstaddr;
274	vm_offset_t loadaddr;
275{
276	vm_offset_t va;
277	pt_entry_t *pte;
278	int i, j;
279
280	avail_start = firstaddr;
281
282	/*
283	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
284	 * large. It should instead be correctly calculated in locore.s and
285	 * not based on 'first' (which is a physical address, not a virtual
286	 * address, for the start of unused physical memory). The kernel
287	 * page tables are NOT double mapped and thus should not be included
288	 * in this calculation.
289	 */
290	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
291	virtual_avail = pmap_kmem_choose(virtual_avail);
292
293	virtual_end = VM_MAX_KERNEL_ADDRESS;
294
295	/*
296	 * Initialize protection array.
297	 */
298	i386_protection_init();
299
300	/*
301	 * The kernel's pmap is statically allocated so we don't have to use
302	 * pmap_create, which is unlikely to work correctly at this part of
303	 * the boot sequence (XXX and which no longer exists).
304	 */
305	kernel_pmap = &kernel_pmap_store;
306
307	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
308
309	kernel_pmap->pm_count = 1;
310#if PMAP_PVLIST
311	TAILQ_INIT(&kernel_pmap->pm_pvlist);
312#endif
313	nkpt = NKPT;
314
315	/*
316	 * Reserve some special page table entries/VA space for temporary
317	 * mapping of pages.
318	 */
319#define	SYSMAP(c, p, v, n)	\
320	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
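	/*
	 * For reference, SYSMAP(caddr_t, CMAP1, CADDR1, 1) below expands to:
	 *
	 *	CADDR1 = (caddr_t)va; va += ((1)*PAGE_SIZE); CMAP1 = pte; pte += (1);
	 *
	 * i.e. it carves one page of VA out of the cursor "va" and pairs it
	 * with the corresponding kernel pte pointed at by "pte".
	 */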
321
322	va = virtual_avail;
323	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
324
325	/*
326	 * CMAP1/CMAP2 are used for zeroing and copying pages.
327	 */
328	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
329	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
330
331	/*
332	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
333	 * XXX ptmmap is not used.
334	 */
335	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
336
337	/*
338	 * msgbufp is used to map the system message buffer.
339	 * XXX msgbufmap is not used.
340	 */
341	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
342	       atop(round_page(sizeof(struct msgbuf))))
343
344	/*
345	 * ptemap is used for pmap_pte_quick
346	 */
347	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
348
349	virtual_avail = va;
350
351	*(int *) CMAP1 = *(int *) CMAP2 = 0;
352	*(int *) PTD = 0;
353
354#ifdef SMP
355	if (cpu_apic_address == 0)
356		panic("pmap_bootstrap: no local apic!");
357
358	/* 0 = private page */
359	/* 1 = page table page */
360	/* 2 = local apic */
361	/* 16-31 = io apics */
362	SMP_prvpt[2] = PG_V | PG_RW | ((u_long)cpu_apic_address & PG_FRAME);
363
364	for (i = 0; i < mp_napics; i++) {
365		for (j = 0; j < 16; j++) {
366			/* same page frame as a previous IO apic? */
367			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) ==
368			    ((u_long)io_apic_address[0] & PG_FRAME)) {
369				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
370				break;
371			}
372			/* use this slot if available */
373			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) == 0) {
374				SMP_prvpt[j + 16] = PG_V | PG_RW |
375				    ((u_long)io_apic_address[i] & PG_FRAME);
376				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
377				break;
378			}
379		}
380		if (j == 16)
381			panic("no space to map IO apic %d!", i);
382	}
383#endif
384
385	invltlb();
386
387	pgeflag = 0;
388#if !defined(SMP)
389	if (cpu_feature & CPUID_PGE) {
390		pgeflag = PG_G;
391	}
392#endif
393
394/*
395 * Initialize the 4MB page size flag
396 */
397	pseflag = 0;
398/*
399 * The 4MB page version of the initial
400 * kernel page mapping.
401 */
402	pdir4mb = 0;
403
404#if !defined(DISABLE_PSE)
405	if (cpu_feature & CPUID_PSE) {
406		unsigned ptditmp;
407		/*
408		 * Enable the PSE mode
409		 */
410		load_cr4(rcr4() | CR4_PSE);
411
412		/*
413		 * Note that we have enabled PSE mode
414		 */
415		pseflag = PG_PS;
416		ptditmp = (unsigned) kernel_pmap->pm_pdir[KPTDI];
417		ptditmp &= ~(NBPDR - 1);
418		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
419		pdir4mb = ptditmp;
420		/*
421		 * We can do the mapping here for the single processor
422		 * case.  We simply ignore the old page table page from
423		 * now on.
424		 */
425#if !defined(SMP)
426		PTD[KPTDI] = (pd_entry_t) ptditmp;
427		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
428		invltlb();
429#endif
430	}
431#endif
432}
433
434/*
435 * Set 4mb pdir for mp startup, and global flags
436 */
437void
438pmap_set_opt(unsigned *pdir) {
439	int i;
440
441	if (pseflag && (cpu_feature & CPUID_PSE)) {
442		load_cr4(rcr4() | CR4_PSE);
443		if (pdir4mb) {
444			(unsigned) pdir[KPTDI] = pdir4mb;
445		}
446	}
447
448	if (pgeflag && (cpu_feature & CPUID_PGE)) {
449		load_cr4(rcr4() | CR4_PGE);
450		for(i = KPTDI; i < KPTDI + nkpt; i++) {
451			if (pdir[i]) {
452				pdir[i] |= PG_G;
453			}
454		}
455	}
456}
457
458/*
459 * Setup the PTD for the boot processor
460 */
461void
462pmap_set_opt_bsp(void) {
463	pmap_set_opt((unsigned *)kernel_pmap->pm_pdir);
464	pmap_set_opt((unsigned *)PTD);
465	invltlb();
466}
467
468/*
469 *	Initialize the pmap module.
470 *	Called by vm_init, to initialize any structures that the pmap
471 *	system needs to map virtual memory.
472 *	pmap_init has been enhanced to support, in a fairly consistent
473 *	way, discontiguous physical memory.
474 */
475void
476pmap_init(phys_start, phys_end)
477	vm_offset_t phys_start, phys_end;
478{
479	vm_offset_t addr;
480	vm_size_t s;
481	int i, npg;
482
483	/*
484	 * calculate the number of pv_entries needed
485	 */
486	vm_first_phys = phys_avail[0];
487	for (i = 0; phys_avail[i + 1]; i += 2);
488	npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE;
489
490	/*
491	 * Allocate memory for random pmap data structures.  Includes the
492	 * pv_head_table.
493	 */
494	s = (vm_size_t) (sizeof(pv_table_t) * npg);
495	s = round_page(s);
496
497	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
498	pv_table = (pv_table_t *) addr;
499	for(i = 0; i < npg; i++) {
500		vm_offset_t pa;
501		TAILQ_INIT(&pv_table[i].pv_list);
502		pv_table[i].pv_list_count = 0;
503		pa = vm_first_phys + i * PAGE_SIZE;
504		pv_table[i].pv_vm_page = PHYS_TO_VM_PAGE(pa);
505	}
506	TAILQ_INIT(&pv_freelist);
507
508	/*
509	 * init the pv free list
510	 */
511	init_pv_entries(npg);
512	/*
513	 * Now it is safe to enable pv_table recording.
514	 */
515	pmap_initialized = TRUE;
516}
517
518/*
519 *	Used to map a range of physical addresses into kernel
520 *	virtual address space.
521 *
522 *	For now, VM is already on, so we only need to map the
523 *	specified memory.
524 */
525vm_offset_t
526pmap_map(virt, start, end, prot)
527	vm_offset_t virt;
528	vm_offset_t start;
529	vm_offset_t end;
530	int prot;
531{
532	while (start < end) {
533		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
534		virt += PAGE_SIZE;
535		start += PAGE_SIZE;
536	}
537	return (virt);
538}
539
540
541/***************************************************
542 * Low level helper routines.....
543 ***************************************************/
544
545#if defined(PMAP_DIAGNOSTIC)
546
547/*
548 * This code checks for non-writeable/modified pages.
549 * This should be an invalid condition.
550 */
551static int
552pmap_nw_modified(pt_entry_t ptea) {
553	int pte;
554
555	pte = (int) ptea;
556
557	if ((pte & (PG_M|PG_RW)) == PG_M)
558		return 1;
559	else
560		return 0;
561}
562#endif
563
564
565/*
566 * this routine decides whether the modified bit should be tracked
567 * for a given va; addresses inside the kernel clean submap are excluded.
568 */
569static PMAP_INLINE int
570pmap_track_modified( vm_offset_t va) {
571	if ((va < clean_sva) || (va >= clean_eva))
572		return 1;
573	else
574		return 0;
575}
576
577static PMAP_INLINE void
578invltlb_1pg( vm_offset_t va) {
579#if defined(I386_CPU)
580	if (cpu_class == CPUCLASS_386) {
581		invltlb();
582	} else
583#endif
584	{
585		invlpg(va);
586	}
587}
588
589static PMAP_INLINE void
590invltlb_2pg( vm_offset_t va1, vm_offset_t va2) {
591#if defined(I386_CPU)
592	if (cpu_class == CPUCLASS_386) {
593		invltlb();
594	} else
595#endif
596	{
597		invlpg(va1);
598		invlpg(va2);
599	}
600}
601
602static unsigned *
603get_ptbase(pmap)
604	pmap_t pmap;
605{
606	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
607
608	/* are we current address space or kernel? */
609	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
610		return (unsigned *) PTmap;
611	}
612	/* otherwise, we are alternate address space */
613	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
614		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
615		invltlb();
616	}
617	return (unsigned *) APTmap;
618}
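/*
 * Note on the above: the page tables of the currently active address space
 * are always visible through the recursive slot at PTDPTDI, i.e. as the
 * linear array PTmap.  For any other pmap, its page directory frame is
 * temporarily installed in the alternate recursive slot (APTDpde), which
 * makes its page tables appear at APTmap; the invltlb flushes any stale
 * alternate-space translations.
 */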
619
620/*
621 * Super fast pmap_pte routine best used when scanning
622 * the pv lists.  This eliminates many coarse-grained
623 * invltlb calls.  Note that many of the pv list
624 * scans are across different pmaps.  It is very wasteful
625 * to do an entire invltlb for checking a single mapping.
626 */
627
628static unsigned *
629pmap_pte_quick(pmap, va)
630	register pmap_t pmap;
631	vm_offset_t va;
632{
633	unsigned pde, newpf;
634	if (pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) {
635		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
636		unsigned index = i386_btop(va);
637		/* are we current address space or kernel? */
638		if ((pmap == kernel_pmap) ||
639			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
640			return (unsigned *) PTmap + index;
641		}
642		newpf = pde & PG_FRAME;
643		if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
644			* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
645			invltlb_1pg((vm_offset_t) PADDR1);
646		}
647		return PADDR1 + ((unsigned) index & (NPTEPG - 1));
648	}
649	return (0);
650}
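/*
 * Note on the above: PMAP1/PADDR1 form a one-page window reserved in
 * pmap_bootstrap.  The page table page containing the wanted pte is mapped
 * at PADDR1 (unless it is already there), so only a single invlpg of PADDR1
 * is needed instead of switching and flushing the alternate page-table map.
 */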
651
652/*
653 *	Routine:	pmap_extract
654 *	Function:
655 *		Extract the physical page address associated
656 *		with the given map/virtual_address pair.
657 */
658vm_offset_t
659pmap_extract(pmap, va)
660	register pmap_t pmap;
661	vm_offset_t va;
662{
663	vm_offset_t rtval;
664	vm_offset_t pdirindex;
665	pdirindex = va >> PDRSHIFT;
666	if (pmap) {
667		unsigned *pte;
668		if (((rtval = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
669			rtval &= ~(NBPDR - 1);
670			rtval |= va & (NBPDR - 1);
671			return rtval;
672		}
673		pte = get_ptbase(pmap) + i386_btop(va);
674		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
675		return rtval;
676	}
677	return 0;
678
679}
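/*
 * Illustrative example for the PG_PS case above (assuming 4MB NBPDR): for a
 * 4MB pde whose frame is 0x00800000 and a va of 0xc0123456, rtval is masked
 * down to 0x00800000 and then or'd with the low 22 bits of the va
 * (0x123456), giving the physical address 0x00923456.
 */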
680
681/*
682 * determine if a page is managed (memory vs. device)
683 */
684static PMAP_INLINE int
685pmap_is_managed(pa)
686	vm_offset_t pa;
687{
688	int i;
689
690	if (!pmap_initialized)
691		return 0;
692
693	for (i = 0; phys_avail[i + 1]; i += 2) {
694		if (pa < phys_avail[i + 1] && pa >= phys_avail[i])
695			return 1;
696	}
697	return 0;
698}
699
700
701/***************************************************
702 * Low level mapping routines.....
703 ***************************************************/
704
705/*
706 * Add a list of wired pages to the kva.  This
707 * routine is only used for temporary
708 * kernel mappings that do not need to have
709 * page modification or references recorded.
710 * Note that old mappings are simply written
711 * over.  The page *must* be wired.
712 */
713void
714pmap_qenter(va, m, count)
715	vm_offset_t va;
716	vm_page_t *m;
717	int count;
718{
719	int i;
720	register unsigned *pte;
721
722	for (i = 0; i < count; i++) {
723		vm_offset_t tva = va + i * PAGE_SIZE;
724		unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | pgeflag;
725		unsigned opte;
726		pte = (unsigned *)vtopte(tva);
727		opte = *pte;
728		*pte = npte;
729		if (opte)
730			invltlb_1pg(tva);
731	}
732}
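/*
 * Typical usage pattern (illustrative only), e.g. when the buffer cache
 * maps a run of pages at a previously reserved kva:
 *
 *	pmap_qenter(kva, mlist, npages);
 *	... access the pages through kva ...
 *	pmap_qremove(kva, npages);
 */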
733
734/*
735 * this routine jerks page mappings from the
736 * kernel -- it is meant only for temporary mappings.
737 */
738void
739pmap_qremove(va, count)
740	vm_offset_t va;
741	int count;
742{
743	int i;
744	register unsigned *pte;
745
746	for (i = 0; i < count; i++) {
747		pte = (unsigned *)vtopte(va);
748		*pte = 0;
749		invltlb_1pg(va);
750		va += PAGE_SIZE;
751	}
752}
753
754/*
755 * add a wired page to the kva
756 * note that in order for the mapping to take effect -- you
757 * should do a invltlb after doing the pmap_kenter...
758 */
759PMAP_INLINE void
760pmap_kenter(va, pa)
761	vm_offset_t va;
762	register vm_offset_t pa;
763{
764	register unsigned *pte;
765	unsigned npte, opte;
766
767	npte = pa | PG_RW | PG_V | pgeflag;
768	pte = (unsigned *)vtopte(va);
769	opte = *pte;
770	*pte = npte;
771	if (opte)
772		invltlb_1pg(va);
773}
774
775/*
776 * remove a page from the kernel pagetables
777 */
778PMAP_INLINE void
779pmap_kremove(va)
780	vm_offset_t va;
781{
782	register unsigned *pte;
783
784	pte = (unsigned *)vtopte(va);
785	*pte = 0;
786	invltlb_1pg(va);
787}
788
789static vm_page_t
790pmap_page_alloc(object, pindex)
791	vm_object_t object;
792	vm_pindex_t pindex;
793{
794	vm_page_t m;
795	m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO);
796	if (m == NULL) {
797		VM_WAIT;
798	}
799	return m;
800}
801
802static vm_page_t
803pmap_page_lookup(object, pindex)
804	vm_object_t object;
805	vm_pindex_t pindex;
806{
807	vm_page_t m;
808retry:
809	m = vm_page_lookup(object, pindex);
810	if (m) {
811		if (m->flags & PG_BUSY) {
812			m->flags |= PG_WANTED;
813			tsleep(m, PVM, "pplookp", 0);
814			goto retry;
815		}
816	}
817
818	return m;
819}
820
821/*
822 * Create the UPAGES for a new process.
823 * This routine directly affects the fork perf for a process.
824 */
825void
826pmap_new_proc(p)
827	struct proc *p;
828{
829	int i;
830	vm_object_t upobj;
831	vm_page_t m;
832	struct user *up;
833	unsigned *ptek;
834
835	/*
836	 * allocate object for the upages
837	 */
838	upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
839	p->p_upages_obj = upobj;
840
841	/* get a kernel virtual address for the UPAGES for this proc */
842	up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * PAGE_SIZE);
843	if (up == NULL)
844		panic("pmap_new_proc: u_map allocation failed");
845
846	ptek = (unsigned *) vtopte((vm_offset_t) up);
847
848	for(i=0;i<UPAGES;i++) {
849		/*
850		 * Get a kernel stack page
851		 */
852		while ((m = vm_page_alloc(upobj,
853			i, VM_ALLOC_NORMAL)) == NULL) {
854			VM_WAIT;
855		}
856
857		/*
858		 * Wire the page
859		 */
860		m->wire_count++;
861		++cnt.v_wire_count;
862
863		/*
864		 * Enter the page into the kernel address space.
865		 */
866		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
867
868		m->flags &= ~(PG_ZERO|PG_BUSY);
869		m->flags |= PG_MAPPED|PG_WRITEABLE;
870		m->valid = VM_PAGE_BITS_ALL;
871	}
872
873	p->p_addr = up;
874}
875
876/*
877 * Dispose of the UPAGES for a process that has exited.
878 * This routine directly impacts the exit perf of a process.
879 */
880void
881pmap_dispose_proc(p)
882	struct proc *p;
883{
884	int i;
885	vm_object_t upobj;
886	vm_page_t m;
887	unsigned *ptek;
888
889	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
890
891	upobj = p->p_upages_obj;
892
893	for(i=0;i<UPAGES;i++) {
894		unsigned oldpte;
895		if ((m = vm_page_lookup(upobj, i)) == NULL)
896			panic("pmap_dispose_proc: upage already missing???");
897		oldpte = *(ptek + i);
898		*(ptek + i) = 0;
899		if (oldpte & PG_G)
900			invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
901		vm_page_unwire(m);
902		vm_page_free(m);
903	}
904
905	vm_object_deallocate(upobj);
906
907	kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
908}
909
910/*
911 * Allow the UPAGES for a process to be prejudicially paged out.
912 */
913void
914pmap_swapout_proc(p)
915	struct proc *p;
916{
917	int i;
918	vm_object_t upobj;
919	vm_page_t m;
920
921	upobj = p->p_upages_obj;
922	/*
923	 * let the upages be paged
924	 */
925	for(i=0;i<UPAGES;i++) {
926		if ((m = vm_page_lookup(upobj, i)) == NULL)
927			panic("pmap_swapout_proc: upage already missing???");
928		m->dirty = VM_PAGE_BITS_ALL;
929		vm_page_unwire(m);
930		vm_page_deactivate(m);
931		pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
932	}
933}
934
935/*
936 * Bring the UPAGES for a specified process back in.
937 */
938void
939pmap_swapin_proc(p)
940	struct proc *p;
941{
942	int i;
943	vm_object_t upobj;
944	vm_page_t m;
945	unsigned *pte;
946
947	upobj = p->p_upages_obj;
948	for(i=0;i<UPAGES;i++) {
949		int s;
950		s = splvm();
951retry:
952		if ((m = vm_page_lookup(upobj, i)) == NULL) {
953			if ((m = vm_page_alloc(upobj, i, VM_ALLOC_NORMAL)) == NULL) {
954				VM_WAIT;
955				goto retry;
956			}
957		} else {
958			if ((m->flags & PG_BUSY) || m->busy) {
959				m->flags |= PG_WANTED;
960				tsleep(m, PVM, "swinuw",0);
961				goto retry;
962			}
963			m->flags |= PG_BUSY;
964		}
965		vm_page_wire(m);
966		splx(s);
967
968		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
969			VM_PAGE_TO_PHYS(m));
970
971		if (m->valid != VM_PAGE_BITS_ALL) {
972			int rv;
973			rv = vm_pager_get_pages(upobj, &m, 1, 0);
974			if (rv != VM_PAGER_OK)
975				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
976			m->valid = VM_PAGE_BITS_ALL;
977		}
978		PAGE_WAKEUP(m);
979		m->flags |= PG_MAPPED|PG_WRITEABLE;
980	}
981}
982
983/***************************************************
984 * Page table page management routines.....
985 ***************************************************/
986
987/*
988 * This routine unholds page table pages, and if the hold count
989 * drops to zero, then it decrements the wire count.
990 */
991static int
992_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
993	int s;
994
995	if (m->flags & PG_BUSY) {
996		s = splvm();
997		while (m->flags & PG_BUSY) {
998			m->flags |= PG_WANTED;
999			tsleep(m, PVM, "pmuwpt", 0);
1000		}
1001		splx(s);
1002	}
1003
1004	if (m->hold_count == 0) {
1005		vm_offset_t pteva;
1006		/*
1007		 * unmap the page table page
1008		 */
1009		pmap->pm_pdir[m->pindex] = 0;
1010		--pmap->pm_stats.resident_count;
1011		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1012			(((unsigned) PTDpde) & PG_FRAME)) {
1013			/*
1014			 * Do an invltlb to make the invalidated mapping
1015			 * take effect immediately.
1016			 */
1017			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
1018			invltlb_1pg(pteva);
1019		}
1020
1021#if defined(PTPHINT)
1022		if (pmap->pm_ptphint == m)
1023			pmap->pm_ptphint = NULL;
1024#endif
1025
1026		/*
1027		 * If the page is finally unwired, simply free it.
1028		 */
1029		--m->wire_count;
1030		if (m->wire_count == 0) {
1031
1032			if (m->flags & PG_WANTED) {
1033				m->flags &= ~PG_WANTED;
1034				wakeup(m);
1035			}
1036
1037			vm_page_free_zero(m);
1038			--cnt.v_wire_count;
1039		}
1040		return 1;
1041	}
1042	return 0;
1043}
1044
1045__inline static int
1046pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1047	vm_page_unhold(m);
1048	if (m->hold_count == 0)
1049		return _pmap_unwire_pte_hold(pmap, m);
1050	else
1051		return 0;
1052}
1053
1054/*
1055 * After removing a page table entry, this routine is used to
1056 * conditionally free the page, and manage the hold/wire counts.
1057 */
1058static int
1059pmap_unuse_pt(pmap, va, mpte)
1060	pmap_t pmap;
1061	vm_offset_t va;
1062	vm_page_t mpte;
1063{
1064	unsigned ptepindex;
1065	if (va >= UPT_MIN_ADDRESS)
1066		return 0;
1067
1068	if (mpte == NULL) {
1069		ptepindex = (va >> PDRSHIFT);
1070#if defined(PTPHINT)
1071		if (pmap->pm_ptphint &&
1072			(pmap->pm_ptphint->pindex == ptepindex)) {
1073			mpte = pmap->pm_ptphint;
1074		} else {
1075			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1076			pmap->pm_ptphint = mpte;
1077		}
1078#else
1079		mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1080#endif
1081	}
1082
1083	return pmap_unwire_pte_hold(pmap, mpte);
1084}
1085
1086/*
1087 * Initialize a preallocated and zeroed pmap structure,
1088 * such as one in a vmspace structure.
1089 */
1090void
1091pmap_pinit(pmap)
1092	register struct pmap *pmap;
1093{
1094	vm_page_t ptdpg;
1095	/*
1096	 * No need to allocate page table space yet but we do need a valid
1097	 * page directory table.
1098	 */
1099
1100	if (pdstackptr > 0) {
1101		--pdstackptr;
1102		pmap->pm_pdir = (pd_entry_t *)pdstack[pdstackptr];
1103	} else {
1104		pmap->pm_pdir =
1105			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1106	}
1107
1108	/*
1109	 * allocate object for the ptes
1110	 */
1111	pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
1112
1113	/*
1114	 * allocate the page directory page
1115	 */
1116retry:
1117	ptdpg = pmap_page_alloc( pmap->pm_pteobj, PTDPTDI);
1118	if (ptdpg == NULL)
1119		goto retry;
1120
1121	ptdpg->wire_count = 1;
1122	++cnt.v_wire_count;
1123
1124	ptdpg->flags &= ~(PG_MAPPED|PG_BUSY);	/* not mapped normally */
1125	ptdpg->valid = VM_PAGE_BITS_ALL;
1126
1127	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
1128	if ((ptdpg->flags & PG_ZERO) == 0)
1129		bzero(pmap->pm_pdir, PAGE_SIZE);
1130
1131	/* wire in kernel global address entries */
1132	/* XXX copies current process, does not fill in MPPTDI */
1133	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
1134
1135	/* install self-referential address mapping entry */
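	/*
	 * (The entry at PTDPTDI points back at the page directory itself, so
	 * while this pmap is active the directory doubles as a page table:
	 * all of its page table pages appear as the linear array at PTmap
	 * and the directory itself appears at PTD.)
	 */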
1136	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1137		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW;
1138
1139	pmap->pm_flags = 0;
1140	pmap->pm_count = 1;
1141	pmap->pm_ptphint = NULL;
1142#if PMAP_PVLIST
1143	TAILQ_INIT(&pmap->pm_pvlist);
1144#endif
1145}
1146
1147static int
1148pmap_release_free_page(pmap, p)
1149	struct pmap *pmap;
1150	vm_page_t p;
1151{
1152	int s;
1153	unsigned *pde = (unsigned *) pmap->pm_pdir;
1154	/*
1155	 * This code optimizes the case of freeing non-busy
1156	 * page-table pages.  Those pages are zero now, and
1157	 * might as well be placed directly into the zero queue.
1158	 */
1159	s = splvm();
1160	if (p->flags & PG_BUSY) {
1161		p->flags |= PG_WANTED;
1162		tsleep(p, PVM, "pmaprl", 0);
1163		splx(s);
1164		return 0;
1165	}
1166
1167	if (p->flags & PG_WANTED) {
1168		p->flags &= ~PG_WANTED;
1169		wakeup(p);
1170	}
1171
1172	/*
1173	 * Remove the page table page from the process's address space.
1174	 */
1175	pde[p->pindex] = 0;
1176	--pmap->pm_stats.resident_count;
1177
1178	if (p->hold_count)  {
1179		panic("pmap_release: freeing held page table page");
1180	}
1181	/*
1182	 * Page directory pages need to have the kernel
1183	 * stuff cleared, so they can go into the zero queue also.
1184	 */
1185	if (p->pindex == PTDPTDI) {
1186		bzero(pde + KPTDI, nkpt * PTESIZE);
1187#ifdef SMP
1188		pde[MPPTDI] = 0;
1189#endif
1190		pde[APTDPTDI] = 0;
1191		pmap_kremove((vm_offset_t) pmap->pm_pdir);
1192	}
1193
1194#if defined(PTPHINT)
1195	if (pmap->pm_ptphint &&
1196		(pmap->pm_ptphint->pindex == p->pindex))
1197		pmap->pm_ptphint = NULL;
1198#endif
1199
1200	vm_page_free_zero(p);
1201	splx(s);
1202	return 1;
1203}
1204
1205/*
1206 * this routine is called if the page table page is not
1207 * already resident: it finds or allocates the page and maps it.
1208 */
1209static vm_page_t
1210_pmap_allocpte(pmap, ptepindex)
1211	pmap_t	pmap;
1212	unsigned ptepindex;
1213{
1214	vm_offset_t pteva, ptepa;
1215	vm_page_t m;
1216	int needszero = 0;
1217
1218	/*
1219	 * Find or fabricate a new pagetable page
1220	 */
1221retry:
1222	m = vm_page_lookup(pmap->pm_pteobj, ptepindex);
1223	if (m == NULL) {
1224		m = pmap_page_alloc(pmap->pm_pteobj, ptepindex);
1225		if (m == NULL)
1226			goto retry;
1227		if ((m->flags & PG_ZERO) == 0)
1228			needszero = 1;
1229		m->flags &= ~(PG_ZERO|PG_BUSY);
1230		m->valid = VM_PAGE_BITS_ALL;
1231	} else {
1232		if ((m->flags & PG_BUSY) || m->busy) {
1233			m->flags |= PG_WANTED;
1234			tsleep(m, PVM, "ptewai", 0);
1235			goto retry;
1236		}
1237	}
1238
1239	if (m->queue != PQ_NONE) {
1240		int s = splvm();
1241		vm_page_unqueue(m);
1242		splx(s);
1243	}
1244
1245	if (m->wire_count == 0)
1246		++cnt.v_wire_count;
1247	++m->wire_count;
1248
1249	/*
1250	 * Increment the hold count for the page table page
1251	 * (denoting a new mapping.)
1252	 */
1253	++m->hold_count;
1254
1255	/*
1256	 * Map the pagetable page into the process address space, if
1257	 * it isn't already there.
1258	 */
1259
1260	pmap->pm_stats.resident_count++;
1261
1262	ptepa = VM_PAGE_TO_PHYS(m);
1263	pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V);
1264
1265#if defined(PTPHINT)
1266	/*
1267	 * Set the page table hint
1268	 */
1269	pmap->pm_ptphint = m;
1270#endif
1271
1272	/*
1273	 * Try to use the new mapping, but if we cannot, then
1274	 * do it with the routine that maps the page explicitly.
1275	 */
1276	if (needszero) {
1277		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1278			(((unsigned) PTDpde) & PG_FRAME)) {
1279			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
1280			bzero((caddr_t) pteva, PAGE_SIZE);
1281		} else {
1282			pmap_zero_page(ptepa);
1283		}
1284	}
1285
1286	m->valid = VM_PAGE_BITS_ALL;
1287	m->flags |= PG_MAPPED;
1288
1289	return m;
1290}
1291
1292static vm_page_t
1293pmap_allocpte(pmap, va)
1294	pmap_t	pmap;
1295	vm_offset_t va;
1296{
1297	unsigned ptepindex;
1298	vm_offset_t ptepa;
1299	vm_page_t m;
1300
1301	/*
1302	 * Calculate pagetable page index
1303	 */
1304	ptepindex = va >> PDRSHIFT;
1305
1306	/*
1307	 * Get the page directory entry
1308	 */
1309	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1310
1311	/*
1312	 * This supports switching from a 4MB page to a
1313	 * normal 4K page.
1314	 */
1315	if (ptepa & PG_PS) {
1316		pmap->pm_pdir[ptepindex] = 0;
1317		ptepa = 0;
1318		invltlb();
1319	}
1320
1321	/*
1322	 * If the page table page is mapped, we just increment the
1323	 * hold count, and activate it.
1324	 */
1325	if (ptepa) {
1326#if defined(PTPHINT)
1327		/*
1328		 * In order to get the page table page, try the
1329		 * hint first.
1330		 */
1331		if (pmap->pm_ptphint &&
1332			(pmap->pm_ptphint->pindex == ptepindex)) {
1333			m = pmap->pm_ptphint;
1334		} else {
1335			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1336			pmap->pm_ptphint = m;
1337		}
1338#else
1339		m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1340#endif
1341		++m->hold_count;
1342		return m;
1343	}
1344	/*
1345	 * We get here if the pte page isn't mapped, or if it has been deallocated.
1346	 */
1347	return _pmap_allocpte(pmap, ptepindex);
1348}
1349
1350
1351/***************************************************
1352 * Pmap allocation/deallocation routines.
1353 ***************************************************/
1354
1355/*
1356 * Release any resources held by the given physical map.
1357 * Called when a pmap initialized by pmap_pinit is being released.
1358 * Should only be called if the map contains no valid mappings.
1359 */
1360void
1361pmap_release(pmap)
1362	register struct pmap *pmap;
1363{
1364	vm_page_t p,n,ptdpg;
1365	vm_object_t object = pmap->pm_pteobj;
1366
1367#if defined(DIAGNOSTIC)
1368	if (object->ref_count != 1)
1369		panic("pmap_release: pteobj reference count != 1");
1370#endif
1371
1372	ptdpg = NULL;
1373retry:
1374	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
1375		n = TAILQ_NEXT(p, listq);
1376		if (p->pindex == PTDPTDI) {
1377			ptdpg = p;
1378			continue;
1379		}
1380		if (!pmap_release_free_page(pmap, p))
1381			goto retry;
1382	}
1383
1384	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
1385		goto retry;
1386
1387	vm_object_deallocate(object);
1388	if (pdstackptr < PDSTACKMAX) {
1389		pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir;
1390		++pdstackptr;
1391	} else {
1392		kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE);
1393	}
1394	pmap->pm_pdir = 0;
1395}
1396
1397/*
1398 * grow the number of kernel page table entries, if needed
1399 */
1400void
1401pmap_growkernel(vm_offset_t addr)
1402{
1403	struct proc *p;
1404	struct pmap *pmap;
1405	int s;
1406
1407	s = splhigh();
1408	if (kernel_vm_end == 0) {
1409		kernel_vm_end = KERNBASE;
1410		nkpt = 0;
1411		while (pdir_pde(PTD, kernel_vm_end)) {
1412			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1413			++nkpt;
1414		}
1415	}
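	/*
	 * Advance the requested address to the next page-directory (4MB,
	 * i.e. PAGE_SIZE * NPTEPG) boundary, since kernel page tables are
	 * grown one whole page table page at a time.
	 */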
1416	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1417	while (kernel_vm_end < addr) {
1418		if (pdir_pde(PTD, kernel_vm_end)) {
1419			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1420			continue;
1421		}
1422		++nkpt;
1423		if (!nkpg) {
1424			vm_offset_t ptpkva = (vm_offset_t) vtopte(addr);
1425			/*
1426			 * This index is bogus, but out of the way
1427			 */
1428			vm_pindex_t ptpidx = (ptpkva >> PAGE_SHIFT);
1429			nkpg = vm_page_alloc(kernel_object,
1430				ptpidx, VM_ALLOC_SYSTEM);
1431			if (!nkpg)
1432				panic("pmap_growkernel: no memory to grow kernel");
1433			vm_page_wire(nkpg);
1434			vm_page_remove(nkpg);
1435			pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
1436		}
1437		pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW | pgeflag);
1438		nkpg = NULL;
1439
1440		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1441			if (p->p_vmspace) {
1442				pmap = &p->p_vmspace->vm_pmap;
1443				*pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
1444			}
1445		}
1446		*pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
1447		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1448	}
1449	splx(s);
1450}
1451
1452/*
1453 *	Retire the given physical map from service.
1454 *	Should only be called if the map contains
1455 *	no valid mappings.
1456 */
1457void
1458pmap_destroy(pmap)
1459	register pmap_t pmap;
1460{
1461	int count;
1462
1463	if (pmap == NULL)
1464		return;
1465
1466	count = --pmap->pm_count;
1467	if (count == 0) {
1468		pmap_release(pmap);
1469		free((caddr_t) pmap, M_VMPMAP);
1470	}
1471}
1472
1473/*
1474 *	Add a reference to the specified pmap.
1475 */
1476void
1477pmap_reference(pmap)
1478	pmap_t pmap;
1479{
1480	if (pmap != NULL) {
1481		pmap->pm_count++;
1482	}
1483}
1484
1485/***************************************************
1486 * Page management routines.
1487 ***************************************************/
1488
1489/*
1490 * free the pv_entry back to the free list
1491 */
1492static PMAP_INLINE void
1493free_pv_entry(pv)
1494	pv_entry_t pv;
1495{
1496	++pv_freelistcnt;
1497	TAILQ_INSERT_HEAD(&pv_freelist, pv, pv_list);
1498}
1499
1500/*
1501 * get a new pv_entry, allocating a block from the system
1502 * when needed.
1503 * the memory allocation is performed bypassing the malloc code
1504 * because of the possibility of allocations at interrupt time.
1505 */
1506static pv_entry_t
1507get_pv_entry()
1508{
1509	pv_entry_t tmp;
1510
1511	/*
1512	 * get more pv_entry pages if needed
1513	 */
1514	if (pv_freelistcnt < PV_FREELIST_MIN || !TAILQ_FIRST(&pv_freelist)) {
1515		pmap_alloc_pv_entry();
1516	}
1517	/*
1518	 * get a pv_entry off of the free list
1519	 */
1520	--pv_freelistcnt;
1521	tmp = TAILQ_FIRST(&pv_freelist);
1522	TAILQ_REMOVE(&pv_freelist, tmp, pv_list);
1523	return tmp;
1524}
1525
1526/*
1527 * This *strange* allocation routine eliminates the possibility of a malloc
1528 * failure (*FATAL*) for a pv_entry_t data structure.
1529 * also -- this code is MUCH MUCH faster than the malloc equiv...
1530 * We really need to do the slab allocator thingie here.
1531 */
1532static void
1533pmap_alloc_pv_entry()
1534{
1535	/*
1536	 * do we have any pre-allocated map-pages left?
1537	 */
1538	if (npvvapg) {
1539		vm_page_t m;
1540
1541		/*
1542		 * allocate a physical page out of the vm system
1543		 */
1544		m = vm_page_alloc(kernel_object,
1545		    OFF_TO_IDX(pvva - vm_map_min(kernel_map)),
1546		    VM_ALLOC_INTERRUPT);
1547		if (m) {
1548			int newentries;
1549			int i;
1550			pv_entry_t entry;
1551
1552			newentries = (PAGE_SIZE / sizeof(struct pv_entry));
1553			/*
1554			 * wire the page
1555			 */
1556			vm_page_wire(m);
1557			m->flags &= ~PG_BUSY;
1558			/*
1559			 * let the kernel see it
1560			 */
1561			pmap_kenter(pvva, VM_PAGE_TO_PHYS(m));
1562
1563			entry = (pv_entry_t) pvva;
1564			/*
1565			 * update the allocation pointers
1566			 */
1567			pvva += PAGE_SIZE;
1568			--npvvapg;
1569
1570			/*
1571			 * free the entries into the free list
1572			 */
1573			for (i = 0; i < newentries; i++) {
1574				free_pv_entry(entry);
1575				entry++;
1576			}
1577		}
1578	}
1579	if (!TAILQ_FIRST(&pv_freelist))
1580		panic("get_pv_entry: cannot get a pv_entry_t");
1581}
1582
1583/*
1584 * init the pv_entry allocation system
1585 */
1586void
1587init_pv_entries(npg)
1588	int npg;
1589{
1590	/*
1591	 * Allocate enough kvm space for one entry per page, and
1592	 * each process having PMAP_SHPGPERPROC pages shared with other
1593	 * processes.  (The system can panic if this is too small, but also
1594	 * can fail on bootup if this is too big.)
1595	 * XXX The pv management mechanism needs to be fixed so that systems
1596	 * with lots of shared mappings amongst lots of processes will still
1597	 * work.  The fix will likely be that once we run out of pv entries
1598	 * we will free other entries (and the associated mappings), with
1599	 * some policy yet to be determined.
1600	 */
1601	npvvapg = ((PMAP_SHPGPERPROC * maxproc + npg) * sizeof(struct pv_entry)
1602		+ PAGE_SIZE - 1) / PAGE_SIZE;
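	/*
	 * Rough illustrative sizing (the numbers here are assumptions, not
	 * measurements): with maxproc = 512, PMAP_SHPGPERPROC = 200,
	 * npg = 16384 (64MB of managed memory) and a 24-byte pv_entry,
	 * this works out to (200*512 + 16384) * 24 bytes, i.e. roughly
	 * 696 pages (~2.7MB) of pageable KVA reserved for pv entries.
	 */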
1603	pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE);
1604	/*
1605	 * get the first batch of entries
1606	 */
1607	pmap_alloc_pv_entry();
1608}
1609
1610/*
1611 * Remove the pv entry for the given (pmap, va) from the pv list
1612 * of its physical page and from the pmap's own pv list, then free
1613 * the now unused entry.  Whichever of the two lists is likely to
1614 * be shorter is the one that is searched.
1615 */
1616
1617static int
1618pmap_remove_entry(pmap, ppv, va)
1619	struct pmap *pmap;
1620	pv_table_t *ppv;
1621	vm_offset_t va;
1622{
1623	pv_entry_t pv;
1624	int rtval;
1625	int s;
1626
1627	s = splvm();
1628#if PMAP_PVLIST
1629	if (ppv->pv_list_count < pmap->pm_stats.resident_count) {
1630#endif
1631		for (pv = TAILQ_FIRST(&ppv->pv_list);
1632			pv;
1633			pv = TAILQ_NEXT(pv, pv_list)) {
1634			if (pmap == pv->pv_pmap && va == pv->pv_va)
1635				break;
1636		}
1637#if PMAP_PVLIST
1638	} else {
1639		for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1640			pv;
1641			pv = TAILQ_NEXT(pv, pv_plist)) {
1642			if (va == pv->pv_va)
1643				break;
1644		}
1645	}
1646#endif
1647
1648	rtval = 0;
1649	if (pv) {
1650		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1651		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1652		--ppv->pv_list_count;
1653		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
1654			ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
1655		}
1656
1657#if PMAP_PVLIST
1658		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1659#endif
1660		free_pv_entry(pv);
1661	}
1662
1663	splx(s);
1664	return rtval;
1665}
1666
1667/*
1668 * Create a pv entry for page at pa for
1669 * (pmap, va).
1670 */
1671static void
1672pmap_insert_entry(pmap, va, mpte, pa)
1673	pmap_t pmap;
1674	vm_offset_t va;
1675	vm_page_t mpte;
1676	vm_offset_t pa;
1677{
1678
1679	int s;
1680	pv_entry_t pv;
1681	pv_table_t *ppv;
1682
1683	s = splvm();
1684	pv = get_pv_entry();
1685	pv->pv_va = va;
1686	pv->pv_pmap = pmap;
1687	pv->pv_ptem = mpte;
1688
1689#if PMAP_PVLIST
1690	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1691#endif
1692
1693	ppv = pa_to_pvh(pa);
1694	TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
1695	++ppv->pv_list_count;
1696
1697	splx(s);
1698}
1699
1700/*
1701 * pmap_remove_pte: do the things to unmap a page in a process
1702 */
1703static int
1704pmap_remove_pte(pmap, ptq, va)
1705	struct pmap *pmap;
1706	unsigned *ptq;
1707	vm_offset_t va;
1708{
1709	unsigned oldpte;
1710	pv_table_t *ppv;
1711
1712	oldpte = *ptq;
1713	*ptq = 0;
1714	if (oldpte & PG_W)
1715		pmap->pm_stats.wired_count -= 1;
1716	/*
1717	 * Machines that don't support invlpg also don't support
1718	 * PG_G.
1719	 */
1720	if (oldpte & PG_G)
1721		invlpg(va);
1722	pmap->pm_stats.resident_count -= 1;
1723	if (oldpte & PG_MANAGED) {
1724		ppv = pa_to_pvh(oldpte);
1725		if (oldpte & PG_M) {
1726#if defined(PMAP_DIAGNOSTIC)
1727			if (pmap_nw_modified((pt_entry_t) oldpte)) {
1728				printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte);
1729			}
1730#endif
1731			if (pmap_track_modified(va))
1732				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1733		}
1734		return pmap_remove_entry(pmap, ppv, va);
1735	} else {
1736		return pmap_unuse_pt(pmap, va, NULL);
1737	}
1738
1739	return 0;
1740}
1741
1742/*
1743 * Remove a single page from a process address space
1744 */
1745static void
1746pmap_remove_page(pmap, va)
1747	struct pmap *pmap;
1748	register vm_offset_t va;
1749{
1750	register unsigned *ptq;
1751
1752	/*
1753	 * if there is no pte for this address, just skip it!!!
1754	 */
1755	if (*pmap_pde(pmap, va) == 0) {
1756		return;
1757	}
1758
1759	/*
1760	 * get a local va for mappings for this pmap.
1761	 */
1762	ptq = get_ptbase(pmap) + i386_btop(va);
1763	if (*ptq) {
1764		(void) pmap_remove_pte(pmap, ptq, va);
1765		invltlb_1pg(va);
1766	}
1767	return;
1768}
1769
1770/*
1771 *	Remove the given range of addresses from the specified map.
1772 *
1773 *	It is assumed that the start and end are properly
1774 *	rounded to the page size.
1775 */
1776void
1777pmap_remove(pmap, sva, eva)
1778	struct pmap *pmap;
1779	register vm_offset_t sva;
1780	register vm_offset_t eva;
1781{
1782	register unsigned *ptbase;
1783	vm_offset_t pdnxt;
1784	vm_offset_t ptpaddr;
1785	vm_offset_t sindex, eindex;
1786	int anyvalid;
1787
1788	if (pmap == NULL)
1789		return;
1790
1791	if (pmap->pm_stats.resident_count == 0)
1792		return;
1793
1794	/*
1795	 * special handling of removing one page.  a very
1796	 * common operation for which it is worth short-circuiting
1797	 * some code.
1798	 */
1799	if (((sva + PAGE_SIZE) == eva) &&
1800		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1801		pmap_remove_page(pmap, sva);
1802		return;
1803	}
1804
1805	anyvalid = 0;
1806
1807	/*
1808	 * Get a local virtual address for the mappings that are being
1809	 * worked with.
1810	 */
1811	ptbase = get_ptbase(pmap);
1812
1813	sindex = i386_btop(sva);
1814	eindex = i386_btop(eva);
1815
1816	for (; sindex < eindex; sindex = pdnxt) {
1817		unsigned pdirindex;
1818
1819		/*
1820		 * Calculate index for next page table.
1821		 */
1822		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1823		if (pmap->pm_stats.resident_count == 0)
1824			break;
1825
1826		pdirindex = sindex / NPDEPG;
1827		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1828			pmap->pm_pdir[pdirindex] = 0;
1829			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1830			anyvalid++;
1831			continue;
1832		}
1833
1834		/*
1835		 * Weed out invalid mappings. Note: we assume that the page
1836		 * directory table is always allocated, and in kernel virtual.
1837		 */
1838		if (ptpaddr == 0)
1839			continue;
1840
1841		/*
1842		 * Limit our scan to either the end of the va represented
1843		 * by the current page table page, or to the end of the
1844		 * range being removed.
1845		 */
1846		if (pdnxt > eindex) {
1847			pdnxt = eindex;
1848		}
1849
1850		for ( ;sindex != pdnxt; sindex++) {
1851			vm_offset_t va;
1852			if (ptbase[sindex] == 0) {
1853				continue;
1854			}
1855			va = i386_ptob(sindex);
1856
1857			anyvalid++;
1858			if (pmap_remove_pte(pmap,
1859				ptbase + sindex, va))
1860				break;
1861		}
1862	}
1863
1864	if (anyvalid) {
1865		invltlb();
1866	}
1867}
1868
1869/*
1870 *	Routine:	pmap_remove_all
1871 *	Function:
1872 *		Removes this physical page from
1873 *		all physical maps in which it resides.
1874 *		Reflects back modify bits to the pager.
1875 *
1876 *	Notes:
1877 *		Original versions of this routine were very
1878 *		inefficient because they iteratively called
1879 *		pmap_remove (slow...)
1880 */
1881
1882static void
1883pmap_remove_all(pa)
1884	vm_offset_t pa;
1885{
1886	register pv_entry_t pv;
1887	pv_table_t *ppv;
1888	register unsigned *pte, tpte;
1889	int nmodify;
1890	int update_needed;
1891	int s;
1892
1893	nmodify = 0;
1894	update_needed = 0;
1895#if defined(PMAP_DIAGNOSTIC)
1896	/*
1897	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1898	 * pages!
1899	 */
1900	if (!pmap_is_managed(pa)) {
1901		panic("pmap_page_protect: illegal for unmanaged page, pa: 0x%lx", pa);
1902	}
1903#endif
1904
1905	s = splvm();
1906	ppv = pa_to_pvh(pa);
1907	while ((pv = TAILQ_FIRST(&ppv->pv_list)) != NULL) {
1908		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
1909
1910		pv->pv_pmap->pm_stats.resident_count--;
1911
1912		tpte = *pte;
1913		*pte = 0;
1914		if (tpte & PG_W)
1915			pv->pv_pmap->pm_stats.wired_count--;
1916		/*
1917		 * Update the vm_page_t clean and reference bits.
1918		 */
1919		if (tpte & PG_M) {
1920#if defined(PMAP_DIAGNOSTIC)
1921			if (pmap_nw_modified((pt_entry_t) tpte)) {
1922				printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", pv->pv_va, tpte);
1923			}
1924#endif
1925			if (pmap_track_modified(pv->pv_va))
1926				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1927		}
1928		if (!update_needed &&
1929			((!curproc || (&curproc->p_vmspace->vm_pmap == pv->pv_pmap)) ||
1930			(pv->pv_pmap == kernel_pmap))) {
1931			update_needed = 1;
1932		}
1933
1934#if PMAP_PVLIST
1935		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1936#endif
1937		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1938		--ppv->pv_list_count;
1939		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1940		free_pv_entry(pv);
1941	}
1942	ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
1943
1944
1945	if (update_needed)
1946		invltlb();
1947	splx(s);
1948	return;
1949}
1950
1951/*
1952 *	Set the physical protection on the
1953 *	specified range of this map as requested.
1954 */
1955void
1956pmap_protect(pmap, sva, eva, prot)
1957	register pmap_t pmap;
1958	vm_offset_t sva, eva;
1959	vm_prot_t prot;
1960{
1961	register unsigned *ptbase;
1962	vm_offset_t pdnxt;
1963	vm_offset_t ptpaddr;
1964	vm_offset_t sindex, eindex;
1965	int anychanged;
1966
1967
1968	if (pmap == NULL)
1969		return;
1970
1971	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1972		pmap_remove(pmap, sva, eva);
1973		return;
1974	}
1975
1976	anychanged = 0;
1977
1978	ptbase = get_ptbase(pmap);
1979
1980	sindex = i386_btop(sva);
1981	eindex = i386_btop(eva);
1982
1983	for (; sindex < eindex; sindex = pdnxt) {
1984
1985		unsigned pdirindex;
1986
1987		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1988
1989		pdirindex = sindex / NPDEPG;
1990		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1991			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
1992			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1993			anychanged++;
1994			continue;
1995		}
1996
1997		/*
1998		 * Weed out invalid mappings. Note: we assume that the page
1999		 * directory table is always allocated, and in kernel virtual.
2000		 */
2001		if (ptpaddr == 0)
2002			continue;
2003
2004		if (pdnxt > eindex) {
2005			pdnxt = eindex;
2006		}
2007
2008		for (; sindex != pdnxt; sindex++) {
2009
2010			unsigned pbits = ptbase[sindex];
2011
2012			if (prot & VM_PROT_WRITE) {
2013				if ((pbits & (PG_RW|PG_V)) == PG_V) {
2014					if (pbits & PG_MANAGED) {
2015						vm_page_t m = PHYS_TO_VM_PAGE(pbits);
2016						m->flags |= PG_WRITEABLE;
2017						m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY;
2018					}
2019					ptbase[sindex] = pbits | PG_RW;
2020					anychanged = 1;
2021				}
2022			} else if (pbits & PG_RW) {
2023				if (pbits & PG_M) {
2024					vm_offset_t sva = i386_ptob(sindex);
2025					if ((pbits & PG_MANAGED) && pmap_track_modified(sva)) {
2026						vm_page_t m = PHYS_TO_VM_PAGE(pbits);
2027						m->dirty = VM_PAGE_BITS_ALL;
2028					}
2029				}
2030				ptbase[sindex] = pbits & ~(PG_M|PG_RW);
2031				anychanged = 1;
2032			}
2033		}
2034	}
2035	if (anychanged)
2036		invltlb();
2037}
2038
2039/*
2040 *	Insert the given physical page (p) at
2041 *	the specified virtual address (v) in the
2042 *	target physical map with the protection requested.
2043 *
2044 *	If specified, the page will be wired down, meaning
2045 *	that the related pte can not be reclaimed.
2046 *
2047 *	NB:  This is the only routine which MAY NOT lazy-evaluate
2048 *	or lose information.  That is, this routine must actually
2049 *	insert this page into the given map NOW.
2050 */
2051void
2052pmap_enter(pmap, va, pa, prot, wired)
2053	register pmap_t pmap;
2054	vm_offset_t va;
2055	register vm_offset_t pa;
2056	vm_prot_t prot;
2057	boolean_t wired;
2058{
2059	register unsigned *pte;
2060	vm_offset_t opa;
2061	vm_offset_t origpte, newpte;
2062	vm_page_t mpte;
2063
2064	if (pmap == NULL)
2065		return;
2066
2067	va &= PG_FRAME;
2068#ifdef PMAP_DIAGNOSTIC
2069	if (va > VM_MAX_KERNEL_ADDRESS)
2070		panic("pmap_enter: toobig");
2071	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
2072		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
2073#endif
2074
2075	mpte = NULL;
2076	/*
2077	 * In the case that a page table page is not
2078	 * resident, we are creating it here.
2079	 */
2080	if (va < UPT_MIN_ADDRESS)
2081		mpte = pmap_allocpte(pmap, va);
2082
2083	pte = pmap_pte(pmap, va);
2084	/*
2085	 * Page Directory table entry not valid, we need a new PT page
2086	 */
2087	if (pte == NULL) {
2088		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%lx\n",
2089			pmap->pm_pdir[PTDPTDI], va);
2090	}
2091
2092	origpte = *(vm_offset_t *)pte;
2093	pa &= PG_FRAME;
2094	opa = origpte & PG_FRAME;
2095	if (origpte & PG_PS)
2096		panic("pmap_enter: attempted pmap_enter on 4MB page");
2097
2098	/*
2099	 * Mapping has not changed, must be protection or wiring change.
2100	 */
2101	if (origpte && (opa == pa)) {
2102		/*
2103		 * Wiring change, just update stats. We don't worry about
2104		 * wiring PT pages as they remain resident as long as there
2105		 * are valid mappings in them. Hence, if a user page is wired,
2106		 * the PT page will be also.
2107		 */
2108		if (wired && ((origpte & PG_W) == 0))
2109			pmap->pm_stats.wired_count++;
2110		else if (!wired && (origpte & PG_W))
2111			pmap->pm_stats.wired_count--;
2112
2113#if defined(PMAP_DIAGNOSTIC)
2114		if (pmap_nw_modified((pt_entry_t) origpte)) {
2115			printf("pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, origpte);
2116		}
2117#endif
2118
2119		/*
2120		 * We might be turning off write access to the page,
2121		 * so we go ahead and sense modify status.
2122		 */
2123		if (origpte & PG_MANAGED) {
2124			vm_page_t m;
2125			if (origpte & PG_M) {
2126				if (pmap_track_modified(va)) {
2127					m = PHYS_TO_VM_PAGE(pa);
2128					m->dirty = VM_PAGE_BITS_ALL;
2129				}
2130			}
2131			pa |= PG_MANAGED;
2132		}
2133
2134		if (mpte)
2135			--mpte->hold_count;
2136
2137		goto validate;
2138	}
2139	/*
2140	 * Mapping has changed, invalidate old range and fall through to
2141	 * handle validating new mapping.
2142	 */
2143	if (opa) {
2144		int err;
2145		err = pmap_remove_pte(pmap, pte, va);
2146		if (err)
2147			panic("pmap_enter: pte vanished, va: 0x%x", va);
2148	}
2149
2150	/*
2151	 * Enter on the PV list if part of our managed memory.  Note that we
2152	 * raise IPL while manipulating pv_table since pmap_enter can be
2153	 * called at interrupt time.
2154	 */
2155	if (pmap_is_managed(pa)) {
2156		pmap_insert_entry(pmap, va, mpte, pa);
2157		pa |= PG_MANAGED;
2158	}
2159
2160	/*
2161	 * Increment counters
2162	 */
2163	pmap->pm_stats.resident_count++;
2164	if (wired)
2165		pmap->pm_stats.wired_count++;
2166
2167validate:
2168	/*
2169	 * Now validate mapping with desired protection/wiring.
2170	 */
2171	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2172
2173	if (wired)
2174		newpte |= PG_W;
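	/*
	 * User addresses get PG_U so they are reachable from user mode;
	 * kernel mappings get pgeflag (PG_G on processors that support
	 * global pages) so they are not flushed on a context switch.
	 */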
2175	if (va < UPT_MIN_ADDRESS)
2176		newpte |= PG_U;
2177	if (pmap == kernel_pmap)
2178		newpte |= pgeflag;
2179
2180	/*
2181	 * if the mapping or permission bits are different, we need
2182	 * to update the pte.
2183	 */
2184	if ((origpte & ~(PG_M|PG_A)) != newpte) {
2185		*pte = newpte;
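		/*
		 * Only flush the TLB entry if we are replacing a previously
		 * valid mapping; an invalid pte cannot have been cached.
		 */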
2186		if (origpte)
2187			invltlb_1pg(va);
2188	}
2189}
2190
2191/*
2192 * this code makes some *MAJOR* assumptions:
2193 * 1. The target pmap must be the current pmap and it must exist.
2194 * 2. Not wired.
2195 * 3. Read access.
2196 * 4. No page table pages.
2197 * 5. Tlbflush is deferred to calling procedure.
2198 * 6. Page IS managed.
2199 * Given those assumptions, it is *MUCH* faster than pmap_enter...
2200 */
2201
2202static vm_page_t
2203pmap_enter_quick(pmap, va, pa, mpte)
2204	register pmap_t pmap;
2205	vm_offset_t va;
2206	register vm_offset_t pa;
2207	vm_page_t mpte;
2208{
2209	register unsigned *pte;
2210
2211	/*
2212	 * In the case that a page table page is not
2213	 * resident, we are creating it here.
2214	 */
2215	if (va < UPT_MIN_ADDRESS) {
2216		unsigned ptepindex;
2217		vm_offset_t ptepa;
2218
2219		/*
2220		 * Calculate pagetable page index
2221		 */
2222		ptepindex = va >> PDRSHIFT;
2223		if (mpte && (mpte->pindex == ptepindex)) {
2224			++mpte->hold_count;
2225		} else {
2226retry:
2227			/*
2228			 * Get the page directory entry
2229			 */
2230			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2231
2232			/*
2233			 * If the page table page is mapped, we just increment
2234			 * the hold count, and activate it.
2235			 */
2236			if (ptepa) {
2237				if (ptepa & PG_PS)
2238					panic("pmap_enter_quick: unexpected mapping into 4MB page");
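				/*
				 * pm_ptphint caches the most recently used
				 * page table page so the common case avoids
				 * a vm_page lookup.
				 */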
2239#if defined(PTPHINT)
2240				if (pmap->pm_ptphint &&
2241					(pmap->pm_ptphint->pindex == ptepindex)) {
2242					mpte = pmap->pm_ptphint;
2243				} else {
2244					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2245					pmap->pm_ptphint = mpte;
2246				}
2247#else
2248				mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2249#endif
2250				if (mpte == NULL)
2251					goto retry;
2252				++mpte->hold_count;
2253			} else {
2254				mpte = _pmap_allocpte(pmap, ptepindex);
2255			}
2256		}
2257	} else {
2258		mpte = NULL;
2259	}
2260
2261	/*
2262	 * This call to vtopte makes the assumption that we are
2263	 * entering the page into the current pmap.  In order to support
2264	 * quick entry into any pmap, one would likely use pmap_pte_quick.
2265	 * But that isn't as quick as vtopte.
2266	 */
2267	pte = (unsigned *)vtopte(va);
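	/*
	 * Never replace an existing mapping here; the fast path only
	 * fills previously invalid ptes.
	 */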
2268	if (*pte) {
2269		if (mpte)
2270			pmap_unwire_pte_hold(pmap, mpte);
2271		return 0;
2272	}
2273
2274	/*
2275	 * Enter on the PV list if part of our managed memory.  Note that we
2276	 * raise IPL while manipulating pv_table since pmap_enter can be
2277	 * called at interrupt time.
2278	 */
2279	pmap_insert_entry(pmap, va, mpte, pa);
2280
2281	/*
2282	 * Increment counters
2283	 */
2284	pmap->pm_stats.resident_count++;
2285
2286	/*
2287	 * Now validate mapping with RO protection
2288	 */
2289	*pte = pa | PG_V | PG_U | PG_MANAGED;
2290
2291	return mpte;
2292}
2293
2294#define MAX_INIT_PT (96)
2295/*
2296 * pmap_object_init_pt preloads the ptes for a given object
2297 * into the specified pmap.  This eliminates the blast of soft
2298 * faults on process startup and immediately after an mmap.
2299 */
2300void
2301pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
2302	pmap_t pmap;
2303	vm_offset_t addr;
2304	vm_object_t object;
2305	vm_pindex_t pindex;
2306	vm_size_t size;
2307	int limit;
2308{
2309	vm_offset_t tmpidx;
2310	int psize;
2311	vm_page_t p, mpte;
2312	int objpgs;
2313
2314	if (!pmap)
2315		return;
2316
2317	/*
2318	 * This code maps large device-memory mmap regions directly with
2319	 * 4MB (PG_PS) page directory entries when the processor supports
2320	 * PSE and both the address and size are 4MB aligned.
2321	 */
2322	if (pseflag &&
2323		(object->type == OBJT_DEVICE) &&
2324		((addr & (NBPDR - 1)) == 0) &&
2325		((size & (NBPDR - 1)) == 0) ) {
2326		int i;
2327		int s;
2328		vm_page_t m[1];
2329		unsigned int ptepindex;
2330		int npdes;
2331		vm_offset_t ptepa;
2332
2333		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
2334			return;
2335
2336		s = splhigh();
2337retry:
2338		p = vm_page_lookup(object, pindex);
2339		if (p && (p->flags & PG_BUSY)) {
2340			tsleep(p, PVM, "init4p", 0);
2341			goto retry;
2342		}
2343		splx(s);
2344
2345		if (p == NULL) {
2346			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
2347			if (p == NULL)
2348				return;
2349			m[0] = p;
2350
2351			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
2352				PAGE_WAKEUP(p);
2353				vm_page_free(p);
2354				return;
2355			}
2356
2357			p = vm_page_lookup(object, pindex);
2358			PAGE_WAKEUP(p);
2359		}
2360
2361		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
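		/*
		 * The backing physical memory must itself be 4MB aligned,
		 * or it cannot be mapped with PG_PS page directory entries.
		 */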
2362		if (ptepa & (NBPDR - 1)) {
2363			return;
2364		}
2365
2366		p->valid = VM_PAGE_BITS_ALL;
2367
2368		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
2369		npdes = size >> PDRSHIFT;
2370		for(i=0;i<npdes;i++) {
2371			pmap->pm_pdir[ptepindex] =
2372				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
2373			ptepa += NBPDR;
2374			ptepindex += 1;
2375		}
2376		p->flags |= PG_MAPPED;
2377		invltlb();
2378		return;
2379	}
2380
2381	psize = i386_btop(size);
2382
2383	if ((object->type != OBJT_VNODE) ||
2384		(limit && (psize > MAX_INIT_PT) &&
2385			(object->resident_page_count > MAX_INIT_PT))) {
2386		return;
2387	}
2388
2389	if (psize + pindex > object->size)
2390		psize = object->size - pindex;
2391
2392	mpte = NULL;
2393	/*
2394	 * if we are processing a major portion of the object, then scan the
2395	 * entire thing.
2396	 */
2397	if (psize > (object->size >> 2)) {
2398		objpgs = psize;
2399
2400		for (p = TAILQ_FIRST(&object->memq);
2401		    ((objpgs > 0) && (p != NULL));
2402		    p = TAILQ_NEXT(p, listq)) {
2403
2404			tmpidx = p->pindex;
2405			if (tmpidx < pindex) {
2406				continue;
2407			}
2408			tmpidx -= pindex;
2409			if (tmpidx >= psize) {
2410				continue;
2411			}
2412			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2413			    (p->busy == 0) &&
2414			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
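				/*
				 * Cached pages are candidates for immediate
				 * reuse, so move them to a normal queue
				 * before mapping them.
				 */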
2415				if ((p->queue - p->pc) == PQ_CACHE)
2416					vm_page_deactivate(p);
2417				p->flags |= PG_BUSY;
2418				mpte = pmap_enter_quick(pmap,
2419					addr + i386_ptob(tmpidx),
2420					VM_PAGE_TO_PHYS(p), mpte);
2421				p->flags |= PG_MAPPED;
2422				PAGE_WAKEUP(p);
2423			}
2424			objpgs -= 1;
2425		}
2426	} else {
2427		/*
2428		 * else lookup the pages one-by-one.
2429		 */
2430		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
2431			p = vm_page_lookup(object, tmpidx + pindex);
2432			if (p &&
2433			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2434			    (p->busy == 0) &&
2435			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2436				if ((p->queue - p->pc) == PQ_CACHE)
2437					vm_page_deactivate(p);
2438				p->flags |= PG_BUSY;
2439				mpte = pmap_enter_quick(pmap,
2440					addr + i386_ptob(tmpidx),
2441					VM_PAGE_TO_PHYS(p), mpte);
2442				p->flags |= PG_MAPPED;
2443				PAGE_WAKEUP(p);
2444			}
2445		}
2446	}
2447	return;
2448}
2449
2450/*
2451 * pmap_prefault provides a quick way of clustering
2452 * pagefaults into a process's address space.  It is a "cousin"
2453 * of pmap_object_init_pt, except it runs at page fault time instead
2454 * of mmap time.
2455 */
2456#define PFBAK 2
2457#define PFFOR 2
2458#define PAGEORDER_SIZE (PFBAK+PFFOR)
2459
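/*
 * Offsets, relative to the faulting address, of the pages to prefault:
 * nearest pages first, alternating before and after the fault address.
 */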
2460static int pmap_prefault_pageorder[] = {
2461	-PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE
2462};
2463
2464void
2465pmap_prefault(pmap, addra, entry, object)
2466	pmap_t pmap;
2467	vm_offset_t addra;
2468	vm_map_entry_t entry;
2469	vm_object_t object;
2470{
2471	int i;
2472	vm_offset_t starta;
2473	vm_offset_t addr;
2474	vm_pindex_t pindex;
2475	vm_page_t m, mpte;
2476
2477	if (entry->object.vm_object != object)
2478		return;
2479
2480	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap))
2481		return;
2482
2483	starta = addra - PFBAK * PAGE_SIZE;
2484	if (starta < entry->start) {
2485		starta = entry->start;
2486	} else if (starta > addra) {
2487		starta = 0;
2488	}
2489
2490	mpte = NULL;
2491	for (i = 0; i < PAGEORDER_SIZE; i++) {
2492		vm_object_t lobject;
2493		unsigned *pte;
2494
2495		addr = addra + pmap_prefault_pageorder[i];
2496		if (addr < starta || addr >= entry->end)
2497			continue;
2498
2499		if (*pmap_pde(pmap, addr) == 0)
2500			continue;
2501
2502		pte = (unsigned *) vtopte(addr);
2503		if (*pte)
2504			continue;
2505
2506		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2507		lobject = object;
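		/*
		 * Walk down the backing-object (shadow) chain looking for
		 * the page, adjusting the pindex by each object's offset.
		 */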
2508		for (m = vm_page_lookup(lobject, pindex);
2509		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2510		    lobject = lobject->backing_object) {
2511			if (lobject->backing_object_offset & PAGE_MASK)
2512				break;
2513			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2514			m = vm_page_lookup(lobject->backing_object, pindex);
2515		}
2516
2517		/*
2518		 * give-up when a page is not in memory
2519		 */
2520		if (m == NULL)
2521			break;
2522
2523		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2524		    (m->busy == 0) &&
2525		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2526
2527			if ((m->queue - m->pc) == PQ_CACHE) {
2528				vm_page_deactivate(m);
2529			}
2530			m->flags |= PG_BUSY;
2531			mpte = pmap_enter_quick(pmap, addr,
2532				VM_PAGE_TO_PHYS(m), mpte);
2533			m->flags |= PG_MAPPED;
2534			PAGE_WAKEUP(m);
2535		}
2536	}
2537}
2538
2539/*
2540 *	Routine:	pmap_change_wiring
2541 *	Function:	Change the wiring attribute for a map/virtual-address
2542 *			pair.
2543 *	In/out conditions:
2544 *			The mapping must already exist in the pmap.
2545 */
2546void
2547pmap_change_wiring(pmap, va, wired)
2548	register pmap_t pmap;
2549	vm_offset_t va;
2550	boolean_t wired;
2551{
2552	register unsigned *pte;
2553
2554	if (pmap == NULL)
2555		return;
2556
2557	pte = pmap_pte(pmap, va);
2558
2559	if (wired && !pmap_pte_w(pte))
2560		pmap->pm_stats.wired_count++;
2561	else if (!wired && pmap_pte_w(pte))
2562		pmap->pm_stats.wired_count--;
2563
2564	/*
2565	 * Wiring is not a hardware characteristic so there is no need to
2566	 * invalidate TLB.
2567	 */
2568	pmap_pte_set_w(pte, wired);
2569}
2570
2571
2572
2573/*
2574 *	Copy the range specified by src_addr/len
2575 *	from the source map to the range dst_addr/len
2576 *	in the destination map.
2577 *
2578 *	This routine is only advisory and need not do anything.
2579 */
2580
2581void
2582pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2583	pmap_t dst_pmap, src_pmap;
2584	vm_offset_t dst_addr;
2585	vm_size_t len;
2586	vm_offset_t src_addr;
2587{
2588	vm_offset_t addr;
2589	vm_offset_t end_addr = src_addr + len;
2590	vm_offset_t pdnxt;
2591	unsigned src_frame, dst_frame;
2592
2593	if (dst_addr != src_addr)
2594		return;
2595
2596	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2597	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
2598		return;
2599	}
2600
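	/*
	 * Map the destination pmap's page tables at the alternate
	 * recursive mapping (APTD) so they can be reached via avtopte().
	 */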
2601	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2602	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
2603		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
2604		invltlb();
2605	}
2606
2607	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
2608		unsigned *src_pte, *dst_pte;
2609		vm_page_t dstmpte, srcmpte;
2610		vm_offset_t srcptepaddr;
2611		unsigned ptepindex;
2612
2613		if (addr >= UPT_MIN_ADDRESS)
2614			panic("pmap_copy: invalid to pmap_copy page tables\n");
2615
2616		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2617		ptepindex = addr >> PDRSHIFT;
2618
2619		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2620		if (srcptepaddr == 0)
2621			continue;
2622
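		/*
		 * A 4MB mapping is copied by duplicating the page directory
		 * entry itself; there is no page table page to track.
		 */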
2623		if (srcptepaddr & PG_PS) {
2624			if (dst_pmap->pm_pdir[ptepindex] == 0) {
2625				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2626				dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2627			}
2628			continue;
2629		}
2630
2631		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
2632		if ((srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
2633			continue;
2634
2635		if (pdnxt > end_addr)
2636			pdnxt = end_addr;
2637
2638		src_pte = (unsigned *) vtopte(addr);
2639		dst_pte = (unsigned *) avtopte(addr);
2640		while (addr < pdnxt) {
2641			unsigned ptetemp;
2642			ptetemp = *src_pte;
2643			/*
2644			 * we only virtual copy managed pages
2645			 */
2646			if ((ptetemp & PG_MANAGED) != 0) {
2647				/*
2648				 * We have to check after allocpte for the
2649				 * pte still being around...  allocpte can
2650				 * block.
2651				 */
2652				dstmpte = pmap_allocpte(dst_pmap, addr);
2653				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
2654					/*
2655					 * Clear the modified and
2656					 * accessed (referenced) bits
2657					 * during the copy.
2658					 */
2659					*dst_pte = ptetemp & ~(PG_M|PG_A);
2660					dst_pmap->pm_stats.resident_count++;
2661					pmap_insert_entry(dst_pmap, addr,
2662						dstmpte,
2663						(ptetemp & PG_FRAME));
2664	 			} else {
2665					pmap_unwire_pte_hold(dst_pmap, dstmpte);
2666				}
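				/*
				 * Once the destination page table page holds
				 * as many mappings as the source page table
				 * page, nothing more remains to be copied in
				 * this range.
				 */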
2667				if (dstmpte->hold_count >= srcmpte->hold_count)
2668					break;
2669			}
2670			addr += PAGE_SIZE;
2671			++src_pte;
2672			++dst_pte;
2673		}
2674	}
2675}
2676
2677/*
2678 *	Routine:	pmap_kernel
2679 *	Function:
2680 *		Returns the physical map handle for the kernel.
2681 */
2682pmap_t
2683pmap_kernel()
2684{
2685	return (kernel_pmap);
2686}
2687
2688/*
2689 *	pmap_zero_page zeros the specified (machine independent)
2690 *	page by mapping the page into virtual memory and using
2691 *	bzero to clear its contents, one machine dependent page
2692 *	at a time.
2693 */
2694void
2695pmap_zero_page(phys)
2696	vm_offset_t phys;
2697{
2698	if (*(int *) CMAP2)
2699		panic("pmap_zero_page: CMAP busy");
2700
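	/*
	 * CMAP2/CADDR2 is a reserved kernel pte/va pair used as a
	 * temporary window to map the page being zeroed.
	 */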
2701	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME);
2702	bzero(CADDR2, PAGE_SIZE);
2703	*(int *) CMAP2 = 0;
2704	invltlb_1pg((vm_offset_t) CADDR2);
2705}
2706
2707/*
2708 *	pmap_copy_page copies the specified (machine independent)
2709 *	page by mapping the page into virtual memory and using
2710 *	bcopy to copy the page, one machine dependent page at a
2711 *	time.
2712 */
2713void
2714pmap_copy_page(src, dst)
2715	vm_offset_t src;
2716	vm_offset_t dst;
2717{
2718	if (*(int *) CMAP1 || *(int *) CMAP2)
2719		panic("pmap_copy_page: CMAP busy");
2720
2721	*(int *) CMAP1 = PG_V | PG_RW | (src & PG_FRAME);
2722	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME);
2723
2724	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2725
2726	*(int *) CMAP1 = 0;
2727	*(int *) CMAP2 = 0;
2728	invltlb_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2);
2729}
2730
2731
2732/*
2733 *	Routine:	pmap_pageable
2734 *	Function:
2735 *		Make the specified pages (by pmap, offset)
2736 *		pageable (or not) as requested.
2737 *
2738 *		A page which is not pageable may not take
2739 *		a fault; therefore, its page table entry
2740 *		must remain valid for the duration.
2741 *
2742 *		This routine is merely advisory; pmap_enter
2743 *		will specify that these pages are to be wired
2744 *		down (or not) as appropriate.
2745 */
2746void
2747pmap_pageable(pmap, sva, eva, pageable)
2748	pmap_t pmap;
2749	vm_offset_t sva, eva;
2750	boolean_t pageable;
2751{
2752}
2753
2754/*
2755 * this routine returns true if a physical page resides
2756 * in the given pmap.
2757 */
2758boolean_t
2759pmap_page_exists(pmap, pa)
2760	pmap_t pmap;
2761	vm_offset_t pa;
2762{
2763	register pv_entry_t pv;
2764	pv_table_t *ppv;
2765	int s;
2766
2767	if (!pmap_is_managed(pa))
2768		return FALSE;
2769
2770	s = splvm();
2771
2772	ppv = pa_to_pvh(pa);
2773	/*
2774	 * Check the current mappings, returning immediately if one is found.
2775	 */
2776	for (pv = TAILQ_FIRST(&ppv->pv_list);
2777		pv;
2778		pv = TAILQ_NEXT(pv, pv_list)) {
2779		if (pv->pv_pmap == pmap) {
2780			splx(s);
2781			return TRUE;
2782		}
2783	}
2784	splx(s);
2785	return (FALSE);
2786}
2787
2788#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2789/*
2790 * Remove all pages from the specified address space;
2791 * this aids process exit speeds.  Also, this code
2792 * is special cased for current process only, but
2793 * can have the more generic (and slightly slower)
2794 * mode enabled.  This is much faster than pmap_remove
2795 * in the case of running down an entire address space.
2796 */
2797void
2798pmap_remove_pages(pmap, sva, eva)
2799	pmap_t pmap;
2800	vm_offset_t sva, eva;
2801{
2802	unsigned *pte, tpte;
2803	pv_table_t *ppv;
2804	pv_entry_t pv, npv;
2805	int s;
2806
2807#if PMAP_PVLIST
2808
2809#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2810	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) {
2811		printf("warning: pmap_remove_pages called with non-current pmap\n");
2812		return;
2813	}
2814#endif
2815
2816	s = splvm();
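	/*
	 * Walk the pmap's private pv list rather than scanning the page
	 * tables; only managed mappings need to be torn down here.
	 */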
2817	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
2818		pv;
2819		pv = npv) {
2820
2821		if (pv->pv_va >= eva || pv->pv_va < sva) {
2822			npv = TAILQ_NEXT(pv, pv_plist);
2823			continue;
2824		}
2825
2826#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2827		pte = (unsigned *)vtopte(pv->pv_va);
2828#else
2829		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2830#endif
2831		tpte = *pte;
2832
2833		/*
2834		 * We cannot remove wired pages from a process' mapping at this time.
2835		 */
2836		if (tpte & PG_W) {
2837			npv = TAILQ_NEXT(pv, pv_plist);
2838			continue;
2839		}
2840		*pte = 0;
2841
2842		ppv = pa_to_pvh(tpte);
2843
2844		pv->pv_pmap->pm_stats.resident_count--;
2845
2846		/*
2847		 * Update the vm_page_t clean and reference bits.
2848		 */
2849		if (tpte & PG_M) {
2850			ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2851		}
2852
2853
2854		npv = TAILQ_NEXT(pv, pv_plist);
2855		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2856
2857		--ppv->pv_list_count;
2858		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
2859		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
2860			ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
2861		}
2862
2863		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2864		free_pv_entry(pv);
2865	}
2866	splx(s);
2867	invltlb();
2868#endif
2869}
2870
2871/*
2872 * pmap_testbit tests bits in pte's
2873 * note that the testbit/changebit routines are inline,
2874 * and a lot of things compile-time evaluate.
2875 */
2876static boolean_t
2877pmap_testbit(pa, bit)
2878	register vm_offset_t pa;
2879	int bit;
2880{
2881	register pv_entry_t pv;
2882	pv_table_t *ppv;
2883	unsigned *pte;
2884	int s;
2885
2886	if (!pmap_is_managed(pa))
2887		return FALSE;
2888
2889	ppv = pa_to_pvh(pa);
2890	if (TAILQ_FIRST(&ppv->pv_list) == NULL)
2891		return FALSE;
2892
2893	s = splvm();
2894
2895	for (pv = TAILQ_FIRST(&ppv->pv_list);
2896		pv;
2897		pv = TAILQ_NEXT(pv, pv_list)) {
2898
2899		/*
2900		 * If the bit being tested is the modified or accessed bit,
2901		 * skip mappings whose modify/reference state is not
2902		 * tracked (e.g. those in the pager's clean submap).
2903		 */
2904		if (bit & (PG_A|PG_M)) {
2905			if (!pmap_track_modified(pv->pv_va))
2906				continue;
2907		}
2908
2909#if defined(PMAP_DIAGNOSTIC)
2910		if (!pv->pv_pmap) {
2911			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2912			continue;
2913		}
2914#endif
2915		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2916		if (*pte & bit) {
2917			splx(s);
2918			return TRUE;
2919		}
2920	}
2921	splx(s);
2922	return (FALSE);
2923}
2924
2925/*
2926 * this routine is used to modify bits in ptes
2927 */
2928static void
2929pmap_changebit(pa, bit, setem)
2930	vm_offset_t pa;
2931	int bit;
2932	boolean_t setem;
2933{
2934	register pv_entry_t pv;
2935	pv_table_t *ppv;
2936	register unsigned *pte;
2937	int changed;
2938	int s;
2939
2940	if (!pmap_is_managed(pa))
2941		return;
2942
2943	s = splvm();
2944	changed = 0;
2945	ppv = pa_to_pvh(pa);
2946
2947	/*
2948	 * Loop over all current mappings, setting/clearing as appropriate.
2949	 * If setting RO, do we need to clear the VAC?
2950	 */
2951	for (pv = TAILQ_FIRST(&ppv->pv_list);
2952		pv;
2953		pv = TAILQ_NEXT(pv, pv_list)) {
2954
2955		/*
2956		 * don't write protect pager mappings
2957		 */
2958		if (!setem && (bit == PG_RW)) {
2959			if (!pmap_track_modified(pv->pv_va))
2960				continue;
2961		}
2962
2963#if defined(PMAP_DIAGNOSTIC)
2964		if (!pv->pv_pmap) {
2965			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2966			continue;
2967		}
2968#endif
2969
2970		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2971
2972		if (setem) {
2973			*(int *)pte |= bit;
2974			changed = 1;
2975		} else {
2976			vm_offset_t pbits = *(vm_offset_t *)pte;
2977			if (pbits & bit) {
2978				changed = 1;
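				/*
				 * When revoking write access, transfer any
				 * hardware-set modified state to the vm_page
				 * before PG_M is cleared.
				 */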
2979				if (bit == PG_RW) {
2980					if (pbits & PG_M) {
2981						ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2982					}
2983					*(int *)pte = pbits & ~(PG_M|PG_RW);
2984				} else {
2985					*(int *)pte = pbits & ~bit;
2986				}
2987			}
2988		}
2989	}
2990	splx(s);
2991	if (changed)
2992		invltlb();
2993}
2994
2995/*
2996 *      pmap_page_protect:
2997 *
2998 *      Lower the permission for all mappings to a given page.
2999 */
3000void
3001pmap_page_protect(phys, prot)
3002	vm_offset_t phys;
3003	vm_prot_t prot;
3004{
3005	if ((prot & VM_PROT_WRITE) == 0) {
3006		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
3007			pmap_changebit(phys, PG_RW, FALSE);
3008		} else {
3009			pmap_remove_all(phys);
3010		}
3011	}
3012}
3013
3014vm_offset_t
3015pmap_phys_address(ppn)
3016	int ppn;
3017{
3018	return (i386_ptob(ppn));
3019}
3020
3021/*
3022 *	pmap_ts_referenced:
3023 *
3024 *	Return the count of reference bits for a page, clearing all of them.
3025 *
3026 */
3027int
3028pmap_ts_referenced(vm_offset_t pa)
3029{
3030	register pv_entry_t pv;
3031	pv_table_t *ppv;
3032	unsigned *pte;
3033	int s;
3034	int rtval = 0;
3035
3036	if (!pmap_is_managed(pa))
3037		return 0;
3038
3039	s = splvm();
3040
3041	ppv = pa_to_pvh(pa);
3042
3043	if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
3044		splx(s);
3045		return 0;
3046	}
3047
3048	/*
3049	 * Loop over the current mappings, counting and clearing the accessed bit in each.
3050	 */
3051	for (pv = TAILQ_FIRST(&ppv->pv_list);
3052		pv;
3053		pv = TAILQ_NEXT(pv, pv_list)) {
3054		/*
3055		 * Skip mappings whose modify/reference state is not
3056		 * tracked (e.g. those in the pager's clean
3057		 * submap).
3058		 */
3059		if (!pmap_track_modified(pv->pv_va))
3060			continue;
3061
3062		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3063		if (pte == NULL) {
3064			continue;
3065		}
3066		if (*pte & PG_A) {
3067			rtval++;
3068			*pte &= ~PG_A;
3069		}
3070	}
3071	splx(s);
3072	if (rtval) {
3073		invltlb();
3074	}
3075	return (rtval);
3076}
3077
3078/*
3079 *	pmap_is_modified:
3080 *
3081 *	Return whether or not the specified physical page was modified
3082 *	in any physical maps.
3083 */
3084boolean_t
3085pmap_is_modified(vm_offset_t pa)
3086{
3087	return pmap_testbit((pa), PG_M);
3088}
3089
3090/*
3091 *	Clear the modify bits on the specified physical page.
3092 */
3093void
3094pmap_clear_modify(vm_offset_t pa)
3095{
3096	pmap_changebit((pa), PG_M, FALSE);
3097}
3098
3099/*
3100 *	pmap_clear_reference:
3101 *
3102 *	Clear the reference bit on the specified physical page.
3103 */
3104void
3105pmap_clear_reference(vm_offset_t pa)
3106{
3107	pmap_changebit((pa), PG_A, FALSE);
3108}
3109
3110/*
3111 * Miscellaneous support routines follow
3112 */
3113
3114static void
3115i386_protection_init()
3116{
3117	register int *kp, prot;
3118
3119	kp = protection_codes;
3120	for (prot = 0; prot < 8; prot++) {
3121		switch (prot) {
3122		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3123			/*
3124			 * Read access is also 0. There isn't any execute bit,
3125			 * so just make it readable.
3126			 */
3127		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3128		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3129		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3130			*kp++ = 0;
3131			break;
3132		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3133		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3134		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3135		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3136			*kp++ = PG_RW;
3137			break;
3138		}
3139	}
3140}
3141
3142/*
3143 * Map a set of physical memory pages into the kernel virtual
3144 * address space. Return a pointer to where it is mapped. This
3145 * routine is intended to be used for mapping device memory,
3146 * NOT real memory.  The pages are mapped with the default
3147 * caching attributes; PG_N is not set on them.
3148 */
3149void *
3150pmap_mapdev(pa, size)
3151	vm_offset_t pa;
3152	vm_size_t size;
3153{
3154	vm_offset_t va, tmpva;
3155	unsigned *pte;
3156
3157	size = roundup(size, PAGE_SIZE);
3158
3159	va = kmem_alloc_pageable(kernel_map, size);
3160	if (!va)
3161		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3162
3163	pa = pa & PG_FRAME;
3164	for (tmpva = va; size > 0;) {
3165		pte = (unsigned *)vtopte(tmpva);
3166		*pte = pa | PG_RW | PG_V | pgeflag;
3167		size -= PAGE_SIZE;
3168		tmpva += PAGE_SIZE;
3169		pa += PAGE_SIZE;
3170	}
3171	invltlb();
3172
3173	return ((void *) va);
3174}
3175
3176/*
3177 * perform the pmap work for mincore
3178 */
3179int
3180pmap_mincore(pmap, addr)
3181	pmap_t pmap;
3182	vm_offset_t addr;
3183{
3184
3185	unsigned *ptep, pte;
3186	int val = 0;
3187
3188	ptep = pmap_pte(pmap, addr);
3189	if (ptep == NULL) {
3190		return 0;
3191	}
3192
3193	if ((pte = *ptep) != 0) {
3194		vm_offset_t pa;
3195		val = MINCORE_INCORE;
3196		pa = pte & PG_FRAME;
3197
3198		/*
3199		 * Modified by us
3200		 */
3201		if (pte & PG_M)
3202			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3203		/*
3204		 * Modified by someone
3205		 */
3206		else if (PHYS_TO_VM_PAGE(pa)->dirty ||
3207			pmap_is_modified(pa))
3208			val |= MINCORE_MODIFIED_OTHER;
3209		/*
3210		 * Referenced by us
3211		 */
3212		if (pte & PG_U)
3213			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3214
3215		/*
3216		 * Referenced by someone
3217		 */
3218		else if ((PHYS_TO_VM_PAGE(pa)->flags & PG_REFERENCED) ||
3219			pmap_ts_referenced(pa)) {
3220			val |= MINCORE_REFERENCED_OTHER;
3221			PHYS_TO_VM_PAGE(pa)->flags |= PG_REFERENCED;
3222		}
3223	}
3224	return val;
3225}
3226
3227void
3228pmap_activate(struct proc *p)
3229{
3230	load_cr3(p->p_addr->u_pcb.pcb_cr3 =
3231		vtophys(p->p_vmspace->vm_pmap.pm_pdir));
3232}
3233
3234vm_offset_t
3235pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3236{
3237	if ((size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3238		return addr;
3239	}
3240
3241	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3242	return addr;
3243}
3244
3245
3246#if defined(PMAP_DEBUG)
3247int pmap_pid_dump(int pid) {
3248	pmap_t pmap;
3249	struct proc *p;
3250	int npte = 0;
3251	int index;
3252	for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) {
3253		if (p->p_pid != pid)
3254			continue;
3255
3256		if (p->p_vmspace) {
3257			int i,j;
3258			index = 0;
3259			pmap = &p->p_vmspace->vm_pmap;
3260			for(i=0;i<1024;i++) {
3261				pd_entry_t *pde;
3262				unsigned *pte;
3263				unsigned base = i << PDRSHIFT;
3264
3265				pde = &pmap->pm_pdir[i];
3266				if (pde && pmap_pde_v(pde)) {
3267					for(j=0;j<1024;j++) {
3268						unsigned va = base + (j << PAGE_SHIFT);
3269						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
3270							if (index) {
3271								index = 0;
3272								printf("\n");
3273							}
3274							return npte;
3275						}
3276						pte = pmap_pte_quick( pmap, va);
3277						if (pte && pmap_pte_v(pte)) {
3278							vm_offset_t pa;
3279							vm_page_t m;
3280							pa = *(int *)pte;
3281							m = PHYS_TO_VM_PAGE((pa & PG_FRAME));
3282							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
3283								va, pa, m->hold_count, m->wire_count, m->flags);
3284							npte++;
3285							index++;
3286							if (index >= 2) {
3287								index = 0;
3288								printf("\n");
3289							} else {
3290								printf(" ");
3291							}
3292						}
3293					}
3294				}
3295			}
3296		}
3297	}
3298	return npte;
3299}
3300#endif
3301
3302#if defined(DEBUG)
3303
3304static void	pads __P((pmap_t pm));
3305static void	pmap_pvdump __P((vm_offset_t pa));
3306
3307/* print address space of pmap*/
3308static void
3309pads(pm)
3310	pmap_t pm;
3311{
3312	unsigned va, i, j;
3313	unsigned *ptep;
3314
3315	if (pm == kernel_pmap)
3316		return;
3317	for (i = 0; i < 1024; i++)
3318		if (pm->pm_pdir[i])
3319			for (j = 0; j < 1024; j++) {
3320				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3321				if (pm == kernel_pmap && va < KERNBASE)
3322					continue;
3323				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
3324					continue;
3325				ptep = pmap_pte_quick(pm, va);
3326				if (pmap_pte_v(ptep))
3327					printf("%x:%x ", va, *(int *) ptep);
3328			}
3329
3330}
3331
3332static void
3333pmap_pvdump(pa)
3334	vm_offset_t pa;
3335{
3336	pv_table_t *ppv;
3337	register pv_entry_t pv;
3338
3339	printf("pa %x", pa);
3340	ppv = pa_to_pvh(pa);
3341	for (pv = TAILQ_FIRST(&ppv->pv_list);
3342		pv;
3343		pv = TAILQ_NEXT(pv, pv_list)) {
3344#ifdef used_to_be
3345		printf(" -> pmap %x, va %x, flags %x",
3346		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
3347#endif
3348		printf(" -> pmap %x, va %x",
3349		    pv->pv_pmap, pv->pv_va);
3350		pads(pv->pv_pmap);
3351	}
3352	printf(" ");
3353}
3354#endif
3355