1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42 *	$Id: pmap.c,v 1.159 1997/08/25 21:53:01 bde Exp $
43 */
44
45/*
46 *	Manages physical address maps.
47 *
48 *	In addition to hardware address maps, this
49 *	module is called upon to provide software-use-only
50 *	maps which may or may not be stored in the same
51 *	form as hardware maps.  These pseudo-maps are
52 *	used to store intermediate results from copy
53 *	operations to and from address spaces.
54 *
55 *	Since the information managed by this module is
56 *	also stored by the logical address mapping module,
57 *	this module may throw away valid virtual-to-physical
58 *	mappings at almost any time.  However, invalidations
59 *	of virtual-to-physical mappings must be done as
60 *	requested.
61 *
62 *	In order to cope with hardware architectures which
63 *	make virtual-to-physical map invalidates expensive,
64 *	this module may delay invalidate or reduced protection
65 *	operations until such time as they are actually
66 *	necessary.  This module is given full information as
67 *	to which processors are currently using which maps,
68 *	and to when physical maps must be made correct.
69 */
70
71#include "opt_cpu.h"
72
73#include <sys/param.h>
74#include <sys/systm.h>
75#include <sys/proc.h>
76#include <sys/malloc.h>
77#include <sys/msgbuf.h>
78#include <sys/vmmeter.h>
79#include <sys/mman.h>
80
81#include <vm/vm.h>
82#include <vm/vm_param.h>
83#include <vm/vm_prot.h>
84#include <sys/lock.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_page.h>
87#include <vm/vm_map.h>
88#include <vm/vm_object.h>
89#include <vm/vm_extern.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_pager.h>
92#include <vm/vm_zone.h>
93
94#include <sys/user.h>
95
96#include <machine/cpu.h>
97#include <machine/cputypes.h>
98#include <machine/md_var.h>
99#include <machine/specialreg.h>
100#if defined(SMP) || defined(APIC_IO)
101#include <machine/smp.h>
102#include <machine/apic.h>
103#endif /* SMP || APIC_IO */
104
105#define PMAP_KEEP_PDIRS
106#ifndef PMAP_SHPGPERPROC
107#define PMAP_SHPGPERPROC 200
108#endif
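/*
 * PMAP_SHPGPERPROC is an estimate of the number of pv entries needed per
 * process; pmap_init2() uses it to size the pv_entry zone as
 * PMAP_SHPGPERPROC * maxproc + pv_npg.
 */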
109
110#if defined(DIAGNOSTIC)
111#define PMAP_DIAGNOSTIC
112#endif
113
114#if !defined(PMAP_DIAGNOSTIC)
115#define PMAP_INLINE __inline
116#else
117#define PMAP_INLINE
118#endif
119
120#define PTPHINT
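/*
 * PTPHINT enables pm_ptphint, a cached pointer to the most recently used
 * page table page, which lets pmap_allocpte() and friends skip a
 * vm_page_lookup() in the common case.
 */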
121
122/*
123 * Get PDEs and PTEs for user/kernel address space
124 */
125#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
126#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
127
128#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
129#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
130#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
131#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
132#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
133
134#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
135#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
136
137/*
138 * Given a map and a machine independent protection code,
139 * convert to an i386 protection code.
140 */
141#define pte_prot(m, p)	(protection_codes[p])
142static int protection_codes[8];
143
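/*
 * pa_index()/pa_to_pvh() map a managed physical address to its pv_table
 * entry; they are only meaningful for addresses at or above vm_first_phys.
 */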
144#define	pa_index(pa)		atop((pa) - vm_first_phys)
145#define	pa_to_pvh(pa)		(&pv_table[pa_index(pa)])
146
147static struct pmap kernel_pmap_store;
148pmap_t kernel_pmap;
149
150vm_offset_t avail_start;	/* PA of first available physical page */
151vm_offset_t avail_end;		/* PA of last available physical page */
152vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
153vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
154static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
155static vm_offset_t vm_first_phys;
156int pgeflag;		/* PG_G or-in */
157int pseflag;		/* PG_PS or-in */
158int pv_npg;
159
160static int nkpt;
161static vm_page_t nkpg;
162vm_offset_t kernel_vm_end;
163
164extern vm_offset_t clean_sva, clean_eva;
165
166/*
167 * Data for the pv entry allocation mechanism
168 */
169vm_zone_t pvzone;
170struct vm_zone pvzone_store;
171struct vm_object pvzone_obj;
172#define NPVINIT 8192
173struct pv_entry pvinit[NPVINIT];
174
175/*
176 * All those kernel PT submaps that BSD is so fond of
177 */
178pt_entry_t *CMAP1 = 0;
179static pt_entry_t *CMAP2, *ptmmap;
180static pv_table_t *pv_table;
181caddr_t CADDR1 = 0, ptvmmap = 0;
182static caddr_t CADDR2;
183static pt_entry_t *msgbufmap;
184struct msgbuf *msgbufp=0;
185
186#ifdef SMP
187extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[];
188extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3;
189extern pd_entry_t *IdlePTDS[];
190extern pt_entry_t SMP_prvpt[];
191#endif
192
193pt_entry_t *PMAP1 = 0;
194unsigned *PADDR1 = 0;
195
196static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
197static unsigned * get_ptbase __P((pmap_t pmap));
198static pv_entry_t get_pv_entry __P((void));
199static void	i386_protection_init __P((void));
200static void	pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem));
201
202static PMAP_INLINE int	pmap_is_managed __P((vm_offset_t pa));
203static void	pmap_remove_all __P((vm_offset_t pa));
204static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
205				      vm_offset_t pa, vm_page_t mpte));
206static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
207					vm_offset_t sva));
208static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
209static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv,
210					vm_offset_t va));
211static boolean_t pmap_testbit __P((vm_offset_t pa, int bit));
212static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
213		vm_page_t mpte, vm_offset_t pa));
214
215static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
216
217static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
218static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
219static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
220static vm_page_t pmap_page_alloc __P((vm_object_t object, vm_pindex_t pindex));
221static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
222static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
223vm_offset_t pmap_kmem_choose __P((vm_offset_t addr));
224
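/*
 * pdstack caches the kernel VAs of up to PDSTACKMAX recently released
 * page directory pages so that pmap_pinit() can reuse them instead of
 * calling kmem_alloc_pageable() each time.
 */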
225#define PDSTACKMAX 6
226static vm_offset_t pdstack[PDSTACKMAX];
227static int pdstackptr;
228unsigned pdir4mb;
229
230/*
231 *	Routine:	pmap_pte
232 *	Function:
233 *		Extract the page table entry associated
234 *		with the given map/virtual_address pair.
235 */
236
237PMAP_INLINE unsigned *
238pmap_pte(pmap, va)
239	register pmap_t pmap;
240	vm_offset_t va;
241{
242	unsigned *pdeaddr;
243
244	if (pmap) {
245		pdeaddr = (unsigned *) pmap_pde(pmap, va);
246		if (*pdeaddr & PG_PS)
247			return pdeaddr;
248		if (*pdeaddr) {
249			return get_ptbase(pmap) + i386_btop(va);
250		}
251	}
252	return (0);
253}
254
255/*
256 * Move the kernel virtual free pointer to the next
257 * 4MB.  This is used to help improve performance
258 * by using a large (4MB) page for much of the kernel
259 * (.text, .data, .bss)
260 */
261vm_offset_t
262pmap_kmem_choose(vm_offset_t addr) {
263	vm_offset_t newaddr = addr;
264#ifndef DISABLE_PSE
265	if (cpu_feature & CPUID_PSE) {
266		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
267	}
268#endif
269	return newaddr;
270}
271
272/*
273 *	Bootstrap the system enough to run with virtual memory.
274 *
275 *	On the i386 this is called after mapping has already been enabled
276 *	and just syncs the pmap module with what has already been done.
277 *	[We can't call it easily with mapping off since the kernel is not
278 *	mapped with PA == VA, hence we would have to relocate every address
279 *	from the linked base (virtual) address "KERNBASE" to the actual
280 *	(physical) address starting relative to 0]
281 */
282void
283pmap_bootstrap(firstaddr, loadaddr)
284	vm_offset_t firstaddr;
285	vm_offset_t loadaddr;
286{
287	vm_offset_t va;
288	pt_entry_t *pte;
289	int i, j;
290
291	avail_start = firstaddr;
292
293	/*
294	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
295	 * large. It should instead be correctly calculated in locore.s and
296	 * not based on 'first' (which is a physical address, not a virtual
297	 * address, for the start of unused physical memory). The kernel
298	 * page tables are NOT double mapped and thus should not be included
299	 * in this calculation.
300	 */
301	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
302	virtual_avail = pmap_kmem_choose(virtual_avail);
303
304	virtual_end = VM_MAX_KERNEL_ADDRESS;
305
306	/*
307	 * Initialize protection array.
308	 */
309	i386_protection_init();
310
311	/*
312	 * The kernel's pmap is statically allocated so we don't have to use
313	 * pmap_create, which is unlikely to work correctly at this part of
314	 * the boot sequence (XXX and which no longer exists).
315	 */
316	kernel_pmap = &kernel_pmap_store;
317
318	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
319
320	kernel_pmap->pm_count = 1;
321	TAILQ_INIT(&kernel_pmap->pm_pvlist);
322	nkpt = NKPT;
323
324	/*
325	 * Reserve some special page table entries/VA space for temporary
326	 * mapping of pages.
327	 */
328#define	SYSMAP(c, p, v, n)	\
329	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
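	/*
	 * SYSMAP(c, p, v, n): reserve n pages worth of PTEs and KVA at the
	 * current va/pte cursors; v gets the VA (cast to type c), p gets a
	 * pointer to the first PTE, and both cursors are advanced.
	 */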
330
331	va = virtual_avail;
332	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
333
334	/*
335	 * CMAP1/CMAP2 are used for zeroing and copying pages.
336	 */
337	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
338	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
339
340	/*
341	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
342	 * XXX ptmmap is not used.
343	 */
344	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
345
346	/*
347	 * msgbufp is used to map the system message buffer.
348	 * XXX msgbufmap is not used.
349	 */
350	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
351	       atop(round_page(sizeof(struct msgbuf))))
352
353	/*
354	 * ptemap is used for pmap_pte_quick
355	 */
356	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
357
358	virtual_avail = va;
359
360	*(int *) CMAP1 = *(int *) CMAP2 = 0;
361	*(int *) PTD = 0;
362
363
364	pgeflag = 0;
365#if !defined(SMP)
366	if (cpu_feature & CPUID_PGE) {
367		pgeflag = PG_G;
368	}
369#endif
370
371/*
372 * Initialize the 4MB page size flag
373 */
374	pseflag = 0;
375/*
376 * The 4MB page version of the initial
377 * kernel page mapping.
378 */
379	pdir4mb = 0;
380
381#if !defined(DISABLE_PSE)
382	if (cpu_feature & CPUID_PSE) {
383		unsigned ptditmp;
384		/*
385		 * Enable the PSE mode
386		 */
387		load_cr4(rcr4() | CR4_PSE);
388
389		/*
390		 * Note that we have enabled PSE mode
391		 */
392		pseflag = PG_PS;
393		ptditmp = (unsigned) kernel_pmap->pm_pdir[KPTDI];
394		ptditmp &= ~(NBPDR - 1);
395		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
396		pdir4mb = ptditmp;
397		/*
398		 * We can do the mapping here for the single processor
399		 * case.  We simply ignore the old page table page from
400		 * now on.
401		 */
402#if !defined(SMP)
403		PTD[KPTDI] = (pd_entry_t) ptditmp;
404		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
405		invltlb();
406#endif
407	}
408#endif
409
410#ifdef SMP
411	if (cpu_apic_address == 0)
412		panic("pmap_bootstrap: no local apic!");
413
414	/* 0 = private page */
415	/* 1 = page table page */
416	/* 2 = local apic */
417	/* 16-31 = io apics */
418	SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME));
419
420	for (i = 0; i < mp_napics; i++) {
421		for (j = 0; j < 16; j++) {
422			/* same page frame as a previous IO apic? */
423			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) ==
424			    ((u_long)io_apic_address[0] & PG_FRAME)) {
425				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
426				break;
427			}
428			/* use this slot if available */
429			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) == 0) {
430				SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW | pgeflag |
431				    ((u_long)io_apic_address[i] & PG_FRAME));
432				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
433				break;
434			}
435		}
436		if (j == 16)
437			panic("no space to map IO apic %d!", i);
438	}
439
440	/* BSP does this itself, AP's get it pre-set */
441	prv_CMAP1 = (pt_entry_t *)&SMP_prvpt[4];
442	prv_CMAP2 = (pt_entry_t *)&SMP_prvpt[5];
443	prv_CMAP3 = (pt_entry_t *)&SMP_prvpt[6];
444#endif
445
446	invltlb();
447
448}
449
450/*
451 * Set 4mb pdir for mp startup, and global flags
452 */
453void
454pmap_set_opt(unsigned *pdir) {
455	int i;
456
457	if (pseflag && (cpu_feature & CPUID_PSE)) {
458		load_cr4(rcr4() | CR4_PSE);
459		if (pdir4mb) {
460			(unsigned) pdir[KPTDI] = pdir4mb;
461		}
462	}
463
464	if (pgeflag && (cpu_feature & CPUID_PGE)) {
465		load_cr4(rcr4() | CR4_PGE);
466		for(i = KPTDI; i < KPTDI + nkpt; i++) {
467			if (pdir[i]) {
468				pdir[i] |= PG_G;
469			}
470		}
471	}
472}
473
474/*
475 * Setup the PTD for the boot processor
476 */
477void
478pmap_set_opt_bsp(void)
479{
480	pmap_set_opt((unsigned *)kernel_pmap->pm_pdir);
481	pmap_set_opt((unsigned *)PTD);
482	invltlb();
483}
484
485/*
486 *	Initialize the pmap module.
487 *	Called by vm_init, to initialize any structures that the pmap
488 *	system needs to map virtual memory.
489 *	pmap_init has been enhanced to support, in a fairly consistent
490 *	way, discontiguous physical memory.
491 */
492void
493pmap_init(phys_start, phys_end)
494	vm_offset_t phys_start, phys_end;
495{
496	vm_offset_t addr;
497	vm_size_t s;
498	int i;
499
500	/*
501	 * calculate the number of pv_entries needed
502	 */
503	vm_first_phys = phys_avail[0];
504	for (i = 0; phys_avail[i + 1]; i += 2);
505	pv_npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE;
506
507	/*
508	 * Allocate memory for random pmap data structures.  Includes the
509	 * pv_head_table.
510	 */
511	s = (vm_size_t) (sizeof(pv_table_t) * pv_npg);
512	s = round_page(s);
513
514	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
515	pv_table = (pv_table_t *) addr;
516	for(i = 0; i < pv_npg; i++) {
517		vm_offset_t pa;
518		TAILQ_INIT(&pv_table[i].pv_list);
519		pv_table[i].pv_list_count = 0;
520		pa = vm_first_phys + i * PAGE_SIZE;
521		pv_table[i].pv_vm_page = PHYS_TO_VM_PAGE(pa);
522	}
523
524	/*
525	 * init the pv free list
526	 */
527	pvzone = &pvzone_store;
528	zbootinit(pvzone, "PV ENTRY", sizeof(pvinit[0]), pvinit, NPVINIT);
529
530	/*
531	 * Now it is safe to enable pv_table recording.
532	 */
533	pmap_initialized = TRUE;
534}
535
536void
537pmap_init2() {
538	zinitna(pvzone, &pvzone_obj, NULL, 0,
539		PMAP_SHPGPERPROC * maxproc + pv_npg, ZONE_INTERRUPT, 4);
540}
541
542/*
543 *	Used to map a range of physical addresses into kernel
544 *	virtual address space.
545 *
546 *	For now, VM is already on; we only need to map the
547 *	specified memory.
548 */
549vm_offset_t
550pmap_map(virt, start, end, prot)
551	vm_offset_t virt;
552	vm_offset_t start;
553	vm_offset_t end;
554	int prot;
555{
556	while (start < end) {
557		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
558		virt += PAGE_SIZE;
559		start += PAGE_SIZE;
560	}
561	return (virt);
562}
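/*
 * Illustrative example (not from the original sources): a boot-time caller
 * mapping a physical range [pa, pa + size) read/write could do
 *
 *	va = pmap_map(va, pa, pa + size, VM_PROT_READ | VM_PROT_WRITE);
 *
 * and treat the returned value as the next unused kernel VA.
 */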
563
564
565/***************************************************
566 * Low level helper routines.....
567 ***************************************************/
568
569#if defined(PMAP_DIAGNOSTIC)
570
571/*
572 * This code checks for non-writeable/modified pages.
573 * This should be an invalid condition.
574 */
575static int
576pmap_nw_modified(pt_entry_t ptea) {
577	int pte;
578
579	pte = (int) ptea;
580
581	if ((pte & (PG_M|PG_RW)) == PG_M)
582		return 1;
583	else
584		return 0;
585}
586#endif
587
588
589/*
590 * this routine defines the region(s) of memory that should
591 * not be tested for the modified bit.
592 */
593static PMAP_INLINE int
594pmap_track_modified( vm_offset_t va) {
595	if ((va < clean_sva) || (va >= clean_eva))
596		return 1;
597	else
598		return 0;
599}
600
601static PMAP_INLINE void
602invltlb_1pg( vm_offset_t va) {
603#if defined(I386_CPU)
604	if (cpu_class == CPUCLASS_386) {
605		invltlb();
606	} else
607#endif
608	{
609		invlpg(va);
610	}
611}
612
613static PMAP_INLINE void
614invltlb_2pg( vm_offset_t va1, vm_offset_t va2) {
615#if defined(I386_CPU)
616	if (cpu_class == CPUCLASS_386) {
617		invltlb();
618	} else
619#endif
620	{
621		invlpg(va1);
622		invlpg(va2);
623	}
624}
625
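/*
 * Return the base of the page table mapping for the given pmap: the
 * recursively mapped PTmap if the pmap is the kernel's or the one
 * currently loaded, otherwise the alternate APTmap (loading APTDpde
 * and flushing the TLB if it points elsewhere).
 */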
626static unsigned *
627get_ptbase(pmap)
628	pmap_t pmap;
629{
630	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
631
632	/* are we current address space or kernel? */
633	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
634		return (unsigned *) PTmap;
635	}
636	/* otherwise, we are alternate address space */
637	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
638		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
639		invltlb();
640	}
641	return (unsigned *) APTmap;
642}
643
644/*
645 * Super fast pmap_pte routine best used when scanning
646 * the pv lists.  This eliminates many coarse-grained
647 * invltlb calls.  Note that many of the pv list
648 * scans are across different pmaps.  It is very wasteful
649 * to do an entire invltlb for checking a single mapping.
650 */
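/*
 * PMAP1/PADDR1 are a reserved kernel PTE/VA pair; pmap_pte_quick() points
 * PMAP1 at the target page table page and invalidates only PADDR1, rather
 * than switching to the alternate page table space.
 */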
651
652static unsigned *
653pmap_pte_quick(pmap, va)
654	register pmap_t pmap;
655	vm_offset_t va;
656{
657	unsigned pde, newpf;
658	if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
659		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
660		unsigned index = i386_btop(va);
661		/* are we current address space or kernel? */
662		if ((pmap == kernel_pmap) ||
663			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
664			return (unsigned *) PTmap + index;
665		}
666		newpf = pde & PG_FRAME;
667		if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
668			* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
669			invltlb_1pg((vm_offset_t) PADDR1);
670		}
671		return PADDR1 + ((unsigned) index & (NPTEPG - 1));
672	}
673	return (0);
674}
675
676/*
677 *	Routine:	pmap_extract
678 *	Function:
679 *		Extract the physical page address associated
680 *		with the given map/virtual_address pair.
681 */
682vm_offset_t
683pmap_extract(pmap, va)
684	register pmap_t pmap;
685	vm_offset_t va;
686{
687	vm_offset_t rtval;
688	vm_offset_t pdirindex;
689	pdirindex = va >> PDRSHIFT;
690	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
691		unsigned *pte;
692		if ((rtval & PG_PS) != 0) {
693			rtval &= ~(NBPDR - 1);
694			rtval |= va & (NBPDR - 1);
695			return rtval;
696		}
697		pte = get_ptbase(pmap) + i386_btop(va);
698		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
699		return rtval;
700	}
701	return 0;
702
703}
704
705/*
706 * determine if a page is managed (memory vs. device)
707 */
708static PMAP_INLINE int
709pmap_is_managed(pa)
710	vm_offset_t pa;
711{
712	int i;
713
714	if (!pmap_initialized)
715		return 0;
716
717	for (i = 0; phys_avail[i + 1]; i += 2) {
718		if (pa < phys_avail[i + 1] && pa >= phys_avail[i])
719			return 1;
720	}
721	return 0;
722}
723
724
725/***************************************************
726 * Low level mapping routines.....
727 ***************************************************/
728
729/*
730 * Add a list of wired pages to the kva.
731 * This routine is only used for temporary
732 * kernel mappings that do not need to have
733 * page modification or references recorded.
734 * Note that old mappings are simply written
735 * over.  The page *must* be wired.
736 */
737void
738pmap_qenter(va, m, count)
739	vm_offset_t va;
740	vm_page_t *m;
741	int count;
742{
743	int i;
744	register unsigned *pte;
745
746	for (i = 0; i < count; i++) {
747		vm_offset_t tva = va + i * PAGE_SIZE;
748		unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | pgeflag;
749		unsigned opte;
750		pte = (unsigned *)vtopte(tva);
751		opte = *pte;
752		*pte = npte;
753		if (opte)
754			invltlb_1pg(tva);
755	}
756}
757
758/*
759 * this routine jerks page mappings from the
760 * kernel -- it is meant only for temporary mappings.
761 */
762void
763pmap_qremove(va, count)
764	vm_offset_t va;
765	int count;
766{
767	int i;
768	register unsigned *pte;
769
770	for (i = 0; i < count; i++) {
771		pte = (unsigned *)vtopte(va);
772		*pte = 0;
773		invltlb_1pg(va);
774		va += PAGE_SIZE;
775	}
776}
777
778/*
779 * add a wired page to the kva
780 * note that in order for the mapping to take effect -- you
781 * should do an invltlb after doing the pmap_kenter...
782 */
783PMAP_INLINE void
784pmap_kenter(va, pa)
785	vm_offset_t va;
786	register vm_offset_t pa;
787{
788	register unsigned *pte;
789	unsigned npte, opte;
790
791	npte = pa | PG_RW | PG_V | pgeflag;
792	pte = (unsigned *)vtopte(va);
793	opte = *pte;
794	*pte = npte;
795	if (opte)
796		invltlb_1pg(va);
797}
798
799/*
800 * remove a page from the kernel pagetables
801 */
802PMAP_INLINE void
803pmap_kremove(va)
804	vm_offset_t va;
805{
806	register unsigned *pte;
807
808	pte = (unsigned *)vtopte(va);
809	*pte = 0;
810	invltlb_1pg(va);
811}
812
813static vm_page_t
814pmap_page_alloc(object, pindex)
815	vm_object_t object;
816	vm_pindex_t pindex;
817{
818	vm_page_t m;
819	m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO);
820	if (m == NULL) {
821		VM_WAIT;
822	}
823	return m;
824}
825
826static vm_page_t
827pmap_page_lookup(object, pindex)
828	vm_object_t object;
829	vm_pindex_t pindex;
830{
831	vm_page_t m;
832retry:
833	m = vm_page_lookup(object, pindex);
834	if (m) {
835		if (m->flags & PG_BUSY) {
836			m->flags |= PG_WANTED;
837			tsleep(m, PVM, "pplookp", 0);
838			goto retry;
839		}
840	}
841
842	return m;
843}
844
845/*
846 * Create the UPAGES for a new process.
847 * This routine directly affects the fork perf for a process.
848 */
849void
850pmap_new_proc(p)
851	struct proc *p;
852{
853	int i;
854	vm_object_t upobj;
855	vm_page_t m;
856	struct user *up;
857	unsigned *ptek;
858
859	/*
860	 * allocate object for the upages
861	 */
862	upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
863	p->p_upages_obj = upobj;
864
865	/* get a kernel virtual address for the UPAGES for this proc */
866	up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * PAGE_SIZE);
867	if (up == NULL)
868		panic("pmap_new_proc: u_map allocation failed");
869
870	ptek = (unsigned *) vtopte((vm_offset_t) up);
871
872	for(i=0;i<UPAGES;i++) {
873		/*
874		 * Get a kernel stack page
875		 */
876		while ((m = vm_page_alloc(upobj,
877			i, VM_ALLOC_NORMAL)) == NULL) {
878			VM_WAIT;
879		}
880
881		/*
882		 * Wire the page
883		 */
884		m->wire_count++;
885		++cnt.v_wire_count;
886
887		/*
888		 * Enter the page into the kernel address space.
889		 */
890		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
891
892		m->flags &= ~(PG_ZERO|PG_BUSY);
893		m->flags |= PG_MAPPED|PG_WRITEABLE;
894		m->valid = VM_PAGE_BITS_ALL;
895	}
896
897	p->p_addr = up;
898}
899
900/*
901 * Dispose of the UPAGES for a process that has exited.
902 * This routine directly impacts the exit perf of a process.
903 */
904void
905pmap_dispose_proc(p)
906	struct proc *p;
907{
908	int i;
909	vm_object_t upobj;
910	vm_page_t m;
911	unsigned *ptek;
912
913	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
914
915	upobj = p->p_upages_obj;
916
917	for(i=0;i<UPAGES;i++) {
918		unsigned oldpte;
919		if ((m = vm_page_lookup(upobj, i)) == NULL)
920			panic("pmap_dispose_proc: upage already missing???");
921		oldpte = *(ptek + i);
922		*(ptek + i) = 0;
923		if (oldpte & PG_G)
924			invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
925		vm_page_unwire(m);
926		vm_page_free(m);
927	}
928
929	vm_object_deallocate(upobj);
930
931	kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
932}
933
934/*
935 * Allow the UPAGES for a process to be prejudicially paged out.
936 */
937void
938pmap_swapout_proc(p)
939	struct proc *p;
940{
941	int i;
942	vm_object_t upobj;
943	vm_page_t m;
944
945	upobj = p->p_upages_obj;
946	/*
947	 * let the upages be paged
948	 */
949	for(i=0;i<UPAGES;i++) {
950		if ((m = vm_page_lookup(upobj, i)) == NULL)
951			panic("pmap_swapout_proc: upage already missing???");
952		m->dirty = VM_PAGE_BITS_ALL;
953		vm_page_unwire(m);
954		vm_page_deactivate(m);
955		pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
956	}
957}
958
959/*
960 * Bring the UPAGES for a specified process back in.
961 */
962void
963pmap_swapin_proc(p)
964	struct proc *p;
965{
966	int i;
967	vm_object_t upobj;
968	vm_page_t m;
969	unsigned *pte;
970
971	upobj = p->p_upages_obj;
972	for(i=0;i<UPAGES;i++) {
973		int s;
974		s = splvm();
975retry:
976		if ((m = vm_page_lookup(upobj, i)) == NULL) {
977			if ((m = vm_page_alloc(upobj, i, VM_ALLOC_NORMAL)) == NULL) {
978				VM_WAIT;
979				goto retry;
980			}
981		} else {
982			if ((m->flags & PG_BUSY) || m->busy) {
983				m->flags |= PG_WANTED;
984				tsleep(m, PVM, "swinuw",0);
985				goto retry;
986			}
987			m->flags |= PG_BUSY;
988		}
989		vm_page_wire(m);
990		splx(s);
991
992		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
993			VM_PAGE_TO_PHYS(m));
994
995		if (m->valid != VM_PAGE_BITS_ALL) {
996			int rv;
997			rv = vm_pager_get_pages(upobj, &m, 1, 0);
998			if (rv != VM_PAGER_OK)
999				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
1000			m->valid = VM_PAGE_BITS_ALL;
1001		}
1002		PAGE_WAKEUP(m);
1003		m->flags |= PG_MAPPED|PG_WRITEABLE;
1004	}
1005}
1006
1007/***************************************************
1008 * Page table page management routines.....
1009 ***************************************************/
1010
1011/*
1012 * This routine unholds page table pages, and if the hold count
1013 * drops to zero, then it decrements the wire count.
1014 */
1015static int
1016_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1017	int s;
1018
1019	if (m->flags & PG_BUSY) {
1020		s = splvm();
1021		while (m->flags & PG_BUSY) {
1022			m->flags |= PG_WANTED;
1023			tsleep(m, PVM, "pmuwpt", 0);
1024		}
1025		splx(s);
1026	}
1027
1028	if (m->hold_count == 0) {
1029		vm_offset_t pteva;
1030		/*
1031		 * unmap the page table page
1032		 */
1033		pmap->pm_pdir[m->pindex] = 0;
1034		--pmap->pm_stats.resident_count;
1035		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1036			(((unsigned) PTDpde) & PG_FRAME)) {
1037			/*
1038			 * Do an invltlb to make the invalidated mapping
1039			 * take effect immediately.
1040			 */
1041			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
1042			invltlb_1pg(pteva);
1043		}
1044
1045#if defined(PTPHINT)
1046		if (pmap->pm_ptphint == m)
1047			pmap->pm_ptphint = NULL;
1048#endif
1049
1050		/*
1051		 * If the page is finally unwired, simply free it.
1052		 */
1053		--m->wire_count;
1054		if (m->wire_count == 0) {
1055
1056			if (m->flags & PG_WANTED) {
1057				m->flags &= ~PG_WANTED;
1058				wakeup(m);
1059			}
1060
1061			vm_page_free_zero(m);
1062			--cnt.v_wire_count;
1063		}
1064		return 1;
1065	}
1066	return 0;
1067}
1068
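/*
 * Drop one hold on a page table page; when the hold count reaches zero,
 * let _pmap_unwire_pte_hold() unmap and possibly free it.
 */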
1069__inline static int
1070pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1071	vm_page_unhold(m);
1072	if (m->hold_count == 0)
1073		return _pmap_unwire_pte_hold(pmap, m);
1074	else
1075		return 0;
1076}
1077
1078/*
1079 * After removing a page table entry, this routine is used to
1080 * conditionally free the page, and manage the hold/wire counts.
1081 */
1082static int
1083pmap_unuse_pt(pmap, va, mpte)
1084	pmap_t pmap;
1085	vm_offset_t va;
1086	vm_page_t mpte;
1087{
1088	unsigned ptepindex;
1089	if (va >= UPT_MIN_ADDRESS)
1090		return 0;
1091
1092	if (mpte == NULL) {
1093		ptepindex = (va >> PDRSHIFT);
1094#if defined(PTPHINT)
1095		if (pmap->pm_ptphint &&
1096			(pmap->pm_ptphint->pindex == ptepindex)) {
1097			mpte = pmap->pm_ptphint;
1098		} else {
1099			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1100			pmap->pm_ptphint = mpte;
1101		}
1102#else
1103		mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1104#endif
1105	}
1106
1107	return pmap_unwire_pte_hold(pmap, mpte);
1108}
1109
1110/*
1111 * Initialize a preallocated and zeroed pmap structure,
1112 * such as one in a vmspace structure.
1113 */
1114void
1115pmap_pinit(pmap)
1116	register struct pmap *pmap;
1117{
1118	vm_page_t ptdpg;
1119	/*
1120	 * No need to allocate page table space yet but we do need a valid
1121	 * page directory table.
1122	 */
1123
1124	if (pdstackptr > 0) {
1125		--pdstackptr;
1126		pmap->pm_pdir = (pd_entry_t *)pdstack[pdstackptr];
1127	} else {
1128		pmap->pm_pdir =
1129			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1130	}
1131
1132	/*
1133	 * allocate object for the ptes
1134	 */
1135	pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
1136
1137	/*
1138	 * allocate the page directory page
1139	 */
1140retry:
1141	ptdpg = pmap_page_alloc( pmap->pm_pteobj, PTDPTDI);
1142	if (ptdpg == NULL)
1143		goto retry;
1144
1145	ptdpg->wire_count = 1;
1146	++cnt.v_wire_count;
1147
1148	ptdpg->flags &= ~(PG_MAPPED|PG_BUSY);	/* not mapped normally */
1149	ptdpg->valid = VM_PAGE_BITS_ALL;
1150
1151	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
1152	if ((ptdpg->flags & PG_ZERO) == 0)
1153		bzero(pmap->pm_pdir, PAGE_SIZE);
1154
1155	/* wire in kernel global address entries */
1156	/* XXX copies current process, does not fill in MPPTDI */
1157	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
1158
1159	/* install self-referential address mapping entry */
1160	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1161		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW;
1162
1163	pmap->pm_flags = 0;
1164	pmap->pm_count = 1;
1165	pmap->pm_ptphint = NULL;
1166	TAILQ_INIT(&pmap->pm_pvlist);
1167}
1168
1169static int
1170pmap_release_free_page(pmap, p)
1171	struct pmap *pmap;
1172	vm_page_t p;
1173{
1174	int s;
1175	unsigned *pde = (unsigned *) pmap->pm_pdir;
1176	/*
1177	 * This code optimizes the case of freeing non-busy
1178	 * page-table pages.  Those pages are zero now, and
1179	 * might as well be placed directly into the zero queue.
1180	 */
1181	s = splvm();
1182	if (p->flags & PG_BUSY) {
1183		p->flags |= PG_WANTED;
1184		tsleep(p, PVM, "pmaprl", 0);
1185		splx(s);
1186		return 0;
1187	}
1188
1189	if (p->flags & PG_WANTED) {
1190		p->flags &= ~PG_WANTED;
1191		wakeup(p);
1192	}
1193
1194	/*
1195	 * Remove the page table page from the process's address space.
1196	 */
1197	pde[p->pindex] = 0;
1198	--pmap->pm_stats.resident_count;
1199
1200	if (p->hold_count)  {
1201		panic("pmap_release: freeing held page table page");
1202	}
1203	/*
1204	 * Page directory pages need to have the kernel
1205	 * stuff cleared, so they can go into the zero queue also.
1206	 */
1207	if (p->pindex == PTDPTDI) {
1208		bzero(pde + KPTDI, nkpt * PTESIZE);
1209#ifdef SMP
1210		pde[MPPTDI] = 0;
1211#endif
1212		pde[APTDPTDI] = 0;
1213		pmap_kremove((vm_offset_t) pmap->pm_pdir);
1214	}
1215
1216#if defined(PTPHINT)
1217	if (pmap->pm_ptphint &&
1218		(pmap->pm_ptphint->pindex == p->pindex))
1219		pmap->pm_ptphint = NULL;
1220#endif
1221
1222	vm_page_free_zero(p);
1223	splx(s);
1224	return 1;
1225}
1226
1227/*
1228 * this routine is called if the page table page is not
1229 * mapped correctly.
1230 */
1231static vm_page_t
1232_pmap_allocpte(pmap, ptepindex)
1233	pmap_t	pmap;
1234	unsigned ptepindex;
1235{
1236	vm_offset_t pteva, ptepa;
1237	vm_page_t m;
1238	int needszero = 0;
1239
1240	/*
1241	 * Find or fabricate a new pagetable page
1242	 */
1243retry:
1244	m = vm_page_lookup(pmap->pm_pteobj, ptepindex);
1245	if (m == NULL) {
1246		m = pmap_page_alloc(pmap->pm_pteobj, ptepindex);
1247		if (m == NULL)
1248			goto retry;
1249		if ((m->flags & PG_ZERO) == 0)
1250			needszero = 1;
1251		m->flags &= ~(PG_ZERO|PG_BUSY);
1252		m->valid = VM_PAGE_BITS_ALL;
1253	} else {
1254		if ((m->flags & PG_BUSY) || m->busy) {
1255			m->flags |= PG_WANTED;
1256			tsleep(m, PVM, "ptewai", 0);
1257			goto retry;
1258		}
1259	}
1260
1261	if (m->queue != PQ_NONE) {
1262		int s = splvm();
1263		vm_page_unqueue(m);
1264		splx(s);
1265	}
1266
1267	if (m->wire_count == 0)
1268		++cnt.v_wire_count;
1269	++m->wire_count;
1270
1271	/*
1272	 * Increment the hold count for the page table page
1273	 * (denoting a new mapping.)
1274	 */
1275	++m->hold_count;
1276
1277	/*
1278	 * Map the pagetable page into the process address space, if
1279	 * it isn't already there.
1280	 */
1281
1282	pmap->pm_stats.resident_count++;
1283
1284	ptepa = VM_PAGE_TO_PHYS(m);
1285	pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V);
1286
1287#if defined(PTPHINT)
1288	/*
1289	 * Set the page table hint
1290	 */
1291	pmap->pm_ptphint = m;
1292#endif
1293
1294	/*
1295	 * Try to use the new mapping, but if we cannot, then
1296	 * do it with the routine that maps the page explicitly.
1297	 */
1298	if (needszero) {
1299		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1300			(((unsigned) PTDpde) & PG_FRAME)) {
1301			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
1302			bzero((caddr_t) pteva, PAGE_SIZE);
1303		} else {
1304			pmap_zero_page(ptepa);
1305		}
1306	}
1307
1308	m->valid = VM_PAGE_BITS_ALL;
1309	m->flags |= PG_MAPPED;
1310
1311	return m;
1312}
1313
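/*
 * Return the page table page that maps va, adding a hold on it and
 * allocating it via _pmap_allocpte() if it is not already present.
 */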
1314static vm_page_t
1315pmap_allocpte(pmap, va)
1316	pmap_t	pmap;
1317	vm_offset_t va;
1318{
1319	unsigned ptepindex;
1320	vm_offset_t ptepa;
1321	vm_page_t m;
1322
1323	/*
1324	 * Calculate pagetable page index
1325	 */
1326	ptepindex = va >> PDRSHIFT;
1327
1328	/*
1329	 * Get the page directory entry
1330	 */
1331	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1332
1333	/*
1334	 * This supports switching from a 4MB page to a
1335	 * normal 4K page.
1336	 */
1337	if (ptepa & PG_PS) {
1338		pmap->pm_pdir[ptepindex] = 0;
1339		ptepa = 0;
1340		invltlb();
1341	}
1342
1343	/*
1344	 * If the page table page is mapped, we just increment the
1345	 * hold count, and activate it.
1346	 */
1347	if (ptepa) {
1348#if defined(PTPHINT)
1349		/*
1350		 * In order to get the page table page, try the
1351		 * hint first.
1352		 */
1353		if (pmap->pm_ptphint &&
1354			(pmap->pm_ptphint->pindex == ptepindex)) {
1355			m = pmap->pm_ptphint;
1356		} else {
1357			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1358			pmap->pm_ptphint = m;
1359		}
1360#else
1361		m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1362#endif
1363		++m->hold_count;
1364		return m;
1365	}
1366	/*
1367	 * Here if the pte page isn't mapped, or if it has been deallocated.
1368	 */
1369	return _pmap_allocpte(pmap, ptepindex);
1370}
1371
1372
1373/***************************************************
1374 * Pmap allocation/deallocation routines.
1375 ***************************************************/
1376
1377/*
1378 * Release any resources held by the given physical map.
1379 * Called when a pmap initialized by pmap_pinit is being released.
1380 * Should only be called if the map contains no valid mappings.
1381 */
1382void
1383pmap_release(pmap)
1384	register struct pmap *pmap;
1385{
1386	vm_page_t p,n,ptdpg;
1387	vm_object_t object = pmap->pm_pteobj;
1388
1389#if defined(DIAGNOSTIC)
1390	if (object->ref_count != 1)
1391		panic("pmap_release: pteobj reference count != 1");
1392#endif
1393
1394	ptdpg = NULL;
1395retry:
1396	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
1397		n = TAILQ_NEXT(p, listq);
1398		if (p->pindex == PTDPTDI) {
1399			ptdpg = p;
1400			continue;
1401		}
1402		if (!pmap_release_free_page(pmap, p))
1403			goto retry;
1404	}
1405
1406	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
1407		goto retry;
1408
1409	vm_object_deallocate(object);
1410	if (pdstackptr < PDSTACKMAX) {
1411		pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir;
1412		++pdstackptr;
1413	} else {
1414		int pdstmp = pdstackptr - 1;
1415		kmem_free(kernel_map, pdstack[pdstmp], PAGE_SIZE);
1416		pdstack[pdstmp] = (vm_offset_t) pmap->pm_pdir;
1417	}
1418	pmap->pm_pdir = 0;
1419}
1420
1421/*
1422 * grow the number of kernel page table entries, if needed
1423 */
1424void
1425pmap_growkernel(vm_offset_t addr)
1426{
1427	struct proc *p;
1428	struct pmap *pmap;
1429	int s;
1430#ifdef SMP
1431	int i;
1432#endif
1433
1434	s = splhigh();
1435	if (kernel_vm_end == 0) {
1436		kernel_vm_end = KERNBASE;
1437		nkpt = 0;
1438		while (pdir_pde(PTD, kernel_vm_end)) {
1439			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1440			++nkpt;
1441		}
1442	}
1443	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1444	while (kernel_vm_end < addr) {
1445		if (pdir_pde(PTD, kernel_vm_end)) {
1446			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1447			continue;
1448		}
1449		++nkpt;
1450		if (!nkpg) {
1451			vm_offset_t ptpkva = (vm_offset_t) vtopte(addr);
1452			/*
1453			 * This index is bogus, but out of the way
1454			 */
1455			vm_pindex_t ptpidx = (ptpkva >> PAGE_SHIFT);
1456			nkpg = vm_page_alloc(kernel_object,
1457				ptpidx, VM_ALLOC_SYSTEM);
1458			if (!nkpg)
1459				panic("pmap_growkernel: no memory to grow kernel");
1460			vm_page_wire(nkpg);
1461			vm_page_remove(nkpg);
1462			pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
1463		}
1464		pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW | pgeflag);
1465
1466#ifdef SMP
1467		for (i = 0; i < mp_naps; i++) {
1468			if (IdlePTDS[i])
1469				pdir_pde(IdlePTDS[i], kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW | pgeflag);
1470		}
1471#endif
1472
1473		nkpg = NULL;
1474
1475		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1476			if (p->p_vmspace) {
1477				pmap = &p->p_vmspace->vm_pmap;
1478				*pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
1479			}
1480		}
1481		*pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
1482		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1483	}
1484	splx(s);
1485}
1486
1487/*
1488 *	Retire the given physical map from service.
1489 *	Should only be called if the map contains
1490 *	no valid mappings.
1491 */
1492void
1493pmap_destroy(pmap)
1494	register pmap_t pmap;
1495{
1496	int count;
1497
1498	if (pmap == NULL)
1499		return;
1500
1501	count = --pmap->pm_count;
1502	if (count == 0) {
1503		pmap_release(pmap);
1504		free((caddr_t) pmap, M_VMPMAP);
1505	}
1506}
1507
1508/*
1509 *	Add a reference to the specified pmap.
1510 */
1511void
1512pmap_reference(pmap)
1513	pmap_t pmap;
1514{
1515	if (pmap != NULL) {
1516		pmap->pm_count++;
1517	}
1518}
1519
1520/***************************************************
1521 * Page management routines.
1522 ***************************************************/
1523
1524/*
1525 * free the pv_entry back to the free list
1526 */
1527static inline void
1528free_pv_entry(pv)
1529	pv_entry_t pv;
1530{
1531	zfreei(pvzone, pv);
1532}
1533
1534/*
1535 * get a new pv_entry, allocating a block from the system
1536 * when needed.
1537 * the memory allocation is performed bypassing the malloc code
1538 * because of the possibility of allocations at interrupt time.
1539 */
1540static inline pv_entry_t
1541get_pv_entry(void)
1542{
1543	return zalloci(pvzone);
1544}
1545
1546/*
1547 * If it is the first entry on the list, it is actually
1548 * in the header and we must copy the following entry up
1549 * to the header.  Otherwise we must search the list for
1550 * the entry.  In either case we free the now unused entry.
1551 */
1552
1553static int
1554pmap_remove_entry(pmap, ppv, va)
1555	struct pmap *pmap;
1556	pv_table_t *ppv;
1557	vm_offset_t va;
1558{
1559	pv_entry_t pv;
1560	int rtval;
1561	int s;
1562
1563	s = splvm();
1564	if (ppv->pv_list_count < pmap->pm_stats.resident_count) {
1565		for (pv = TAILQ_FIRST(&ppv->pv_list);
1566			pv;
1567			pv = TAILQ_NEXT(pv, pv_list)) {
1568			if (pmap == pv->pv_pmap && va == pv->pv_va)
1569				break;
1570		}
1571	} else {
1572		for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1573			pv;
1574			pv = TAILQ_NEXT(pv, pv_plist)) {
1575			if (va == pv->pv_va)
1576				break;
1577		}
1578	}
1579
1580	rtval = 0;
1581	if (pv) {
1582		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1583		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1584		--ppv->pv_list_count;
1585		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
1586			ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
1587		}
1588
1589		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1590		free_pv_entry(pv);
1591	}
1592
1593	splx(s);
1594	return rtval;
1595}
1596
1597/*
1598 * Create a pv entry for page at pa for
1599 * (pmap, va).
1600 */
1601static void
1602pmap_insert_entry(pmap, va, mpte, pa)
1603	pmap_t pmap;
1604	vm_offset_t va;
1605	vm_page_t mpte;
1606	vm_offset_t pa;
1607{
1608
1609	int s;
1610	pv_entry_t pv;
1611	pv_table_t *ppv;
1612
1613	s = splvm();
1614	pv = get_pv_entry();
1615	pv->pv_va = va;
1616	pv->pv_pmap = pmap;
1617	pv->pv_ptem = mpte;
1618
1619	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1620
1621	ppv = pa_to_pvh(pa);
1622	TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
1623	++ppv->pv_list_count;
1624
1625	splx(s);
1626}
1627
1628/*
1629 * pmap_remove_pte: do the things to unmap a page in a process
1630 */
1631static int
1632pmap_remove_pte(pmap, ptq, va)
1633	struct pmap *pmap;
1634	unsigned *ptq;
1635	vm_offset_t va;
1636{
1637	unsigned oldpte;
1638	pv_table_t *ppv;
1639
1640	oldpte = *ptq;
1641	*ptq = 0;
1642	if (oldpte & PG_W)
1643		pmap->pm_stats.wired_count -= 1;
1644	/*
1645	 * Machines that don't support invlpg also don't support
1646	 * PG_G.
1647	 */
1648	if (oldpte & PG_G)
1649		invlpg(va);
1650	pmap->pm_stats.resident_count -= 1;
1651	if (oldpte & PG_MANAGED) {
1652		ppv = pa_to_pvh(oldpte);
1653		if (oldpte & PG_M) {
1654#if defined(PMAP_DIAGNOSTIC)
1655			if (pmap_nw_modified((pt_entry_t) oldpte)) {
1656				printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", (u_long)va, (u_long)oldpte);
1657			}
1658#endif
1659			if (pmap_track_modified(va))
1660				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1661		}
1662		return pmap_remove_entry(pmap, ppv, va);
1663	} else {
1664		return pmap_unuse_pt(pmap, va, NULL);
1665	}
1666
1667	return 0;
1668}
1669
1670/*
1671 * Remove a single page from a process address space
1672 */
1673static void
1674pmap_remove_page(pmap, va)
1675	struct pmap *pmap;
1676	register vm_offset_t va;
1677{
1678	register unsigned *ptq;
1679
1680	/*
1681	 * if there is no pte for this address, just skip it!!!
1682	 */
1683	if (*pmap_pde(pmap, va) == 0) {
1684		return;
1685	}
1686
1687	/*
1688	 * get a local va for mappings for this pmap.
1689	 */
1690	ptq = get_ptbase(pmap) + i386_btop(va);
1691	if (*ptq) {
1692		(void) pmap_remove_pte(pmap, ptq, va);
1693		invltlb_1pg(va);
1694	}
1695	return;
1696}
1697
1698/*
1699 *	Remove the given range of addresses from the specified map.
1700 *
1701 *	It is assumed that the start and end are properly
1702 *	rounded to the page size.
1703 */
1704void
1705pmap_remove(pmap, sva, eva)
1706	struct pmap *pmap;
1707	register vm_offset_t sva;
1708	register vm_offset_t eva;
1709{
1710	register unsigned *ptbase;
1711	vm_offset_t pdnxt;
1712	vm_offset_t ptpaddr;
1713	vm_offset_t sindex, eindex;
1714	int anyvalid;
1715
1716	if (pmap == NULL)
1717		return;
1718
1719	if (pmap->pm_stats.resident_count == 0)
1720		return;
1721
1722	/*
1723	 * special handling of removing one page.  a very
1724	 * common operation and easy to short circuit some
1725	 * code.
1726	 */
1727	if (((sva + PAGE_SIZE) == eva) &&
1728		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1729		pmap_remove_page(pmap, sva);
1730		return;
1731	}
1732
1733	anyvalid = 0;
1734
1735	/*
1736	 * Get a local virtual address for the mappings that are being
1737	 * worked with.
1738	 */
1739	ptbase = get_ptbase(pmap);
1740
1741	sindex = i386_btop(sva);
1742	eindex = i386_btop(eva);
1743
1744	for (; sindex < eindex; sindex = pdnxt) {
1745		unsigned pdirindex;
1746
1747		/*
1748		 * Calculate index for next page table.
1749		 */
1750		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1751		if (pmap->pm_stats.resident_count == 0)
1752			break;
1753
1754		pdirindex = sindex / NPDEPG;
1755		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1756			pmap->pm_pdir[pdirindex] = 0;
1757			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1758			anyvalid++;
1759			continue;
1760		}
1761
1762		/*
1763		 * Weed out invalid mappings. Note: we assume that the page
1764		 * directory table is always allocated, and in kernel virtual.
1765		 */
1766		if (ptpaddr == 0)
1767			continue;
1768
1769		/*
1770		 * Limit our scan to either the end of the va represented
1771		 * by the current page table page, or to the end of the
1772		 * range being removed.
1773		 */
1774		if (pdnxt > eindex) {
1775			pdnxt = eindex;
1776		}
1777
1778		for ( ;sindex != pdnxt; sindex++) {
1779			vm_offset_t va;
1780			if (ptbase[sindex] == 0) {
1781				continue;
1782			}
1783			va = i386_ptob(sindex);
1784
1785			anyvalid++;
1786			if (pmap_remove_pte(pmap,
1787				ptbase + sindex, va))
1788				break;
1789		}
1790	}
1791
1792	if (anyvalid) {
1793		invltlb();
1794	}
1795}
1796
1797/*
1798 *	Routine:	pmap_remove_all
1799 *	Function:
1800 *		Removes this physical page from
1801 *		all physical maps in which it resides.
1802 *		Reflects back modify bits to the pager.
1803 *
1804 *	Notes:
1805 *		Original versions of this routine were very
1806 *		inefficient because they iteratively called
1807 *		pmap_remove (slow...)
1808 */
1809
1810static void
1811pmap_remove_all(pa)
1812	vm_offset_t pa;
1813{
1814	register pv_entry_t pv;
1815	pv_table_t *ppv;
1816	register unsigned *pte, tpte;
1817	int nmodify;
1818	int update_needed;
1819	int s;
1820
1821	nmodify = 0;
1822	update_needed = 0;
1823#if defined(PMAP_DIAGNOSTIC)
1824	/*
1825	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1826	 * pages!
1827	 */
1828	if (!pmap_is_managed(pa)) {
1829		panic("pmap_page_protect: illegal for unmanaged page, pa: 0x%lx", (u_long)pa);
1830	}
1831#endif
1832
1833	s = splvm();
1834	ppv = pa_to_pvh(pa);
1835	while ((pv = TAILQ_FIRST(&ppv->pv_list)) != NULL) {
1836		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
1837
1838		pv->pv_pmap->pm_stats.resident_count--;
1839
1840		tpte = *pte;
1841		*pte = 0;
1842		if (tpte & PG_W)
1843			pv->pv_pmap->pm_stats.wired_count--;
1844		/*
1845		 * Update the vm_page_t clean and reference bits.
1846		 */
1847		if (tpte & PG_M) {
1848#if defined(PMAP_DIAGNOSTIC)
1849			if (pmap_nw_modified((pt_entry_t) tpte)) {
1850				printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", (u_long)pv->pv_va, (u_long)tpte);
1851			}
1852#endif
1853			if (pmap_track_modified(pv->pv_va))
1854				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1855		}
1856		if (!update_needed &&
1857			((!curproc || (&curproc->p_vmspace->vm_pmap == pv->pv_pmap)) ||
1858			(pv->pv_pmap == kernel_pmap))) {
1859			update_needed = 1;
1860		}
1861
1862		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1863		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1864		--ppv->pv_list_count;
1865		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1866		free_pv_entry(pv);
1867	}
1868	ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
1869
1870
1871	if (update_needed)
1872		invltlb();
1873	splx(s);
1874	return;
1875}
1876
1877/*
1878 *	Set the physical protection on the
1879 *	specified range of this map as requested.
1880 */
1881void
1882pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1883{
1884	register unsigned *ptbase;
1885	vm_offset_t pdnxt;
1886	vm_offset_t ptpaddr;
1887	vm_offset_t sindex, eindex;
1888	int anychanged;
1889
1890
1891	if (pmap == NULL)
1892		return;
1893
1894	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1895		pmap_remove(pmap, sva, eva);
1896		return;
1897	}
1898
1899	anychanged = 0;
1900
1901	ptbase = get_ptbase(pmap);
1902
1903	sindex = i386_btop(sva);
1904	eindex = i386_btop(eva);
1905
1906	for (; sindex < eindex; sindex = pdnxt) {
1907
1908		unsigned pdirindex;
1909
1910		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1911
1912		pdirindex = sindex / NPDEPG;
1913		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1914			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
1915			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1916			anychanged++;
1917			continue;
1918		}
1919
1920		/*
1921		 * Weed out invalid mappings. Note: we assume that the page
1922		 * directory table is always allocated, and in kernel virtual.
1923		 */
1924		if (ptpaddr == 0)
1925			continue;
1926
1927		if (pdnxt > eindex) {
1928			pdnxt = eindex;
1929		}
1930
1931		for (; sindex != pdnxt; sindex++) {
1932
1933			unsigned pbits = ptbase[sindex];
1934
1935			if (prot & VM_PROT_WRITE) {
1936				if ((pbits & (PG_RW|PG_V)) == PG_V) {
1937					if (pbits & PG_MANAGED) {
1938						vm_page_t m = PHYS_TO_VM_PAGE(pbits);
1939						m->flags |= PG_WRITEABLE;
1940						m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY;
1941					}
1942					ptbase[sindex] = pbits | PG_RW;
1943					anychanged = 1;
1944				}
1945			} else if (pbits & PG_RW) {
1946				if (pbits & PG_M) {
1947					vm_offset_t sva = i386_ptob(sindex);
1948					if ((pbits & PG_MANAGED) && pmap_track_modified(sva)) {
1949						vm_page_t m = PHYS_TO_VM_PAGE(pbits);
1950						m->dirty = VM_PAGE_BITS_ALL;
1951					}
1952				}
1953				ptbase[sindex] = pbits & ~(PG_M|PG_RW);
1954				anychanged = 1;
1955			}
1956		}
1957	}
1958	if (anychanged)
1959		invltlb();
1960}
1961
1962/*
1963 *	Insert the given physical page (p) at
1964 *	the specified virtual address (v) in the
1965 *	target physical map with the protection requested.
1966 *
1967 *	If specified, the page will be wired down, meaning
1968 *	that the related pte can not be reclaimed.
1969 *
1970 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1971 *	or lose information.  That is, this routine must actually
1972 *	insert this page into the given map NOW.
1973 */
1974void
1975pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot,
1976	   boolean_t wired)
1977{
1978	register unsigned *pte;
1979	vm_offset_t opa;
1980	vm_offset_t origpte, newpte;
1981	vm_page_t mpte;
1982
1983	if (pmap == NULL)
1984		return;
1985
1986	va &= PG_FRAME;
1987#ifdef PMAP_DIAGNOSTIC
1988	if (va > VM_MAX_KERNEL_ADDRESS)
1989		panic("pmap_enter: toobig");
1990	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
1991		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
1992#endif
1993
1994	mpte = NULL;
1995	/*
1996	 * In the case that a page table page is not
1997	 * resident, we are creating it here.
1998	 */
1999	if (va < UPT_MIN_ADDRESS)
2000		mpte = pmap_allocpte(pmap, va);
2001
2002	pte = pmap_pte(pmap, va);
2003	/*
2004	 * Page Directory table entry not valid, we need a new PT page
2005	 */
2006	if (pte == NULL) {
2007		panic("pmap_enter: invalid page directory, pdir=0x%lx, va=0x%lx",
2008			(u_long)pmap->pm_pdir[PTDPTDI], (u_long)va);
2009	}
2010
2011	origpte = *(vm_offset_t *)pte;
2012	pa &= PG_FRAME;
2013	opa = origpte & PG_FRAME;
2014	if (origpte & PG_PS)
2015		panic("pmap_enter: attempted pmap_enter on 4MB page");
2016
2017	/*
2018	 * Mapping has not changed, must be protection or wiring change.
2019	 */
2020	if (origpte && (opa == pa)) {
2021		/*
2022		 * Wiring change, just update stats. We don't worry about
2023		 * wiring PT pages as they remain resident as long as there
2024		 * are valid mappings in them. Hence, if a user page is wired,
2025		 * the PT page will be also.
2026		 */
2027		if (wired && ((origpte & PG_W) == 0))
2028			pmap->pm_stats.wired_count++;
2029		else if (!wired && (origpte & PG_W))
2030			pmap->pm_stats.wired_count--;
2031
2032#if defined(PMAP_DIAGNOSTIC)
2033		if (pmap_nw_modified((pt_entry_t) origpte)) {
2034			printf("pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n", (u_long)va, (u_long)origpte);
2035		}
2036#endif
2037
2038		/*
2039		 * We might be turning off write access to the page,
2040		 * so we go ahead and sense modify status.
2041		 */
2042		if (origpte & PG_MANAGED) {
2043			vm_page_t m;
2044			if (origpte & PG_M) {
2045				if (pmap_track_modified(va)) {
2046					m = PHYS_TO_VM_PAGE(pa);
2047					m->dirty = VM_PAGE_BITS_ALL;
2048				}
2049			}
2050			pa |= PG_MANAGED;
2051		}
2052
2053		if (mpte)
2054			--mpte->hold_count;
2055
2056		goto validate;
2057	}
2058	/*
2059	 * Mapping has changed, invalidate old range and fall through to
2060	 * handle validating new mapping.
2061	 */
2062	if (opa) {
2063		int err;
2064		err = pmap_remove_pte(pmap, pte, va);
2065		if (err)
2066			panic("pmap_enter: pte vanished, va: 0x%x", va);
2067	}
2068
2069	/*
2070	 * Enter on the PV list if part of our managed memory.  Note that we
2071	 * raise IPL while manipulating pv_table since pmap_enter can be
2072	 * called at interrupt time.
2073	 */
2074	if (pmap_is_managed(pa)) {
2075		pmap_insert_entry(pmap, va, mpte, pa);
2076		pa |= PG_MANAGED;
2077	}
2078
2079	/*
2080	 * Increment counters
2081	 */
2082	pmap->pm_stats.resident_count++;
2083	if (wired)
2084		pmap->pm_stats.wired_count++;
2085
2086validate:
2087	/*
2088	 * Now validate mapping with desired protection/wiring.
2089	 */
2090	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2091
2092	if (wired)
2093		newpte |= PG_W;
2094	if (va < UPT_MIN_ADDRESS)
2095		newpte |= PG_U;
2096	if (pmap == kernel_pmap)
2097		newpte |= pgeflag;
2098
2099	/*
2100	 * if the mapping or permission bits are different, we need
2101	 * to update the pte.
2102	 */
2103	if ((origpte & ~(PG_M|PG_A)) != newpte) {
2104		*pte = newpte;
2105		if (origpte)
2106			invltlb_1pg(va);
2107	}
2108}
2109
2110/*
2111 * this code makes some *MAJOR* assumptions:
2112 * 1. Current pmap & pmap exists.
2113 * 2. Not wired.
2114 * 3. Read access.
2115 * 4. No page table pages.
2116 * 5. Tlbflush is deferred to calling procedure.
2117 * 6. Page IS managed.
2118 * but is *MUCH* faster than pmap_enter...
2119 */
2120
2121static vm_page_t
2122pmap_enter_quick(pmap, va, pa, mpte)
2123	register pmap_t pmap;
2124	vm_offset_t va;
2125	register vm_offset_t pa;
2126	vm_page_t mpte;
2127{
2128	register unsigned *pte;
2129
2130	/*
2131	 * In the case that a page table page is not
2132	 * resident, we are creating it here.
2133	 */
2134	if (va < UPT_MIN_ADDRESS) {
2135		unsigned ptepindex;
2136		vm_offset_t ptepa;
2137
2138		/*
2139		 * Calculate pagetable page index
2140		 */
2141		ptepindex = va >> PDRSHIFT;
2142		if (mpte && (mpte->pindex == ptepindex)) {
2143			++mpte->hold_count;
2144		} else {
2145retry:
2146			/*
2147			 * Get the page directory entry
2148			 */
2149			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2150
2151			/*
2152			 * If the page table page is mapped, we just increment
2153			 * the hold count, and activate it.
2154			 */
2155			if (ptepa) {
2156				if (ptepa & PG_PS)
2157					panic("pmap_enter_quick: unexpected mapping into 4MB page");
2158#if defined(PTPHINT)
2159				if (pmap->pm_ptphint &&
2160					(pmap->pm_ptphint->pindex == ptepindex)) {
2161					mpte = pmap->pm_ptphint;
2162				} else {
2163					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2164					pmap->pm_ptphint = mpte;
2165				}
2166#else
2167				mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2168#endif
2169				if (mpte == NULL)
2170					goto retry;
2171				++mpte->hold_count;
2172			} else {
2173				mpte = _pmap_allocpte(pmap, ptepindex);
2174			}
2175		}
2176	} else {
2177		mpte = NULL;
2178	}
2179
2180	/*
2181	 * This call to vtopte makes the assumption that we are
2182	 * entering the page into the current pmap.  In order to support
2183	 * quick entry into any pmap, one would likely use pmap_pte_quick.
2184	 * But that isn't as quick as vtopte.
2185	 */
2186	pte = (unsigned *)vtopte(va);
2187	if (*pte) {
2188		if (mpte)
2189			pmap_unwire_pte_hold(pmap, mpte);
2190		return 0;
2191	}
2192
2193	/*
2194	 * Enter on the PV list if part of our managed memory.  Note that we
2195	 * raise IPL while manipulating pv_table since pmap_enter can be
2196	 * called at interrupt time.
2197	 */
2198	pmap_insert_entry(pmap, va, mpte, pa);
2199
2200	/*
2201	 * Increment counters
2202	 */
2203	pmap->pm_stats.resident_count++;
2204
2205	/*
2206	 * Now validate mapping with RO protection
2207	 */
2208	*pte = pa | PG_V | PG_U | PG_MANAGED;
2209
2210	return mpte;
2211}
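
/*
 * For illustration, a typical caller threads the returned page table
 * page through successive calls so that the page table page hold
 * count is maintained cheaply across a run of insertions, roughly:
 *
 *	mpte = NULL;
 *	for each resident page p to be mapped:
 *		mpte = pmap_enter_quick(pmap, va, VM_PAGE_TO_PHYS(p), mpte);
 *
 * This is exactly the pattern used by pmap_object_init_pt() and
 * pmap_prefault() below; the TLB flush is left to the caller, per
 * assumption 5 above.
 */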
2212
2213#define MAX_INIT_PT (96)
2214/*
2215 * pmap_object_init_pt preloads the ptes for a given object
2216 * into the specified pmap.  This eliminates the blast of soft
2217 * faults on process startup and immediately after an mmap.
2218 */
2219void
2220pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
2221	pmap_t pmap;
2222	vm_offset_t addr;
2223	vm_object_t object;
2224	vm_pindex_t pindex;
2225	vm_size_t size;
2226	int limit;
2227{
2228	vm_offset_t tmpidx;
2229	int psize;
2230	vm_page_t p, mpte;
2231	int objpgs;
2232
2233	if (!pmap)
2234		return;
2235
2236	/*
2237	 * This code maps large physical mmap regions into the
2238	 * processor address space.  Note that some shortcuts
2239	 * are taken, but the code works.
2240	 */
2241	if (pseflag &&
2242		(object->type == OBJT_DEVICE) &&
2243		((addr & (NBPDR - 1)) == 0) &&
2244		((size & (NBPDR - 1)) == 0) ) {
2245		int i;
2246		int s;
2247		vm_page_t m[1];
2248		unsigned int ptepindex;
2249		int npdes;
2250		vm_offset_t ptepa;
2251
2252		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
2253			return;
2254
2255		s = splhigh();
2256retry:
2257		p = vm_page_lookup(object, pindex);
2258		if (p && (p->flags & PG_BUSY)) {
2259			tsleep(p, PVM, "init4p", 0);
2260			goto retry;
2261		}
2262		splx(s);
2263
2264		if (p == NULL) {
2265			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
2266			if (p == NULL)
2267				return;
2268			m[0] = p;
2269
2270			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
2271				PAGE_WAKEUP(p);
2272				vm_page_free(p);
2273				return;
2274			}
2275
2276			p = vm_page_lookup(object, pindex);
2277			PAGE_WAKEUP(p);
2278		}
2279
2280		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
2281		if (ptepa & (NBPDR - 1)) {
2282			return;
2283		}
2284
2285		p->valid = VM_PAGE_BITS_ALL;
2286
2287		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
2288		npdes = size >> PDRSHIFT;
2289		for(i=0;i<npdes;i++) {
2290			pmap->pm_pdir[ptepindex] =
2291				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
2292			ptepa += NBPDR;
2293			ptepindex += 1;
2294		}
2295		p->flags |= PG_MAPPED;
2296		invltlb();
2297		return;
2298	}
2299
2300	psize = i386_btop(size);
2301
2302	if ((object->type != OBJT_VNODE) ||
2303		(limit && (psize > MAX_INIT_PT) &&
2304			(object->resident_page_count > MAX_INIT_PT))) {
2305		return;
2306	}
2307
2308	if (psize + pindex > object->size)
2309		psize = object->size - pindex;
2310
2311	mpte = NULL;
2312	/*
2313	 * if we are processing a major portion of the object, then scan the
2314	 * entire thing.
2315	 */
2316	if (psize > (object->size >> 2)) {
2317		objpgs = psize;
2318
2319		for (p = TAILQ_FIRST(&object->memq);
2320		    ((objpgs > 0) && (p != NULL));
2321		    p = TAILQ_NEXT(p, listq)) {
2322
2323			tmpidx = p->pindex;
2324			if (tmpidx < pindex) {
2325				continue;
2326			}
2327			tmpidx -= pindex;
2328			if (tmpidx >= psize) {
2329				continue;
2330			}
2331			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2332			    (p->busy == 0) &&
2333			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2334				if ((p->queue - p->pc) == PQ_CACHE)
2335					vm_page_deactivate(p);
2336				p->flags |= PG_BUSY;
2337				mpte = pmap_enter_quick(pmap,
2338					addr + i386_ptob(tmpidx),
2339					VM_PAGE_TO_PHYS(p), mpte);
2340				p->flags |= PG_MAPPED;
2341				PAGE_WAKEUP(p);
2342			}
2343			objpgs -= 1;
2344		}
2345	} else {
2346		/*
2347	 * else look up the pages one-by-one.
2348		 */
2349		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
2350			p = vm_page_lookup(object, tmpidx + pindex);
2351			if (p &&
2352			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2353			    (p->busy == 0) &&
2354			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2355				if ((p->queue - p->pc) == PQ_CACHE)
2356					vm_page_deactivate(p);
2357				p->flags |= PG_BUSY;
2358				mpte = pmap_enter_quick(pmap,
2359					addr + i386_ptob(tmpidx),
2360					VM_PAGE_TO_PHYS(p), mpte);
2361				p->flags |= PG_MAPPED;
2362				PAGE_WAKEUP(p);
2363			}
2364		}
2365	}
2366	return;
2367}
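
/*
 * For illustration, the 4MB-page shortcut above requires addr and size
 * to be NBPDR (4MB) aligned and the backing physical address to come
 * out 4MB aligned as well; a hypothetical 8MB device mapping at
 * addr 0x40000000 would then install two PG_PS directory entries:
 *
 *	pm_pdir[0x40000000 >> PDRSHIFT] = ptepa | PG_U|PG_RW|PG_V|PG_PS;
 *	pm_pdir[0x40400000 >> PDRSHIFT] = (ptepa + NBPDR) | PG_U|PG_RW|PG_V|PG_PS;
 *
 * If the physical address is misaligned the shortcut just returns;
 * a misaligned addr or size falls through to the pmap_enter_quick()
 * loops instead.
 */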
2368
2369/*
2370 * pmap_prefault provides a quick way of clustering
2371 * page faults into a process's address space.  It is a "cousin"
2372 * of pmap_object_init_pt, except it runs at page fault time instead
2373 * of mmap time.
2374 */
2375#define PFBAK 2
2376#define PFFOR 2
2377#define PAGEORDER_SIZE (PFBAK+PFFOR)
2378
2379static int pmap_prefault_pageorder[] = {
2380	-PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE
2381};
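
/*
 * For example, with the ordering above a fault at address A probes
 * A - 4K, A + 4K, A - 8K and A + 8K (PAGE_SIZE is 4K here); the loop
 * below skips any candidate outside [starta, entry->end), any address
 * whose page directory entry is missing, and any address that is
 * already mapped.
 */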
2382
2383void
2384pmap_prefault(pmap, addra, entry, object)
2385	pmap_t pmap;
2386	vm_offset_t addra;
2387	vm_map_entry_t entry;
2388	vm_object_t object;
2389{
2390	int i;
2391	vm_offset_t starta;
2392	vm_offset_t addr;
2393	vm_pindex_t pindex;
2394	vm_page_t m, mpte;
2395
2396	if (entry->object.vm_object != object)
2397		return;
2398
2399	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap))
2400		return;
2401
2402	starta = addra - PFBAK * PAGE_SIZE;
2403	if (starta < entry->start) {
2404		starta = entry->start;
2405	} else if (starta > addra) {
2406		starta = 0;
2407	}
2408
2409	mpte = NULL;
2410	for (i = 0; i < PAGEORDER_SIZE; i++) {
2411		vm_object_t lobject;
2412		unsigned *pte;
2413
2414		addr = addra + pmap_prefault_pageorder[i];
2415		if (addr < starta || addr >= entry->end)
2416			continue;
2417
2418		if ((*pmap_pde(pmap, addr)) == 0)
2419			continue;
2420
2421		pte = (unsigned *) vtopte(addr);
2422		if (*pte)
2423			continue;
2424
2425		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2426		lobject = object;
2427		for (m = vm_page_lookup(lobject, pindex);
2428		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2429		    lobject = lobject->backing_object) {
2430			if (lobject->backing_object_offset & PAGE_MASK)
2431				break;
2432			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2433			m = vm_page_lookup(lobject->backing_object, pindex);
2434		}
2435
2436		/*
2437		 * give up when a page is not in memory
2438		 */
2439		if (m == NULL)
2440			break;
2441
2442		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2443		    (m->busy == 0) &&
2444		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2445
2446			if ((m->queue - m->pc) == PQ_CACHE) {
2447				vm_page_deactivate(m);
2448			}
2449			m->flags |= PG_BUSY;
2450			mpte = pmap_enter_quick(pmap, addr,
2451				VM_PAGE_TO_PHYS(m), mpte);
2452			m->flags |= PG_MAPPED;
2453			PAGE_WAKEUP(m);
2454		}
2455	}
2456}
2457
2458/*
2459 *	Routine:	pmap_change_wiring
2460 *	Function:	Change the wiring attribute for a map/virtual-address
2461 *			pair.
2462 *	In/out conditions:
2463 *			The mapping must already exist in the pmap.
2464 */
2465void
2466pmap_change_wiring(pmap, va, wired)
2467	register pmap_t pmap;
2468	vm_offset_t va;
2469	boolean_t wired;
2470{
2471	register unsigned *pte;
2472
2473	if (pmap == NULL)
2474		return;
2475
2476	pte = pmap_pte(pmap, va);
2477
2478	if (wired && !pmap_pte_w(pte))
2479		pmap->pm_stats.wired_count++;
2480	else if (!wired && pmap_pte_w(pte))
2481		pmap->pm_stats.wired_count--;
2482
2483	/*
2484	 * Wiring is not a hardware characteristic so there is no need to
2485	 * invalidate TLB.
2486	 */
2487	pmap_pte_set_w(pte, wired);
2488}
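
/*
 * For illustration, PG_W is a software-only bit, so wiring is pure
 * bookkeeping; a hypothetical caller wiring and then unwiring an
 * existing user mapping
 *
 *	pmap_change_wiring(pmap, va, TRUE);
 *	pmap_change_wiring(pmap, va, FALSE);
 *
 * would bump pm_stats.wired_count up and back down without generating
 * any TLB traffic.
 */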
2489
2490
2491
2492/*
2493 *	Copy the range specified by src_addr/len
2494 *	from the source map to the range dst_addr/len
2495 *	in the destination map.
2496 *
2497 *	This routine is only advisory and need not do anything.
2498 */
2499
2500void
2501pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2502	pmap_t dst_pmap, src_pmap;
2503	vm_offset_t dst_addr;
2504	vm_size_t len;
2505	vm_offset_t src_addr;
2506{
2507	vm_offset_t addr;
2508	vm_offset_t end_addr = src_addr + len;
2509	vm_offset_t pdnxt;
2510	unsigned src_frame, dst_frame;
2511
2512	if (dst_addr != src_addr)
2513		return;
2514
2515	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2516	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
2517		return;
2518	}
2519
2520	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2521	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
2522		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
2523		invltlb();
2524	}
2525
2526	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
2527		unsigned *src_pte, *dst_pte;
2528		vm_page_t dstmpte, srcmpte;
2529		vm_offset_t srcptepaddr;
2530		unsigned ptepindex;
2531
2532		if (addr >= UPT_MIN_ADDRESS)
2533			panic("pmap_copy: invalid to pmap_copy page tables");
2534
2535		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2536		ptepindex = addr >> PDRSHIFT;
2537
2538		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2539		if (srcptepaddr == 0)
2540			continue;
2541
2542		if (srcptepaddr & PG_PS) {
2543			if (dst_pmap->pm_pdir[ptepindex] == 0) {
2544				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2545				dst_pmap->pm_stats.resident_count += NBPDR;
2546			}
2547			continue;
2548		}
2549
2550		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
2551		if ((srcmpte == NULL) ||
2552			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
2553			continue;
2554
2555		if (pdnxt > end_addr)
2556			pdnxt = end_addr;
2557
2558		src_pte = (unsigned *) vtopte(addr);
2559		dst_pte = (unsigned *) avtopte(addr);
2560		while (addr < pdnxt) {
2561			unsigned ptetemp;
2562			ptetemp = *src_pte;
2563			/*
2564			 * we only virtual-copy managed pages
2565			 */
2566			if ((ptetemp & PG_MANAGED) != 0) {
2567				/*
2568				 * We have to check after allocpte for the
2569				 * pte still being around...  allocpte can
2570				 * block.
2571				 */
2572				dstmpte = pmap_allocpte(dst_pmap, addr);
2573				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
2574					/*
2575					 * Clear the modified and
2576					 * accessed (referenced) bits
2577					 * during the copy.
2578					 */
2579					*dst_pte = ptetemp & ~(PG_M|PG_A);
2580					dst_pmap->pm_stats.resident_count++;
2581					pmap_insert_entry(dst_pmap, addr,
2582						dstmpte,
2583						(ptetemp & PG_FRAME));
2584	 			} else {
2585					pmap_unwire_pte_hold(dst_pmap, dstmpte);
2586				}
2587				if (dstmpte->hold_count >= srcmpte->hold_count)
2588					break;
2589			}
2590			addr += PAGE_SIZE;
2591			++src_pte;
2592			++dst_pte;
2593		}
2594	}
2595}
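
/*
 * For illustration, the copy above walks both address spaces with a
 * single index: the source (current) pmap's ptes are reached through
 * the normal recursive map and the destination's through the
 * alternate map installed via APTDpde,
 *
 *	src_pte = (unsigned *) vtopte(addr);
 *	dst_pte = (unsigned *) avtopte(addr);
 *
 * which is also why the routine bails out early unless
 * dst_addr == src_addr and src_pmap is the current pmap.
 */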
2596
2597/*
2598 *	Routine:	pmap_kernel
2599 *	Function:
2600 *		Returns the physical map handle for the kernel.
2601 */
2602pmap_t
2603pmap_kernel()
2604{
2605	return (kernel_pmap);
2606}
2607
2608/*
2609 *	pmap_zero_page zeros the specified (machine independent)
2610 *	page by mapping the page into virtual memory and using
2611 *	bzero to clear its contents, one machine dependent page
2612 *	at a time.
2613 */
2614void
2615pmap_zero_page(phys)
2616	vm_offset_t phys;
2617{
2618#ifdef SMP
2619	if (*(int *) prv_CMAP3)
2620		panic("pmap_zero_page: prv_CMAP3 busy");
2621
2622	*(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME);
2623	invltlb_1pg((vm_offset_t) &prv_CPAGE3);
2624
2625	bzero(&prv_CPAGE3, PAGE_SIZE);
2626
2627	*(int *) prv_CMAP3 = 0;
2628	invltlb_1pg((vm_offset_t) &prv_CPAGE3);
2629#else
2630	if (*(int *) CMAP2)
2631		panic("pmap_zero_page: CMAP busy");
2632
2633	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME);
2634	bzero(CADDR2, PAGE_SIZE);
2635	*(int *) CMAP2 = 0;
2636	invltlb_1pg((vm_offset_t) CADDR2);
2637#endif
2638}
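
/*
 * For illustration, the page being zeroed need not be mapped anywhere
 * else in the kernel, so a reserved pte slot (CMAP2, or the per-cpu
 * prv_CMAP3 under SMP) is temporarily pointed at the frame and the
 * zeroing is done through the matching virtual window (CADDR2 /
 * prv_CPAGE3), after which the window is unmapped and flushed.  A
 * typical caller just hands in a physical address, e.g.
 *
 *	pmap_zero_page(VM_PAGE_TO_PHYS(m));
 *
 * pmap_copy_page() below uses the same idea with two windows at once.
 */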
2639
2640/*
2641 *	pmap_copy_page copies the specified (machine independent)
2642 *	page by mapping the page into virtual memory and using
2643 *	bcopy to copy the page, one machine dependent page at a
2644 *	time.
2645 */
2646void
2647pmap_copy_page(src, dst)
2648	vm_offset_t src;
2649	vm_offset_t dst;
2650{
2651#ifdef SMP
2652	if (*(int *) prv_CMAP1)
2653		panic("pmap_copy_page: prv_CMAP1 busy");
2654	if (*(int *) prv_CMAP2)
2655		panic("pmap_copy_page: prv_CMAP2 busy");
2656
2657	*(int *) prv_CMAP1 = PG_V | PG_RW | (src & PG_FRAME);
2658	*(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME);
2659
2660	invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2);
2661
2662	bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE);
2663
2664	*(int *) prv_CMAP1 = 0;
2665	*(int *) prv_CMAP2 = 0;
2666	invltlb_2pg( (vm_offset_t) &prv_CPAGE1, (vm_offset_t) &prv_CPAGE2);
2667#else
2668	if (*(int *) CMAP1 || *(int *) CMAP2)
2669		panic("pmap_copy_page: CMAP busy");
2670
2671	*(int *) CMAP1 = PG_V | PG_RW | (src & PG_FRAME);
2672	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME);
2673
2674	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2675
2676	*(int *) CMAP1 = 0;
2677	*(int *) CMAP2 = 0;
2678	invltlb_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2);
2679#endif
2680}
2681
2682
2683/*
2684 *	Routine:	pmap_pageable
2685 *	Function:
2686 *		Make the specified pages (by pmap, offset)
2687 *		pageable (or not) as requested.
2688 *
2689 *		A page which is not pageable may not take
2690 *		a fault; therefore, its page table entry
2691 *		must remain valid for the duration.
2692 *
2693 *		This routine is merely advisory; pmap_enter
2694 *		will specify that these pages are to be wired
2695 *		down (or not) as appropriate.
2696 */
2697void
2698pmap_pageable(pmap, sva, eva, pageable)
2699	pmap_t pmap;
2700	vm_offset_t sva, eva;
2701	boolean_t pageable;
2702{
2703}
2704
2705/*
2706 * this routine returns true if a physical page resides
2707 * in the given pmap.
2708 */
2709boolean_t
2710pmap_page_exists(pmap, pa)
2711	pmap_t pmap;
2712	vm_offset_t pa;
2713{
2714	register pv_entry_t pv;
2715	pv_table_t *ppv;
2716	int s;
2717
2718	if (!pmap_is_managed(pa))
2719		return FALSE;
2720
2721	s = splvm();
2722
2723	ppv = pa_to_pvh(pa);
2724	/*
2725	 * Check current mappings, returning immediately if one is found.
2726	 */
2727	for (pv = TAILQ_FIRST(&ppv->pv_list);
2728		pv;
2729		pv = TAILQ_NEXT(pv, pv_list)) {
2730		if (pv->pv_pmap == pmap) {
2731			splx(s);
2732			return TRUE;
2733		}
2734	}
2735	splx(s);
2736	return (FALSE);
2737}
2738
2739#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2740/*
2741 * Remove all pages from the specified address space;
2742 * this aids process exit speeds.  Also, this code
2743 * is special-cased for the current process only, but
2744 * can have the more generic (and slightly slower)
2745 * mode enabled.  This is much faster than pmap_remove
2746 * in the case of running down an entire address space.
2747 */
2748void
2749pmap_remove_pages(pmap, sva, eva)
2750	pmap_t pmap;
2751	vm_offset_t sva, eva;
2752{
2753	unsigned *pte, tpte;
2754	pv_table_t *ppv;
2755	pv_entry_t pv, npv;
2756	int s;
2757
2758#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2759	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) {
2760		printf("warning: pmap_remove_pages called with non-current pmap\n");
2761		return;
2762	}
2763#endif
2764
2765	s = splvm();
2766	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
2767		pv;
2768		pv = npv) {
2769
2770		if (pv->pv_va >= eva || pv->pv_va < sva) {
2771			npv = TAILQ_NEXT(pv, pv_plist);
2772			continue;
2773		}
2774
2775#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2776		pte = (unsigned *)vtopte(pv->pv_va);
2777#else
2778		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2779#endif
2780		tpte = *pte;
2781
2782		/*
2783		 * We cannot remove wired pages from a process' mapping at this time
2784		 */
2785		if (tpte & PG_W) {
2786			npv = TAILQ_NEXT(pv, pv_plist);
2787			continue;
2788		}
2789		*pte = 0;
2790
2791		ppv = pa_to_pvh(tpte);
2792
2793		pv->pv_pmap->pm_stats.resident_count--;
2794
2795		/*
2796		 * Update the vm_page_t clean and reference bits.
2797		 */
2798		if (tpte & PG_M) {
2799			ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2800		}
2801
2802
2803		npv = TAILQ_NEXT(pv, pv_plist);
2804		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2805
2806		--ppv->pv_list_count;
2807		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
2808		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
2809			ppv->pv_vm_page->flags &= ~(PG_MAPPED|PG_WRITEABLE);
2810		}
2811
2812		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2813		free_pv_entry(pv);
2814	}
2815	splx(s);
2816	invltlb();
2817}
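
/*
 * For illustration, a hypothetical caller running down an exiting
 * process would cover the whole user portion of the address space in
 * one call, e.g.
 *
 *	pmap_remove_pages(&p->p_vmspace->vm_pmap, 0, UPT_MIN_ADDRESS);
 *
 * which walks the pmap's pv list once instead of probing every page
 * table page the way pmap_remove() would; wired mappings are skipped,
 * as noted above.
 */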
2818
2819/*
2820 * pmap_testbit tests bits in PTEs.
2821 * Note that the testbit/changebit routines are inline,
2822 * and a lot of things compile-time evaluate.
2823 */
2824static boolean_t
2825pmap_testbit(pa, bit)
2826	register vm_offset_t pa;
2827	int bit;
2828{
2829	register pv_entry_t pv;
2830	pv_table_t *ppv;
2831	unsigned *pte;
2832	int s;
2833
2834	if (!pmap_is_managed(pa))
2835		return FALSE;
2836
2837	ppv = pa_to_pvh(pa);
2838	if (TAILQ_FIRST(&ppv->pv_list) == NULL)
2839		return FALSE;
2840
2841	s = splvm();
2842
2843	for (pv = TAILQ_FIRST(&ppv->pv_list);
2844		pv;
2845		pv = TAILQ_NEXT(pv, pv_list)) {
2846
2847		/*
2848		 * if the bit being tested is the modified or accessed
2849		 * bit, then only consider mappings whose modified state
2850		 * is actually tracked (see pmap_track_modified).
2851		 */
2852		if (bit & (PG_A|PG_M)) {
2853			if (!pmap_track_modified(pv->pv_va))
2854				continue;
2855		}
2856
2857#if defined(PMAP_DIAGNOSTIC)
2858		if (!pv->pv_pmap) {
2859			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2860			continue;
2861		}
2862#endif
2863		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2864		if (*pte & bit) {
2865			splx(s);
2866			return TRUE;
2867		}
2868	}
2869	splx(s);
2870	return (FALSE);
2871}
2872
2873/*
2874 * this routine is used to modify bits in ptes
2875 */
2876static void
2877pmap_changebit(pa, bit, setem)
2878	vm_offset_t pa;
2879	int bit;
2880	boolean_t setem;
2881{
2882	register pv_entry_t pv;
2883	pv_table_t *ppv;
2884	register unsigned *pte;
2885	int changed;
2886	int s;
2887
2888	if (!pmap_is_managed(pa))
2889		return;
2890
2891	s = splvm();
2892	changed = 0;
2893	ppv = pa_to_pvh(pa);
2894
2895	/*
2896	 * Loop over all current mappings, setting/clearing as appropriate.
2897	 * If setting RO, do we need to clear the VAC?
2898	 */
2899	for (pv = TAILQ_FIRST(&ppv->pv_list);
2900		pv;
2901		pv = TAILQ_NEXT(pv, pv_list)) {
2902
2903		/*
2904		 * don't write protect pager mappings
2905		 */
2906		if (!setem && (bit == PG_RW)) {
2907			if (!pmap_track_modified(pv->pv_va))
2908				continue;
2909		}
2910
2911#if defined(PMAP_DIAGNOSTIC)
2912		if (!pv->pv_pmap) {
2913			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2914			continue;
2915		}
2916#endif
2917
2918		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2919
2920		if (setem) {
2921			*(int *)pte |= bit;
2922			changed = 1;
2923		} else {
2924			vm_offset_t pbits = *(vm_offset_t *)pte;
2925			if (pbits & bit) {
2926				changed = 1;
2927				if (bit == PG_RW) {
2928					if (pbits & PG_M) {
2929						ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2930					}
2931					*(int *)pte = pbits & ~(PG_M|PG_RW);
2932				} else {
2933					*(int *)pte = pbits & ~bit;
2934				}
2935			}
2936		}
2937	}
2938	splx(s);
2939	if (changed)
2940		invltlb();
2941}
2942
2943/*
2944 *      pmap_page_protect:
2945 *
2946 *      Lower the permission for all mappings to a given page.
2947 */
2948void
2949pmap_page_protect(vm_offset_t phys, vm_prot_t prot)
2950{
2951	if ((prot & VM_PROT_WRITE) == 0) {
2952		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2953			pmap_changebit(phys, PG_RW, FALSE);
2954		} else {
2955			pmap_remove_all(phys);
2956		}
2957	}
2958}
2959
2960vm_offset_t
2961pmap_phys_address(ppn)
2962	int ppn;
2963{
2964	return (i386_ptob(ppn));
2965}
2966
2967/*
2968 *	pmap_ts_referenced:
2969 *
2970 *	Return the count of reference bits for a page, clearing all of them.
2971 *
2972 */
2973int
2974pmap_ts_referenced(vm_offset_t pa)
2975{
2976	register pv_entry_t pv;
2977	pv_table_t *ppv;
2978	unsigned *pte;
2979	int s;
2980	int rtval = 0;
2981
2982	if (!pmap_is_managed(pa))
2983		return 0;
2984
2985	s = splvm();
2986
2987	ppv = pa_to_pvh(pa);
2988
2989	if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
2990		splx(s);
2991		return 0;
2992	}
2993
2994	/*
2995	 * Walk the current mappings, counting and clearing the accessed bits.
2996	 */
2997	for (pv = TAILQ_FIRST(&ppv->pv_list);
2998		pv;
2999		pv = TAILQ_NEXT(pv, pv_list)) {
3000		/*
3001		 * skip mappings whose referenced/modified state we
3002		 * do not track (see pmap_track_modified).
3004		 */
3005		if (!pmap_track_modified(pv->pv_va))
3006			continue;
3007
3008		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3009		if (pte == NULL) {
3010			continue;
3011		}
3012		if (*pte & PG_A) {
3013			rtval++;
3014			*pte &= ~PG_A;
3015		}
3016	}
3017	splx(s);
3018	if (rtval) {
3019		invltlb();
3020	}
3021	return (rtval);
3022}
3023
3024/*
3025 *	pmap_is_modified:
3026 *
3027 *	Return whether or not the specified physical page was modified
3028 *	in any physical maps.
3029 */
3030boolean_t
3031pmap_is_modified(vm_offset_t pa)
3032{
3033	return pmap_testbit((pa), PG_M);
3034}
3035
3036/*
3037 *	Clear the modify bits on the specified physical page.
3038 */
3039void
3040pmap_clear_modify(vm_offset_t pa)
3041{
3042	pmap_changebit((pa), PG_M, FALSE);
3043}
3044
3045/*
3046 *	pmap_clear_reference:
3047 *
3048 *	Clear the reference bit on the specified physical page.
3049 */
3050void
3051pmap_clear_reference(vm_offset_t pa)
3052{
3053	pmap_changebit((pa), PG_A, FALSE);
3054}
3055
3056/*
3057 * Miscellaneous support routines follow
3058 */
3059
3060static void
3061i386_protection_init()
3062{
3063	register int *kp, prot;
3064
3065	kp = protection_codes;
3066	for (prot = 0; prot < 8; prot++) {
3067		switch (prot) {
3068		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3069			/*
3070			 * Read access is also 0. There isn't any execute bit,
3071			 * so just make it readable.
3072			 */
3073		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3074		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3075		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3076			*kp++ = 0;
3077			break;
3078		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3079		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3080		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3081		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3082			*kp++ = PG_RW;
3083			break;
3084		}
3085	}
3086}
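
/*
 * For example, once this table is built the eight VM protection
 * combinations collapse onto just two hardware encodings:
 *
 *	protection_codes[VM_PROT_READ | VM_PROT_WRITE]   == PG_RW
 *	protection_codes[VM_PROT_READ | VM_PROT_EXECUTE] == 0
 *
 * i.e. anything writable maps to PG_RW and everything else, including
 * execute-only, is simply readable, since the i386 has no separate
 * execute bit.
 */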
3087
3088/*
3089 * Map a set of physical memory pages into the kernel virtual
3090 * address space. Return a pointer to where it is mapped. This
3091 * routine is intended to be used for mapping device memory,
3092 * NOT real memory.
3093 */
3094void *
3095pmap_mapdev(pa, size)
3096	vm_offset_t pa;
3097	vm_size_t size;
3098{
3099	vm_offset_t va, tmpva;
3100	unsigned *pte;
3101
3102	size = roundup(size, PAGE_SIZE);
3103
3104	va = kmem_alloc_pageable(kernel_map, size);
3105	if (!va)
3106		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3107
3108	pa = pa & PG_FRAME;
3109	for (tmpva = va; size > 0;) {
3110		pte = (unsigned *)vtopte(tmpva);
3111		*pte = pa | PG_RW | PG_V | pgeflag;
3112		size -= PAGE_SIZE;
3113		tmpva += PAGE_SIZE;
3114		pa += PAGE_SIZE;
3115	}
3116	invltlb();
3117
3118	return ((void *) va);
3119}
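
/*
 * For illustration, a hypothetical driver mapping a 64KB frame buffer
 * at physical address 0xa0000 might do
 *
 *	char *fb = (char *) pmap_mapdev(0xa0000, 64 * 1024);
 *
 * and get back a kernel virtual address backed by PG_RW | PG_V
 * (plus pgeflag, when global pages are enabled) mappings of that
 * range; the size is rounded up to whole pages.
 */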
3120
3121/*
3122 * perform the pmap work for mincore
3123 */
3124int
3125pmap_mincore(pmap, addr)
3126	pmap_t pmap;
3127	vm_offset_t addr;
3128{
3129
3130	unsigned *ptep, pte;
3131	int val = 0;
3132
3133	ptep = pmap_pte(pmap, addr);
3134	if (ptep == NULL) {
3135		return 0;
3136	}
3137
3138	if ((pte = *ptep) != 0) {
3139		vm_offset_t pa;
3140		val = MINCORE_INCORE;
3141		pa = pte & PG_FRAME;
3142
3143		/*
3144		 * Modified by us
3145		 */
3146		if (pte & PG_M)
3147			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3148		/*
3149		 * Modified by someone
3150		 */
3151		else if (PHYS_TO_VM_PAGE(pa)->dirty ||
3152			pmap_is_modified(pa))
3153			val |= MINCORE_MODIFIED_OTHER;
3154		/*
3155		 * Referenced by us
3156		 */
3157		if (pte & PG_A)
3158			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3159
3160		/*
3161		 * Referenced by someone
3162		 */
3163		else if ((PHYS_TO_VM_PAGE(pa)->flags & PG_REFERENCED) ||
3164			pmap_ts_referenced(pa)) {
3165			val |= MINCORE_REFERENCED_OTHER;
3166			PHYS_TO_VM_PAGE(pa)->flags |= PG_REFERENCED;
3167		}
3168	}
3169	return val;
3170}
3171
3172void
3173pmap_activate(struct proc *p)
3174{
3175	load_cr3(p->p_addr->u_pcb.pcb_cr3 =
3176		vtophys(p->p_vmspace->vm_pmap.pm_pdir));
3177}
3178
3179vm_offset_t
3180pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3181{
3182	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3183		return addr;
3184	}
3185
3186	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3187	return addr;
3188}
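
/*
 * For example, with NBPDR == 4MB a hint of 0x20401000 for a large
 * OBJT_DEVICE object comes back as 0x20800000, the next 4MB boundary,
 * so that pmap_object_init_pt() above can later use 4MB PG_PS
 * mappings; small or non-device objects get their hint back unchanged.
 */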
3189
3190
3191#if defined(PMAP_DEBUG)
3192pmap_pid_dump(int pid) {
3193	pmap_t pmap;
3194	struct proc *p;
3195	int npte = 0;
3196	int index;
3197	for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) {
3198		if (p->p_pid != pid)
3199			continue;
3200
3201		if (p->p_vmspace) {
3202			int i,j;
3203			index = 0;
3204			pmap = &p->p_vmspace->vm_pmap;
3205			for(i=0;i<1024;i++) {
3206				pd_entry_t *pde;
3207				unsigned *pte;
3208				unsigned base = i << PDRSHIFT;
3209
3210				pde = &pmap->pm_pdir[i];
3211				if (pde && pmap_pde_v(pde)) {
3212					for(j=0;j<1024;j++) {
3213						unsigned va = base + (j << PAGE_SHIFT);
3214						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
3215							if (index) {
3216								index = 0;
3217								printf("\n");
3218							}
3219							return npte;
3220						}
3221						pte = pmap_pte_quick( pmap, va);
3222						if (pte && pmap_pte_v(pte)) {
3223							vm_offset_t pa;
3224							vm_page_t m;
3225							pa = *(int *)pte;
3226							m = PHYS_TO_VM_PAGE((pa & PG_FRAME));
3227							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
3228								va, pa, m->hold_count, m->wire_count, m->flags);
3229							npte++;
3230							index++;
3231							if (index >= 2) {
3232								index = 0;
3233								printf("\n");
3234							} else {
3235								printf(" ");
3236							}
3237						}
3238					}
3239				}
3240			}
3241		}
3242	}
3243	return npte;
3244}
3245#endif
3246
3247#if defined(DEBUG)
3248
3249static void	pads __P((pmap_t pm));
3250static void	pmap_pvdump __P((vm_offset_t pa));
3251
3252/* print address space of pmap */
3253static void
3254pads(pm)
3255	pmap_t pm;
3256{
3257	unsigned va, i, j;
3258	unsigned *ptep;
3259
3260	if (pm == kernel_pmap)
3261		return;
3262	for (i = 0; i < 1024; i++)
3263		if (pm->pm_pdir[i])
3264			for (j = 0; j < 1024; j++) {
3265				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3266				if (pm == kernel_pmap && va < KERNBASE)
3267					continue;
3268				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
3269					continue;
3270				ptep = pmap_pte_quick(pm, va);
3271				if (pmap_pte_v(ptep))
3272					printf("%x:%x ", va, *(int *) ptep);
3273			}
3274
3275}
3276
3277static void
3278pmap_pvdump(pa)
3279	vm_offset_t pa;
3280{
3281	pv_table_t *ppv;
3282	register pv_entry_t pv;
3283
3284	printf("pa %x", pa);
3285	ppv = pa_to_pvh(pa);
3286	for (pv = TAILQ_FIRST(&ppv->pv_list);
3287		pv;
3288		pv = TAILQ_NEXT(pv, pv_list)) {
3289#ifdef used_to_be
3290		printf(" -> pmap %x, va %x, flags %x",
3291		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
3292#endif
3293		printf(" -> pmap %x, va %x",
3294		    pv->pv_pmap, pv->pv_va);
3295		pads(pv->pv_pmap);
3296	}
3297	printf(" ");
3298}
3299#endif
3300