pmap.c revision 35933
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42 *	$Id: pmap.c,v 1.194 1998/05/11 01:06:08 dyson Exp $
43 */
44
45/*
46 *	Manages physical address maps.
47 *
48 *	In addition to hardware address maps, this
49 *	module is called upon to provide software-use-only
50 *	maps which may or may not be stored in the same
51 *	form as hardware maps.  These pseudo-maps are
52 *	used to store intermediate results from copy
53 *	operations to and from address spaces.
54 *
55 *	Since the information managed by this module is
56 *	also stored by the logical address mapping module,
57 *	this module may throw away valid virtual-to-physical
58 *	mappings at almost any time.  However, invalidations
59 *	of virtual-to-physical mappings must be done as
60 *	requested.
61 *
62 *	In order to cope with hardware architectures which
63 *	make virtual-to-physical map invalidates expensive,
64 *	this module may delay invalidation or reduced-protection
65 *	operations until such time as they are actually
66 *	necessary.  This module is given full information as
67 *	to which processors are currently using which maps,
68 *	and when physical maps must be made correct.
69 */
70
71#include "opt_disable_pse.h"
72#include "opt_pmap.h"
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/proc.h>
77#include <sys/msgbuf.h>
78#include <sys/vmmeter.h>
79#include <sys/mman.h>
80
81#include <vm/vm.h>
82#include <vm/vm_param.h>
83#include <vm/vm_prot.h>
84#include <sys/lock.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_page.h>
87#include <vm/vm_map.h>
88#include <vm/vm_object.h>
89#include <vm/vm_extern.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_pager.h>
92#include <vm/vm_zone.h>
93
94#include <sys/user.h>
95
96#include <machine/cputypes.h>
97#include <machine/md_var.h>
98#include <machine/specialreg.h>
99#if defined(SMP) || defined(APIC_IO)
100#include <machine/smp.h>
101#include <machine/apic.h>
102#endif /* SMP || APIC_IO */
103
104#define PMAP_KEEP_PDIRS
105#ifndef PMAP_SHPGPERPROC
106#define PMAP_SHPGPERPROC 200
107#endif
108
109#if defined(DIAGNOSTIC)
110#define PMAP_DIAGNOSTIC
111#endif
112
113#define MINPV 2048
114
115#if !defined(PMAP_DIAGNOSTIC)
116#define PMAP_INLINE __inline
117#else
118#define PMAP_INLINE
119#endif
120
121/*
122 * Get PDEs and PTEs for user/kernel address space
123 */
124#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
125#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
126
127#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
128#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
129#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
130#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
131#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
132
133#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
134#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
135
136/*
137 * Given a map and a machine-independent protection code,
138 * convert to an i386 protection code.
139 */
140#define pte_prot(m, p)	(protection_codes[p])
141static int protection_codes[8];
142
143#define	pa_index(pa)		atop((pa) - vm_first_phys)
144#define	pa_to_pvh(pa)		(&pv_table[pa_index(pa)])
145
146static struct pmap kernel_pmap_store;
147pmap_t kernel_pmap;
148extern pd_entry_t my_idlePTD;
149
150vm_offset_t avail_start;	/* PA of first available physical page */
151vm_offset_t avail_end;		/* PA of last available physical page */
152vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
153vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
154static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
155static vm_offset_t vm_first_phys;
156static int pgeflag;		/* PG_G or-in */
157static int pseflag;		/* PG_PS or-in */
158static int pv_npg;
159
160static vm_object_t kptobj;
161
162static int nkpt;
163vm_offset_t kernel_vm_end;
164
165/*
166 * Data for the pv entry allocation mechanism
167 */
168static vm_zone_t pvzone;
169static struct vm_zone pvzone_store;
170static struct vm_object pvzone_obj;
171static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
172static int pmap_pagedaemon_waken = 0;
173static struct pv_entry *pvinit;
174
175/*
176 * All those kernel PT submaps that BSD is so fond of
177 */
178pt_entry_t *CMAP1 = 0;
179static pt_entry_t *CMAP2, *ptmmap;
180static pv_table_t *pv_table;
181caddr_t CADDR1 = 0, ptvmmap = 0;
182static caddr_t CADDR2;
183static pt_entry_t *msgbufmap;
184struct msgbuf *msgbufp=0;
185
186#ifdef SMP
187extern char prv_CPAGE1[], prv_CPAGE2[], prv_CPAGE3[];
188extern pt_entry_t *prv_CMAP1, *prv_CMAP2, *prv_CMAP3;
189extern pd_entry_t *IdlePTDS[];
190extern pt_entry_t SMP_prvpt[];
191#endif
192
193static pt_entry_t *PMAP1 = 0;
194static unsigned *PADDR1 = 0;
195
196static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
197static unsigned * get_ptbase __P((pmap_t pmap));
198static pv_entry_t get_pv_entry __P((void));
199static void	i386_protection_init __P((void));
200static void	pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem));
201
202static PMAP_INLINE int	pmap_is_managed __P((vm_offset_t pa));
203static void	pmap_remove_all __P((vm_offset_t pa));
204static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
205				      vm_offset_t pa, vm_page_t mpte));
206static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
207					vm_offset_t sva));
208static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
209static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv,
210					vm_offset_t va));
211static boolean_t pmap_testbit __P((vm_offset_t pa, int bit));
212static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
213		vm_page_t mpte, vm_offset_t pa));
214
215static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
216
217static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
218static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
219static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
220static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
221static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
222static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
223void pmap_collect(void);
224
225static unsigned pdir4mb;
226
227/*
228 *	Routine:	pmap_pte
229 *	Function:
230 *		Extract the page table entry associated
231 *		with the given map/virtual_address pair.
232 */
233
234PMAP_INLINE unsigned *
235pmap_pte(pmap, va)
236	register pmap_t pmap;
237	vm_offset_t va;
238{
239	unsigned *pdeaddr;
240
241	if (pmap) {
242		pdeaddr = (unsigned *) pmap_pde(pmap, va);
243		if (*pdeaddr & PG_PS)
244			return pdeaddr;
245		if (*pdeaddr) {
246			return get_ptbase(pmap) + i386_btop(va);
247		}
248	}
249	return (0);
250}
251
252/*
253 * Move the kernel virtual free pointer to the next
254 * 4MB.  This is used to help improve performance
255 * by using a large (4MB) page for much of the kernel
256 * (.text, .data, .bss)
257 */
258static vm_offset_t
259pmap_kmem_choose(vm_offset_t addr) {
260	vm_offset_t newaddr = addr;
261#ifndef DISABLE_PSE
262	if (cpu_feature & CPUID_PSE) {
263		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
264	}
265#endif
266	return newaddr;
267}
268
269/*
270 *	Bootstrap the system enough to run with virtual memory.
271 *
272 *	On the i386 this is called after mapping has already been enabled
273 *	and just syncs the pmap module with what has already been done.
274 *	[We can't call it easily with mapping off since the kernel is not
275 *	mapped with PA == VA, hence we would have to relocate every address
276 *	from the linked base (virtual) address "KERNBASE" to the actual
277 *	(physical) address starting relative to 0]
278 */
279void
280pmap_bootstrap(firstaddr, loadaddr)
281	vm_offset_t firstaddr;
282	vm_offset_t loadaddr;
283{
284	vm_offset_t va;
285	pt_entry_t *pte;
286	int i, j;
287
288	avail_start = firstaddr;
289
290	/*
291	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
292	 * large. It should instead be correctly calculated in locore.s and
293	 * not based on 'first' (which is a physical address, not a virtual
294	 * address, for the start of unused physical memory). The kernel
295	 * page tables are NOT double mapped and thus should not be included
296	 * in this calculation.
297	 */
298	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
299	virtual_avail = pmap_kmem_choose(virtual_avail);
300
301	virtual_end = VM_MAX_KERNEL_ADDRESS;
302
303	/*
304	 * Initialize protection array.
305	 */
306	i386_protection_init();
307
308	/*
309	 * The kernel's pmap is statically allocated so we don't have to use
310	 * pmap_create, which is unlikely to work correctly at this part of
311	 * the boot sequence (XXX and which no longer exists).
312	 */
313	kernel_pmap = &kernel_pmap_store;
314
315	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
316
317	kernel_pmap->pm_count = 1;
318	TAILQ_INIT(&kernel_pmap->pm_pvlist);
319	nkpt = NKPT;
320
321	/*
322	 * Reserve some special page table entries/VA space for temporary
323	 * mapping of pages.
324	 */
325#define	SYSMAP(c, p, v, n)	\
326	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
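	/*
	 * SYSMAP(c, p, v, n) carves "n" pages out of the va/pte cursors
	 * initialized just below: "v" receives the chosen virtual address
	 * and "p" the address of its first page table entry, so the caller
	 * can later map a physical page simply by writing *p.
	 */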
327
328	va = virtual_avail;
329	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
330
331	/*
332	 * CMAP1/CMAP2 are used for zeroing and copying pages.
333	 */
334	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
335	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
336
337	/*
338	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
339	 * XXX ptmmap is not used.
340	 */
341	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
342
343	/*
344	 * msgbufp is used to map the system message buffer.
345	 * XXX msgbufmap is not used.
346	 */
347	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
348	       atop(round_page(sizeof(struct msgbuf))))
349
350	/*
351	 * PMAP1/PADDR1 are used for pmap_pte_quick
352	 */
353	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
354
355	virtual_avail = va;
356
357	*(int *) CMAP1 = *(int *) CMAP2 = 0;
358	*(int *) PTD = 0;
359
360
361	pgeflag = 0;
362#if !defined(SMP)
363	if (cpu_feature & CPUID_PGE) {
364		pgeflag = PG_G;
365	}
366#endif
367
368/*
369 * Initialize the 4MB page size flag
370 */
371	pseflag = 0;
372/*
373 * The 4MB page version of the initial
374 * kernel page mapping.
375 */
376	pdir4mb = 0;
377
378#if !defined(DISABLE_PSE)
379	if (cpu_feature & CPUID_PSE) {
380		unsigned ptditmp;
381		/*
382		 * Enable the PSE mode
383		 */
384		load_cr4(rcr4() | CR4_PSE);
385
386		/*
387		 * Note that we have enabled PSE mode
388		 */
389		pseflag = PG_PS;
390		ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
391		ptditmp &= ~(NBPDR - 1);
392		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
393		pdir4mb = ptditmp;
394		/*
395		 * We can do the mapping here for the single processor
396		 * case.  We simply ignore the old page table page from
397		 * now on.
398		 */
399#if !defined(SMP)
400		PTD[KPTDI] = (pd_entry_t) ptditmp;
401		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
402		invltlb();
403#endif
404	}
405#endif
406
407#ifdef SMP
408	if (cpu_apic_address == 0)
409		panic("pmap_bootstrap: no local apic!");
410
411	/* 0 = private page */
412	/* 1 = page table page */
413	/* 2 = local apic */
414	/* 16-31 = io apics */
415	SMP_prvpt[2] = (pt_entry_t)(PG_V | PG_RW | pgeflag | ((u_long)cpu_apic_address & PG_FRAME));
416
417	for (i = 0; i < mp_napics; i++) {
418		for (j = 0; j < 16; j++) {
419			/* same page frame as a previous IO apic? */
420			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) ==
421			    ((u_long)io_apic_address[i] & PG_FRAME)) {
422				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
423				break;
424			}
425			/* use this slot if available */
426			if (((u_long)SMP_prvpt[j + 16] & PG_FRAME) == 0) {
427				SMP_prvpt[j + 16] = (pt_entry_t)(PG_V | PG_RW | pgeflag |
428				    ((u_long)io_apic_address[i] & PG_FRAME));
429				ioapic[i] = (ioapic_t *)&SMP_ioapic[j * PAGE_SIZE];
430				break;
431			}
432		}
433		if (j == 16)
434			panic("no space to map IO apic %d!", i);
435	}
436
437	/* BSP does this itself, AP's get it pre-set */
438	prv_CMAP1 = &SMP_prvpt[3 + UPAGES];
439	prv_CMAP2 = &SMP_prvpt[4 + UPAGES];
440	prv_CMAP3 = &SMP_prvpt[5 + UPAGES];
441#endif
442
443	invltlb();
444
445}
446
447void
448getmtrr()
449{
450	int i;
451
452	if (cpu_class == CPUCLASS_686) {
453		for(i = 0; i < NPPROVMTRR; i++) {
454			PPro_vmtrr[i].base = rdmsr(PPRO_VMTRRphysBase0 + i * 2);
455			PPro_vmtrr[i].mask = rdmsr(PPRO_VMTRRphysMask0 + i * 2);
456		}
457	}
458}
459
460void
461putmtrr()
462{
463	int i;
464
465	if (cpu_class == CPUCLASS_686) {
466		wbinvd();
467		for(i = 0; i < NPPROVMTRR; i++) {
468			wrmsr(PPRO_VMTRRphysBase0 + i * 2, PPro_vmtrr[i].base);
469			wrmsr(PPRO_VMTRRphysMask0 + i * 2, PPro_vmtrr[i].mask);
470		}
471	}
472}
473
474void
475pmap_setvidram(void)
476{
477	if (cpu_class == CPUCLASS_686) {
478		wbinvd();
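		/*
		 * 0x250, 0x258 and 0x259 are the fixed-range MTRR MSRs
		 * (MTRRfix64K_00000, MTRRfix16K_80000, MTRRfix16K_A0000);
		 * each byte selects a memory type for one sub-range
		 * (0x06 = write-back, 0x01 = write-combining).
		 */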
479		/*
480		 * Set memory between 0-640K to be WB
481		 */
482		wrmsr(0x250, 0x0606060606060606LL);
483		wrmsr(0x258, 0x0606060606060606LL);
484		/*
485		 * Set normal, PC video memory to be WC
486		 */
487		wrmsr(0x259, 0x0101010101010101LL);
488	}
489}
490
491void
492pmap_setdevram(unsigned long long basea, vm_offset_t sizea)
493{
494	int i, free, skip;
495	unsigned basepage, basepaget;
496	unsigned long long base;
497	unsigned long long mask;
498
499	if (cpu_class != CPUCLASS_686)
500		return;
501
502	free = -1;
503	skip = 0;
504	basea &= ~0xfff;
505	base = basea | 0x1;
506	mask = (long long) (0xfffffffffLL - ((long) sizea - 1)) | (long long) 0x800;
507	mask &= ~0x7ff;
508
509	basepage = (long long) (base >> 12);
510	for(i = 0; i < NPPROVMTRR; i++) {
511		PPro_vmtrr[i].base = rdmsr(PPRO_VMTRRphysBase0 + i * 2);
512		PPro_vmtrr[i].mask = rdmsr(PPRO_VMTRRphysMask0 + i * 2);
513		basepaget = (long long) (PPro_vmtrr[i].base >> 12);
514		if (basepage == basepaget)
515			skip = 1;
516		if ((PPro_vmtrr[i].mask & 0x800) == 0) {
517			if (free == -1)
518				free = i;
519		}
520	}
521
522	if (!skip && free != -1) {
523		wbinvd();
524		PPro_vmtrr[free].base = base;
525		PPro_vmtrr[free].mask = mask;
526		wrmsr(PPRO_VMTRRphysBase0 + free * 2, base);
527		wrmsr(PPRO_VMTRRphysMask0 + free * 2, mask);
528		printf("pmap: added WC mapping at page: 0x%x%08x, size: %d mask: 0x%x%08x\n", (u_int)(base >> 32), (u_int)base, (int)sizea, (u_int)(mask >> 32), (u_int)mask);
529	}
530}
531
532/*
533 * Set 4mb pdir for mp startup, and global flags
534 */
535void
536pmap_set_opt(unsigned *pdir) {
537	int i;
538
539	if (pseflag && (cpu_feature & CPUID_PSE)) {
540		load_cr4(rcr4() | CR4_PSE);
541		if (pdir4mb) {
542			pdir[KPTDI] = pdir4mb;
543		}
544	}
545
546	if (pgeflag && (cpu_feature & CPUID_PGE)) {
547		load_cr4(rcr4() | CR4_PGE);
548		for(i = KPTDI; i < KPTDI + nkpt; i++) {
549			if (pdir[i]) {
550				pdir[i] |= PG_G;
551			}
552		}
553	}
554}
555
556/*
557 * Setup the PTD for the boot processor
558 */
559void
560pmap_set_opt_bsp(void)
561{
562	pmap_set_opt((unsigned *)kernel_pmap->pm_pdir);
563	pmap_set_opt((unsigned *)PTD);
564	invltlb();
565}
566
567/*
568 *	Initialize the pmap module.
569 *	Called by vm_init, to initialize any structures that the pmap
570 *	system needs to map virtual memory.
571 *	pmap_init has been enhanced to support, in a fairly consistent
572 *	way, discontiguous physical memory.
573 */
574void
575pmap_init(phys_start, phys_end)
576	vm_offset_t phys_start, phys_end;
577{
578	vm_offset_t addr;
579	vm_size_t s;
580	int i;
581	int initial_pvs;
582
583	/*
584	 * calculate the number of pv_entries needed
585	 */
586	vm_first_phys = phys_avail[0];
587	for (i = 0; phys_avail[i + 1]; i += 2);
588	pv_npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE;
589
590	/*
591	 * Allocate memory for random pmap data structures.  Includes the
592	 * pv_head_table.
593	 */
594	s = (vm_size_t) (sizeof(pv_table_t) * pv_npg);
595	s = round_page(s);
596
597	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
598	pv_table = (pv_table_t *) addr;
599	for(i = 0; i < pv_npg; i++) {
600		vm_offset_t pa;
601		TAILQ_INIT(&pv_table[i].pv_list);
602		pv_table[i].pv_list_count = 0;
603		pa = vm_first_phys + i * PAGE_SIZE;
604		pv_table[i].pv_vm_page = PHYS_TO_VM_PAGE(pa);
605	}
606
607	/*
608	 * init the pv free list
609	 */
610	initial_pvs = pv_npg;
611	if (initial_pvs < MINPV)
612		initial_pvs = MINPV;
613	pvzone = &pvzone_store;
614	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
615		initial_pvs * sizeof (struct pv_entry));
616	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, initial_pvs);
617	/*
618	 * object for kernel page table pages
619	 */
620	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
621
622	/*
623	 * Now it is safe to enable pv_table recording.
624	 */
625	pmap_initialized = TRUE;
626}
627
628/*
629 * Initialize the address space (zone) for the pv_entries.  Set a
630 * high water mark so that the system can recover from excessive
631 * numbers of pv entries.
632 */
633void
634pmap_init2() {
635	pv_entry_max = PMAP_SHPGPERPROC * maxproc + pv_npg;
636	pv_entry_high_water = 9 * (pv_entry_max / 10);
637	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
638}
639
640/*
641 *	Used to map a range of physical addresses into kernel
642 *	virtual address space.
643 *
644 *	For now, VM is already on, we only need to map the
645 *	specified memory.
646 */
647vm_offset_t
648pmap_map(virt, start, end, prot)
649	vm_offset_t virt;
650	vm_offset_t start;
651	vm_offset_t end;
652	int prot;
653{
654	while (start < end) {
655		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
656		virt += PAGE_SIZE;
657		start += PAGE_SIZE;
658	}
659	return (virt);
660}
661
662
663/***************************************************
664 * Low level helper routines.....
665 ***************************************************/
666
667#if defined(PMAP_DIAGNOSTIC)
668
669/*
670 * This code checks for non-writeable/modified pages.
671 * This should be an invalid condition.
672 */
673static int
674pmap_nw_modified(pt_entry_t ptea) {
675	int pte;
676
677	pte = (int) ptea;
678
679	if ((pte & (PG_M|PG_RW)) == PG_M)
680		return 1;
681	else
682		return 0;
683}
684#endif
685
686
687/*
688 * this routine defines the region(s) of memory that should
689 * not be tested for the modified bit.
690 */
691static PMAP_INLINE int
692pmap_track_modified( vm_offset_t va) {
693	if ((va < clean_sva) || (va >= clean_eva))
694		return 1;
695	else
696		return 0;
697}
698
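/*
 * Single-page TLB invalidation helpers.  The original 80386 has no invlpg
 * instruction, so on that CPU class the entire TLB is flushed instead;
 * later CPUs invalidate only the affected page(s).
 */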
699static PMAP_INLINE void
700invltlb_1pg( vm_offset_t va) {
701#if defined(I386_CPU)
702	if (cpu_class == CPUCLASS_386) {
703		invltlb();
704	} else
705#endif
706	{
707		invlpg(va);
708	}
709}
710
711static PMAP_INLINE void
712invltlb_2pg( vm_offset_t va1, vm_offset_t va2) {
713#if defined(I386_CPU)
714	if (cpu_class == CPUCLASS_386) {
715		invltlb();
716	} else
717#endif
718	{
719		invlpg(va1);
720		invlpg(va2);
721	}
722}
723
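/*
 * Return a virtual window onto the page tables of "pmap".  The kernel and
 * the current address space are reachable through the recursive mapping at
 * PTmap; any other pmap is temporarily installed in the alternate recursive
 * slot (APTDpde/APTmap), which costs a TLB flush.
 */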
724static unsigned *
725get_ptbase(pmap)
726	pmap_t pmap;
727{
728	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
729
730	/* are we current address space or kernel? */
731	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
732		return (unsigned *) PTmap;
733	}
734	/* otherwise, we are alternate address space */
735	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
736		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
737		invltlb();
738	}
739	return (unsigned *) APTmap;
740}
741
742/*
743 * Super fast pmap_pte routine best used when scanning
744 * the pv lists.  This eliminates many coarse-grained
745 * invltlb calls.  Note that many of the pv list
746 * scans are across different pmaps.  It is very wasteful
747 * to do an entire invltlb for checking a single mapping.
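 *
 * PMAP1 is a single reserved kernel PTE and PADDR1 the virtual address it
 * maps; the page table page of interest is temporarily installed in that
 * slot so a foreign pmap's PTE can be read with only a one-page TLB
 * invalidation.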
748 */
749
750static unsigned *
751pmap_pte_quick(pmap, va)
752	register pmap_t pmap;
753	vm_offset_t va;
754{
755	unsigned pde, newpf;
756	if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
757		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
758		unsigned index = i386_btop(va);
759		/* are we current address space or kernel? */
760		if ((pmap == kernel_pmap) ||
761			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
762			return (unsigned *) PTmap + index;
763		}
764		newpf = pde & PG_FRAME;
765		if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
766			* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
767			invltlb_1pg((vm_offset_t) PADDR1);
768		}
769		return PADDR1 + ((unsigned) index & (NPTEPG - 1));
770	}
771	return (0);
772}
773
774/*
775 *	Routine:	pmap_extract
776 *	Function:
777 *		Extract the physical page address associated
778 *		with the given map/virtual_address pair.
779 */
780vm_offset_t
781pmap_extract(pmap, va)
782	register pmap_t pmap;
783	vm_offset_t va;
784{
785	vm_offset_t rtval;
786	vm_offset_t pdirindex;
787	pdirindex = va >> PDRSHIFT;
788	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
789		unsigned *pte;
790		if ((rtval & PG_PS) != 0) {
791			rtval &= ~(NBPDR - 1);
792			rtval |= va & (NBPDR - 1);
793			return rtval;
794		}
795		pte = get_ptbase(pmap) + i386_btop(va);
796		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
797		return rtval;
798	}
799	return 0;
800
801}
802
803/*
804 * determine if a page is managed (memory vs. device)
805 */
806static PMAP_INLINE int
807pmap_is_managed(pa)
808	vm_offset_t pa;
809{
810	int i;
811
812	if (!pmap_initialized)
813		return 0;
814
815	for (i = 0; phys_avail[i + 1]; i += 2) {
816		if (pa < phys_avail[i + 1] && pa >= phys_avail[i])
817			return 1;
818	}
819	return 0;
820}
821
822
823/***************************************************
824 * Low level mapping routines.....
825 ***************************************************/
826
827/*
828 * Add a list of wired pages to the kva.
829 * This routine is only used for temporary
830 * kernel mappings that do not need to have
831 * page modification or references recorded.
832 * Note that old mappings are simply written
833 * over.  The page *must* be wired.
834 */
835void
836pmap_qenter(va, m, count)
837	vm_offset_t va;
838	vm_page_t *m;
839	int count;
840{
841	int i;
842	register unsigned *pte;
843
844	for (i = 0; i < count; i++) {
845		vm_offset_t tva = va + i * PAGE_SIZE;
846		unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V | pgeflag;
847		unsigned opte;
848		pte = (unsigned *)vtopte(tva);
849		opte = *pte;
850		*pte = npte;
851		if (opte)
852			invltlb_1pg(tva);
853	}
854}
855
856/*
857 * this routine jerks page mappings from the
858 * kernel -- it is meant only for temporary mappings.
859 */
860void
861pmap_qremove(va, count)
862	vm_offset_t va;
863	int count;
864{
865	int i;
866	register unsigned *pte;
867
868	for (i = 0; i < count; i++) {
869		pte = (unsigned *)vtopte(va);
870		*pte = 0;
871		invltlb_1pg(va);
872		va += PAGE_SIZE;
873	}
874}
875
876/*
877 * add a wired page to the kva.
878 * Note that in order for the mapping to take effect you
879 * should do an invltlb after doing the pmap_kenter...
880 */
881PMAP_INLINE void
882pmap_kenter(va, pa)
883	vm_offset_t va;
884	register vm_offset_t pa;
885{
886	register unsigned *pte;
887	unsigned npte, opte;
888
889	npte = pa | PG_RW | PG_V | pgeflag;
890	pte = (unsigned *)vtopte(va);
891	opte = *pte;
892	*pte = npte;
893	if (opte)
894		invltlb_1pg(va);
895}
896
897/*
898 * remove a page from the kernel pagetables
899 */
900PMAP_INLINE void
901pmap_kremove(va)
902	vm_offset_t va;
903{
904	register unsigned *pte;
905
906	pte = (unsigned *)vtopte(va);
907	*pte = 0;
908	invltlb_1pg(va);
909}
910
911static vm_page_t
912pmap_page_lookup(object, pindex)
913	vm_object_t object;
914	vm_pindex_t pindex;
915{
916	vm_page_t m;
917retry:
918	m = vm_page_lookup(object, pindex);
919	if (m && vm_page_sleep(m, "pplookp", NULL))
920		goto retry;
921	return m;
922}
923
924/*
925 * Create the UPAGES for a new process.
926 * This routine directly affects the fork perf for a process.
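 *
 * The UPAGES hold the per-process u-area (struct user, including the
 * kernel stack); they are wired and entered directly into the kernel
 * page table through the ptek window below.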
927 */
928void
929pmap_new_proc(p)
930	struct proc *p;
931{
932	int i, updateneeded;
933	vm_object_t upobj;
934	vm_page_t m;
935	struct user *up;
936	unsigned *ptek, oldpte;
937
938	/*
939	 * allocate object for the upages
940	 */
941	if ((upobj = p->p_upages_obj) == NULL) {
942		upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
943		p->p_upages_obj = upobj;
944	}
945
946	/* get a kernel virtual address for the UPAGES for this proc */
947	if ((up = p->p_addr) == NULL) {
948		up = (struct user *) kmem_alloc_pageable(kernel_map,
949				UPAGES * PAGE_SIZE);
950#if !defined(MAX_PERF)
951		if (up == NULL)
952			panic("pmap_new_proc: u_map allocation failed");
953#endif
954		p->p_addr = up;
955	}
956
957	ptek = (unsigned *) vtopte((vm_offset_t) up);
958
959	updateneeded = 0;
960	for(i=0;i<UPAGES;i++) {
961		/*
962		 * Get a kernel stack page
963		 */
964		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
965
966		/*
967		 * Wire the page
968		 */
969		m->wire_count++;
970		cnt.v_wire_count++;
971
972		oldpte = *(ptek + i);
973		/*
974		 * Enter the page into the kernel address space.
975		 */
976		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
977		if (oldpte) {
978			if ((oldpte & PG_G) || (cpu_class > CPUCLASS_386)) {
979				invlpg((vm_offset_t) up + i * PAGE_SIZE);
980			} else {
981				updateneeded = 1;
982			}
983		}
984
985		PAGE_WAKEUP(m);
986		m->flags &= ~PG_ZERO;
987		m->flags |= PG_MAPPED | PG_WRITEABLE;
988		m->valid = VM_PAGE_BITS_ALL;
989	}
990	if (updateneeded)
991		invltlb();
992}
993
994/*
995 * Dispose the UPAGES for a process that has exited.
996 * This routine directly impacts the exit perf of a process.
997 */
998void
999pmap_dispose_proc(p)
1000	struct proc *p;
1001{
1002	int i;
1003	vm_object_t upobj;
1004	vm_page_t m;
1005	unsigned *ptek, oldpte;
1006
1007	upobj = p->p_upages_obj;
1008
1009	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
1010	for(i=0;i<UPAGES;i++) {
1011
1012		if ((m = vm_page_lookup(upobj, i)) == NULL)
1013			panic("pmap_dispose_proc: upage already missing???");
1014
1015		m->flags |= PG_BUSY;
1016
1017		oldpte = *(ptek + i);
1018		*(ptek + i) = 0;
1019		if ((oldpte & PG_G) || (cpu_class > CPUCLASS_386))
1020			invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
1021		vm_page_unwire(m);
1022		vm_page_free(m);
1023	}
1024
1025	if (cpu_class <= CPUCLASS_386)
1026		invltlb();
1027}
1028
1029/*
1030 * Allow the UPAGES for a process to be prejudicially paged out.
1031 */
1032void
1033pmap_swapout_proc(p)
1034	struct proc *p;
1035{
1036	int i;
1037	vm_object_t upobj;
1038	vm_page_t m;
1039
1040	upobj = p->p_upages_obj;
1041	/*
1042	 * let the upages be paged
1043	 */
1044	for(i=0;i<UPAGES;i++) {
1045		if ((m = vm_page_lookup(upobj, i)) == NULL)
1046			panic("pmap_swapout_proc: upage already missing???");
1047		m->dirty = VM_PAGE_BITS_ALL;
1048		vm_page_unwire(m);
1049		vm_page_deactivate(m);
1050		pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
1051	}
1052}
1053
1054/*
1055 * Bring the UPAGES for a specified process back in.
1056 */
1057void
1058pmap_swapin_proc(p)
1059	struct proc *p;
1060{
1061	int i,rv;
1062	vm_object_t upobj;
1063	vm_page_t m;
1064
1065	upobj = p->p_upages_obj;
1066	for(i=0;i<UPAGES;i++) {
1067
1068		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1069
1070		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
1071			VM_PAGE_TO_PHYS(m));
1072
1073		if (m->valid != VM_PAGE_BITS_ALL) {
1074			rv = vm_pager_get_pages(upobj, &m, 1, 0);
1075#if !defined(MAX_PERF)
1076			if (rv != VM_PAGER_OK)
1077				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
1078#endif
1079			m = vm_page_lookup(upobj, i);
1080			m->valid = VM_PAGE_BITS_ALL;
1081		}
1082
1083		vm_page_wire(m);
1084		PAGE_WAKEUP(m);
1085		m->flags |= PG_MAPPED | PG_WRITEABLE;
1086	}
1087}
1088
1089/***************************************************
1090 * Page table page management routines.....
1091 ***************************************************/
1092
1093/*
1094 * This routine unholds page table pages, and if the hold count
1095 * drops to zero, then it decrements the wire count.
1096 */
1097static int
1098_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1099	int s;
1100
1101	while (vm_page_sleep(m, "pmuwpt", NULL));
1102
1103	if (m->hold_count == 0) {
1104		vm_offset_t pteva;
1105		/*
1106		 * unmap the page table page
1107		 */
1108		pmap->pm_pdir[m->pindex] = 0;
1109		--pmap->pm_stats.resident_count;
1110		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1111			(((unsigned) PTDpde) & PG_FRAME)) {
1112			/*
1113			 * Do an invltlb to make the invalidated mapping
1114			 * take effect immediately.
1115			 */
1116			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
1117			invltlb_1pg(pteva);
1118		}
1119
1120		if (pmap->pm_ptphint == m)
1121			pmap->pm_ptphint = NULL;
1122
1123		/*
1124		 * If the page is finally unwired, simply free it.
1125		 */
1126		--m->wire_count;
1127		if (m->wire_count == 0) {
1128
1129			if (m->flags & PG_WANTED) {
1130				m->flags &= ~PG_WANTED;
1131				wakeup(m);
1132			}
1133
1134			m->flags |= PG_BUSY;
1135			vm_page_free_zero(m);
1136			--cnt.v_wire_count;
1137		}
1138		return 1;
1139	}
1140	return 0;
1141}
1142
1143static PMAP_INLINE int
1144pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
1145	vm_page_unhold(m);
1146	if (m->hold_count == 0)
1147		return _pmap_unwire_pte_hold(pmap, m);
1148	else
1149		return 0;
1150}
1151
1152/*
1153 * After removing a page table entry, this routine is used to
1154 * conditionally free the page, and manage the hold/wire counts.
1155 */
1156static int
1157pmap_unuse_pt(pmap, va, mpte)
1158	pmap_t pmap;
1159	vm_offset_t va;
1160	vm_page_t mpte;
1161{
1162	unsigned ptepindex;
1163	if (va >= UPT_MIN_ADDRESS)
1164		return 0;
1165
1166	if (mpte == NULL) {
1167		ptepindex = (va >> PDRSHIFT);
1168		if (pmap->pm_ptphint &&
1169			(pmap->pm_ptphint->pindex == ptepindex)) {
1170			mpte = pmap->pm_ptphint;
1171		} else {
1172			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1173			pmap->pm_ptphint = mpte;
1174		}
1175	}
1176
1177	return pmap_unwire_pte_hold(pmap, mpte);
1178}
1179
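/*
 * pmap_pinit0 initializes the pmap for process 0.  On a UP kernel it simply
 * aliases the boot-time page directory (IdlePTD) rather than allocating a
 * new one; the SMP version falls through to the regular pmap_pinit() path.
 */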
1180#if !defined(SMP)
1181void
1182pmap_pinit0(pmap)
1183	struct pmap *pmap;
1184{
1185	pmap->pm_pdir =
1186		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1187	pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
1188	pmap->pm_flags = 0;
1189	pmap->pm_count = 1;
1190	pmap->pm_ptphint = NULL;
1191	TAILQ_INIT(&pmap->pm_pvlist);
1192	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1193}
1194#else
1195void
1196pmap_pinit0(pmap)
1197	struct pmap *pmap;
1198{
1199	pmap_pinit(pmap);
1200}
1201#endif
1202
1203/*
1204 * Initialize a preallocated and zeroed pmap structure,
1205 * such as one in a vmspace structure.
1206 */
1207void
1208pmap_pinit(pmap)
1209	register struct pmap *pmap;
1210{
1211	vm_page_t ptdpg;
1212
1213	/*
1214	 * No need to allocate page table space yet but we do need a valid
1215	 * page directory table.
1216	 */
1217	if (pmap->pm_pdir == NULL)
1218		pmap->pm_pdir =
1219			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1220
1221	/*
1222	 * allocate object for the ptes
1223	 */
1224	if (pmap->pm_pteobj == NULL)
1225		pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
1226
1227	/*
1228	 * allocate the page directory page
1229	 */
1230retry:
1231	ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI,
1232			VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1233
1234	ptdpg->wire_count = 1;
1235	++cnt.v_wire_count;
1236
1237	ptdpg->flags &= ~(PG_MAPPED | PG_BUSY);	/* not mapped normally */
1238	ptdpg->valid = VM_PAGE_BITS_ALL;
1239
1240	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
1241	if ((ptdpg->flags & PG_ZERO) == 0)
1242		bzero(pmap->pm_pdir, PAGE_SIZE);
1243
1244	/* wire in kernel global address entries */
1245	/* XXX copies current process, does not fill in MPPTDI */
1246	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
1247
1248	/* install self-referential address mapping entry */
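	/*
	 * Pointing the PTDPTDI slot back at the page directory itself is
	 * what makes the recursive PTmap/APTmap windows used by get_ptbase()
	 * and pmap_pte() work for this pmap.
	 */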
1249	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1250		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
1251
1252	pmap->pm_flags = 0;
1253	pmap->pm_count = 1;
1254	pmap->pm_ptphint = NULL;
1255	TAILQ_INIT(&pmap->pm_pvlist);
1256	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1257}
1258
1259static int
1260pmap_release_free_page(pmap, p)
1261	struct pmap *pmap;
1262	vm_page_t p;
1263{
1264	int s;
1265	unsigned *pde = (unsigned *) pmap->pm_pdir;
1266	/*
1267	 * This code optimizes the case of freeing non-busy
1268	 * page-table pages.  Those pages are zero now, and
1269	 * might as well be placed directly into the zero queue.
1270	 */
1271	if (vm_page_sleep(p, "pmaprl", NULL))
1272		return 0;
1273
1274	p->flags |= PG_BUSY;
1275
1276	/*
1277	 * Remove the page table page from the process's address space.
1278	 */
1279	pde[p->pindex] = 0;
1280	pmap->pm_stats.resident_count--;
1281
1282#if !defined(MAX_PERF)
1283	if (p->hold_count)  {
1284		panic("pmap_release: freeing held page table page");
1285	}
1286#endif
1287	/*
1288	 * Page directory pages need to have the kernel
1289	 * stuff cleared, so they can go into the zero queue also.
1290	 */
1291	if (p->pindex == PTDPTDI) {
1292		bzero(pde + KPTDI, nkpt * PTESIZE);
1293#ifdef SMP
1294		pde[MPPTDI] = 0;
1295#endif
1296		pde[APTDPTDI] = 0;
1297		pmap_kremove((vm_offset_t) pmap->pm_pdir);
1298	}
1299
1300	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
1301		pmap->pm_ptphint = NULL;
1302
1303	vm_page_free_zero(p);
1304	return 1;
1305}
1306
1307/*
1308 * this routine is called if the page table page needed for the
1309 * mapping is not resident; it allocates and maps a new one.
1310 */
1311static vm_page_t
1312_pmap_allocpte(pmap, ptepindex)
1313	pmap_t	pmap;
1314	unsigned ptepindex;
1315{
1316	vm_offset_t pteva, ptepa;
1317	vm_page_t m;
1318
1319	/*
1320	 * Find or fabricate a new pagetable page
1321	 */
1322	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
1323			VM_ALLOC_ZERO | VM_ALLOC_RETRY);
1324
1325	if (m->queue != PQ_NONE) {
1326		int s = splvm();
1327		vm_page_unqueue(m);
1328		splx(s);
1329	}
1330
1331	if (m->wire_count == 0)
1332		cnt.v_wire_count++;
1333	m->wire_count++;
1334
1335	/*
1336	 * Increment the hold count for the page table page
1337	 * (denoting a new mapping.)
1338	 */
1339	m->hold_count++;
1340
1341	/*
1342	 * Map the pagetable page into the process address space, if
1343	 * it isn't already there.
1344	 */
1345
1346	pmap->pm_stats.resident_count++;
1347
1348	ptepa = VM_PAGE_TO_PHYS(m);
1349	pmap->pm_pdir[ptepindex] =
1350		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
1351
1352	/*
1353	 * Set the page table hint
1354	 */
1355	pmap->pm_ptphint = m;
1356
1357	/*
1358	 * Try to use the new mapping, but if we cannot, then
1359	 * do it with the routine that maps the page explicitly.
1360	 */
1361	if ((m->flags & PG_ZERO) == 0) {
1362		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1363			(((unsigned) PTDpde) & PG_FRAME)) {
1364			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
1365			bzero((caddr_t) pteva, PAGE_SIZE);
1366		} else {
1367			pmap_zero_page(ptepa);
1368		}
1369	}
1370
1371	m->valid = VM_PAGE_BITS_ALL;
1372	m->flags &= ~(PG_ZERO | PG_BUSY);
1373	m->flags |= PG_MAPPED;
1374
1375	return m;
1376}
1377
1378static vm_page_t
1379pmap_allocpte(pmap, va)
1380	pmap_t	pmap;
1381	vm_offset_t va;
1382{
1383	unsigned ptepindex;
1384	vm_offset_t ptepa;
1385	vm_page_t m;
1386
1387	/*
1388	 * Calculate pagetable page index
1389	 */
1390	ptepindex = va >> PDRSHIFT;
1391
1392	/*
1393	 * Get the page directory entry
1394	 */
1395	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1396
1397	/*
1398	 * This supports switching from a 4MB page to a
1399	 * normal 4K page.
1400	 */
1401	if (ptepa & PG_PS) {
1402		pmap->pm_pdir[ptepindex] = 0;
1403		ptepa = 0;
1404		invltlb();
1405	}
1406
1407	/*
1408	 * If the page table page is mapped, we just increment the
1409	 * hold count, and activate it.
1410	 */
1411	if (ptepa) {
1412		/*
1413		 * In order to get the page table page, try the
1414		 * hint first.
1415		 */
1416		if (pmap->pm_ptphint &&
1417			(pmap->pm_ptphint->pindex == ptepindex)) {
1418			m = pmap->pm_ptphint;
1419		} else {
1420			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1421			pmap->pm_ptphint = m;
1422		}
1423		m->hold_count++;
1424		return m;
1425	}
1426	/*
1427	 * Here if the pte page isn't mapped, or if it has been deallocated.
1428	 */
1429	return _pmap_allocpte(pmap, ptepindex);
1430}
1431
1432
1433/***************************************************
1434 * Pmap allocation/deallocation routines.
1435 ***************************************************/
1436
1437/*
1438 * Release any resources held by the given physical map.
1439 * Called when a pmap initialized by pmap_pinit is being released.
1440 * Should only be called if the map contains no valid mappings.
1441 */
1442void
1443pmap_release(pmap)
1444	register struct pmap *pmap;
1445{
1446	vm_page_t p,n,ptdpg;
1447	vm_object_t object = pmap->pm_pteobj;
1448	int curgeneration;
1449
1450#if defined(DIAGNOSTIC)
1451	if (object->ref_count != 1)
1452		panic("pmap_release: pteobj reference count != 1");
1453#endif
1454
1455	ptdpg = NULL;
1456retry:
1457	curgeneration = object->generation;
1458	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
1459		n = TAILQ_NEXT(p, listq);
1460		if (p->pindex == PTDPTDI) {
1461			ptdpg = p;
1462			continue;
1463		}
1464		while (!pmap_release_free_page(pmap, p)) {
1465			if (object->generation != curgeneration)
1466				goto retry;
1467		}
1469	}
1470
1471	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
1472		goto retry;
1473}
1474
1475/*
1476 * grow the number of kernel page table entries, if needed
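 *
 * Each pmap keeps its own copy of the kernel page directory entries, so a
 * newly allocated kernel page table page must be entered into the page
 * directory of every existing process (and every per-CPU IdlePTD under
 * SMP) as well as into the kernel pmap itself.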
1477 */
1478void
1479pmap_growkernel(vm_offset_t addr)
1480{
1481	struct proc *p;
1482	struct pmap *pmap;
1483	int s;
1484	vm_offset_t ptppaddr;
1485	vm_page_t nkpg;
1486#ifdef SMP
1487	int i;
1488#endif
1489	pd_entry_t newpdir;
1490
1491	s = splhigh();
1492	if (kernel_vm_end == 0) {
1493		kernel_vm_end = KERNBASE;
1494		nkpt = 0;
1495		while (pdir_pde(PTD, kernel_vm_end)) {
1496			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1497			nkpt++;
1498		}
1499	}
1500	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1501	while (kernel_vm_end < addr) {
1502		if (pdir_pde(PTD, kernel_vm_end)) {
1503			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1504			continue;
1505		}
1506
1507		/*
1508		 * This index is bogus, but out of the way
1509		 */
1510		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
1511#if !defined(MAX_PERF)
1512		if (!nkpg)
1513			panic("pmap_growkernel: no memory to grow kernel");
1514#endif
1515
1516		nkpt++;
1517
1518		vm_page_wire(nkpg);
1519		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1520		pmap_zero_page(ptppaddr);
1521		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
1522		pdir_pde(PTD, kernel_vm_end) = newpdir;
1523
1524#ifdef SMP
1525		for (i = 0; i < mp_ncpus; i++) {
1526			if (IdlePTDS[i])
1527				pdir_pde(IdlePTDS[i], kernel_vm_end) = newpdir;
1528		}
1529#endif
1530
1531		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1532			if (p->p_vmspace) {
1533				pmap = &p->p_vmspace->vm_pmap;
1534				*pmap_pde(pmap, kernel_vm_end) = newpdir;
1535			}
1536		}
1537		*pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
1538		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1539	}
1540	splx(s);
1541}
1542
1543/*
1544 *	Retire the given physical map from service.
1545 *	Should only be called if the map contains
1546 *	no valid mappings.
1547 */
1548void
1549pmap_destroy(pmap)
1550	register pmap_t pmap;
1551{
1552	int count;
1553
1554	if (pmap == NULL)
1555		return;
1556
1557	count = --pmap->pm_count;
1558	if (count == 0) {
1559		pmap_release(pmap);
1560#if !defined(MAX_PERF)
1561		panic("destroying a pmap is not yet implemented");
1562#endif
1563	}
1564}
1565
1566/*
1567 *	Add a reference to the specified pmap.
1568 */
1569void
1570pmap_reference(pmap)
1571	pmap_t pmap;
1572{
1573	if (pmap != NULL) {
1574		pmap->pm_count++;
1575	}
1576}
1577
1578/***************************************************
1579 * page management routines.
1580 ***************************************************/
1581
1582/*
1583 * free the pv_entry back to the free list
1584 */
1585static PMAP_INLINE void
1586free_pv_entry(pv)
1587	pv_entry_t pv;
1588{
1589	pv_entry_count--;
1590	zfreei(pvzone, pv);
1591}
1592
1593/*
1594 * get a new pv_entry, allocating a block from the system
1595 * when needed.
1596 * the memory allocation is performed bypassing the malloc code
1597 * because of the possibility of allocations at interrupt time.
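 *
 * When the number of live pv entries crosses pv_entry_high_water, the
 * pagedaemon is woken so that pmap_collect() can reclaim pv entries by
 * tearing down mappings of idle pages.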
1598 */
1599static pv_entry_t
1600get_pv_entry(void)
1601{
1602	pv_entry_count++;
1603	if (pv_entry_high_water &&
1604		(pv_entry_count > pv_entry_high_water) &&
1605		(pmap_pagedaemon_waken == 0)) {
1606		pmap_pagedaemon_waken = 1;
1607		wakeup (&vm_pages_needed);
1608	}
1609	return zalloci(pvzone);
1610}
1611
1612/*
1613 * This routine is very drastic, but can save the system
1614 * in a pinch.
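 *
 * It walks every managed page and, for pages that are not wired, held or
 * busy, removes all of their mappings via pmap_remove_all(), returning
 * the associated pv entries to the zone.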
1615 */
1616void
1617pmap_collect() {
1618	pv_table_t *ppv;
1619	int i;
1620	vm_offset_t pa;
1621	vm_page_t m;
1622	static int warningdone=0;
1623
1624	if (pmap_pagedaemon_waken == 0)
1625		return;
1626
1627	if (warningdone < 5) {
1628		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1629		warningdone++;
1630	}
1631
1632	for(i = 0; i < pv_npg; i++) {
1633		if ((ppv = &pv_table[i]) == 0)
1634			continue;
1635		m = ppv->pv_vm_page;
1636		if ((pa = VM_PAGE_TO_PHYS(m)) == 0)
1637			continue;
1638		if (m->wire_count || m->hold_count || m->busy ||
1639			(m->flags & PG_BUSY))
1640			continue;
1641		pmap_remove_all(pa);
1642	}
1643	pmap_pagedaemon_waken = 0;
1644}
1645
1646
1647/*
1648 * Remove the pv entry for (pmap, va) from the page's pv list and
1649 * from the pmap's pv list, free it, and release the hold on the
1650 * page table page via pmap_unuse_pt().
1652 */
1653
1654static int
1655pmap_remove_entry(pmap, ppv, va)
1656	struct pmap *pmap;
1657	pv_table_t *ppv;
1658	vm_offset_t va;
1659{
1660	pv_entry_t pv;
1661	int rtval;
1662	int s;
1663
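	/*
	 * Search whichever list is likely to be shorter: the page's pv list
	 * when it has fewer entries than the pmap has resident pages, or
	 * the pmap-wide pv list otherwise, matching on the virtual address.
	 */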
1664	s = splvm();
1665	if (ppv->pv_list_count < pmap->pm_stats.resident_count) {
1666		for (pv = TAILQ_FIRST(&ppv->pv_list);
1667			pv;
1668			pv = TAILQ_NEXT(pv, pv_list)) {
1669			if (pmap == pv->pv_pmap && va == pv->pv_va)
1670				break;
1671		}
1672	} else {
1673		for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1674			pv;
1675			pv = TAILQ_NEXT(pv, pv_plist)) {
1676			if (va == pv->pv_va)
1677				break;
1678		}
1679	}
1680
1681	rtval = 0;
1682	if (pv) {
1683
1684		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1685		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1686		ppv->pv_list_count--;
1687		if (TAILQ_FIRST(&ppv->pv_list) == NULL)
1688			ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
1689
1690		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1691		free_pv_entry(pv);
1692	}
1693
1694	splx(s);
1695	return rtval;
1696}
1697
1698/*
1699 * Create a pv entry for page at pa for
1700 * (pmap, va).
1701 */
1702static void
1703pmap_insert_entry(pmap, va, mpte, pa)
1704	pmap_t pmap;
1705	vm_offset_t va;
1706	vm_page_t mpte;
1707	vm_offset_t pa;
1708{
1709
1710	int s;
1711	pv_entry_t pv;
1712	pv_table_t *ppv;
1713
1714	s = splvm();
1715	pv = get_pv_entry();
1716	pv->pv_va = va;
1717	pv->pv_pmap = pmap;
1718	pv->pv_ptem = mpte;
1719
1720	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1721
1722	ppv = pa_to_pvh(pa);
1723	TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
1724	ppv->pv_list_count++;
1725
1726	splx(s);
1727}
1728
1729/*
1730 * pmap_remove_pte: do the things to unmap a page in a process
1731 */
1732static int
1733pmap_remove_pte(pmap, ptq, va)
1734	struct pmap *pmap;
1735	unsigned *ptq;
1736	vm_offset_t va;
1737{
1738	unsigned oldpte;
1739	pv_table_t *ppv;
1740
1741	oldpte = *ptq;
1742	*ptq = 0;
1743	if (oldpte & PG_W)
1744		pmap->pm_stats.wired_count -= 1;
1745	/*
1746	 * Machines that don't support invlpg also don't support
1747	 * PG_G.
1748	 */
1749	if (oldpte & PG_G)
1750		invlpg(va);
1751	pmap->pm_stats.resident_count -= 1;
1752	if (oldpte & PG_MANAGED) {
1753		ppv = pa_to_pvh(oldpte);
1754		if (oldpte & PG_M) {
1755#if defined(PMAP_DIAGNOSTIC)
1756			if (pmap_nw_modified((pt_entry_t) oldpte)) {
1757				printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte);
1758			}
1759#endif
1760			if (pmap_track_modified(va))
1761				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1762		}
1763		if (oldpte & PG_A)
1764			ppv->pv_vm_page->flags |= PG_REFERENCED;
1765		return pmap_remove_entry(pmap, ppv, va);
1766	} else {
1767		return pmap_unuse_pt(pmap, va, NULL);
1768	}
1769
1770	return 0;
1771}
1772
1773/*
1774 * Remove a single page from a process address space
1775 */
1776static void
1777pmap_remove_page(pmap, va)
1778	struct pmap *pmap;
1779	register vm_offset_t va;
1780{
1781	register unsigned *ptq;
1782
1783	/*
1784	 * if there is no pte for this address, just skip it!!!
1785	 */
1786	if (*pmap_pde(pmap, va) == 0) {
1787		return;
1788	}
1789
1790	/*
1791	 * get a local va for mappings for this pmap.
1792	 */
1793	ptq = get_ptbase(pmap) + i386_btop(va);
1794	if (*ptq) {
1795		(void) pmap_remove_pte(pmap, ptq, va);
1796		invltlb_1pg(va);
1797	}
1798	return;
1799}
1800
1801/*
1802 *	Remove the given range of addresses from the specified map.
1803 *
1804 *	It is assumed that the start and end are properly
1805 *	rounded to the page size.
1806 */
1807void
1808pmap_remove(pmap, sva, eva)
1809	struct pmap *pmap;
1810	register vm_offset_t sva;
1811	register vm_offset_t eva;
1812{
1813	register unsigned *ptbase;
1814	vm_offset_t pdnxt;
1815	vm_offset_t ptpaddr;
1816	vm_offset_t sindex, eindex;
1817	int anyvalid;
1818
1819	if (pmap == NULL)
1820		return;
1821
1822	if (pmap->pm_stats.resident_count == 0)
1823		return;
1824
1825	/*
1826	 * special handling of removing one page.  a very
1827	 * common operation and easy to short circuit some
1828	 * code.
1829	 */
1830	if (((sva + PAGE_SIZE) == eva) &&
1831		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1832		pmap_remove_page(pmap, sva);
1833		return;
1834	}
1835
1836	anyvalid = 0;
1837
1838	/*
1839	 * Get a local virtual address for the mappings that are being
1840	 * worked with.
1841	 */
1842	ptbase = get_ptbase(pmap);
1843
1844	sindex = i386_btop(sva);
1845	eindex = i386_btop(eva);
1846
1847	for (; sindex < eindex; sindex = pdnxt) {
1848		unsigned pdirindex;
1849
1850		/*
1851		 * Calculate index for next page table.
1852		 */
1853		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1854		if (pmap->pm_stats.resident_count == 0)
1855			break;
1856
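		/*
		 * A 4MB (PG_PS) mapping lives entirely in the page directory
		 * entry; clearing that one entry removes the whole 4MB region
		 * and there is no page table page to release.
		 */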
1857		pdirindex = sindex / NPDEPG;
1858		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1859			pmap->pm_pdir[pdirindex] = 0;
1860			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1861			anyvalid++;
1862			continue;
1863		}
1864
1865		/*
1866		 * Weed out invalid mappings. Note: we assume that the page
1867		 * directory table is always allocated, and in kernel virtual.
1868		 */
1869		if (ptpaddr == 0)
1870			continue;
1871
1872		/*
1873		 * Limit our scan to either the end of the va represented
1874		 * by the current page table page, or to the end of the
1875		 * range being removed.
1876		 */
1877		if (pdnxt > eindex) {
1878			pdnxt = eindex;
1879		}
1880
1881		for ( ;sindex != pdnxt; sindex++) {
1882			vm_offset_t va;
1883			if (ptbase[sindex] == 0) {
1884				continue;
1885			}
1886			va = i386_ptob(sindex);
1887
1888			anyvalid++;
1889			if (pmap_remove_pte(pmap,
1890				ptbase + sindex, va))
1891				break;
1892		}
1893	}
1894
1895	if (anyvalid) {
1896		invltlb();
1897	}
1898}
1899
1900/*
1901 *	Routine:	pmap_remove_all
1902 *	Function:
1903 *		Removes this physical page from
1904 *		all physical maps in which it resides.
1905 *		Reflects back modify bits to the pager.
1906 *
1907 *	Notes:
1908 *		Original versions of this routine were very
1909 *		inefficient because they iteratively called
1910 *		pmap_remove (slow...)
1911 */
1912
1913static void
1914pmap_remove_all(pa)
1915	vm_offset_t pa;
1916{
1917	register pv_entry_t pv;
1918	pv_table_t *ppv;
1919	register unsigned *pte, tpte;
1920	int nmodify;
1921	int update_needed;
1922	int s;
1923
1924	nmodify = 0;
1925	update_needed = 0;
1926#if defined(PMAP_DIAGNOSTIC)
1927	/*
1928	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1929	 * pages!
1930	 */
1931	if (!pmap_is_managed(pa)) {
1932		panic("pmap_page_protect: illegal for unmanaged page, pa: 0x%x", pa);
1933	}
1934#endif
1935
1936	s = splvm();
1937	ppv = pa_to_pvh(pa);
1938	while ((pv = TAILQ_FIRST(&ppv->pv_list)) != NULL) {
1939		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
1940
1941		pv->pv_pmap->pm_stats.resident_count--;
1942
1943		tpte = *pte;
1944		*pte = 0;
1945		if (tpte & PG_W)
1946			pv->pv_pmap->pm_stats.wired_count--;
1947
1948		if (tpte & PG_A)
1949			ppv->pv_vm_page->flags |= PG_REFERENCED;
1950
1951		/*
1952		 * Update the vm_page_t clean and reference bits.
1953		 */
1954		if (tpte & PG_M) {
1955#if defined(PMAP_DIAGNOSTIC)
1956			if (pmap_nw_modified((pt_entry_t) tpte)) {
1957				printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", pv->pv_va, tpte);
1958			}
1959#endif
1960			if (pmap_track_modified(pv->pv_va))
1961				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
1962		}
1963		if (!update_needed &&
1964			((!curproc || (&curproc->p_vmspace->vm_pmap == pv->pv_pmap)) ||
1965			(pv->pv_pmap == kernel_pmap))) {
1966			update_needed = 1;
1967		}
1968
1969		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1970		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
1971		ppv->pv_list_count--;
1972		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1973		free_pv_entry(pv);
1974	}
1975
1976	ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
1977
1978	if (update_needed)
1979		invltlb();
1980
1981	splx(s);
1982	return;
1983}
1984
1985/*
1986 *	Set the physical protection on the
1987 *	specified range of this map as requested.
1988 */
1989void
1990pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1991{
1992	register unsigned *ptbase;
1993	vm_offset_t pdnxt, ptpaddr;
1994	vm_pindex_t sindex, eindex;
1995	int anychanged;
1996
1997
1998	if (pmap == NULL)
1999		return;
2000
2001	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2002		pmap_remove(pmap, sva, eva);
2003		return;
2004	}
2005
2006	if (prot & VM_PROT_WRITE)
2007		return;
2008
2009	anychanged = 0;
2010
2011	ptbase = get_ptbase(pmap);
2012
2013	sindex = i386_btop(sva);
2014	eindex = i386_btop(eva);
2015
2016	for (; sindex < eindex; sindex = pdnxt) {
2017
2018		unsigned pdirindex;
2019
2020		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
2021
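		/*
		 * For a 4MB (PG_PS) mapping the protection bits live in the
		 * page directory entry itself, so clear PG_RW (and PG_M)
		 * directly there.
		 */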
2022		pdirindex = sindex / NPDEPG;
2023		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
2024			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
2026			anychanged++;
2027			continue;
2028		}
2029
2030		/*
2031		 * Weed out invalid mappings. Note: we assume that the page
2032		 * directory table is always allocated, and in kernel virtual.
2033		 */
2034		if (ptpaddr == 0)
2035			continue;
2036
2037		if (pdnxt > eindex) {
2038			pdnxt = eindex;
2039		}
2040
2041		for (; sindex != pdnxt; sindex++) {
2042
2043			unsigned pbits;
2044			pv_table_t *ppv;
2045
2046			pbits = ptbase[sindex];
2047
2048			if (pbits & PG_MANAGED) {
2049				ppv = NULL;
2050				if (pbits & PG_A) {
2051					ppv = pa_to_pvh(pbits);
2052					ppv->pv_vm_page->flags |= PG_REFERENCED;
2053					pbits &= ~PG_A;
2054				}
2055				if (pbits & PG_M) {
2056					if (pmap_track_modified(i386_ptob(sindex))) {
2057						if (ppv == NULL)
2058							ppv = pa_to_pvh(pbits);
2059						ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2060						pbits &= ~PG_M;
2061					}
2062				}
2063			}
2064
2065			pbits &= ~PG_RW;
2066
2067			if (pbits != ptbase[sindex]) {
2068				ptbase[sindex] = pbits;
2069				anychanged = 1;
2070			}
2071		}
2072	}
2073	if (anychanged)
2074		invltlb();
2075}
2076
2077/*
2078 *	Insert the given physical page (p) at
2079 *	the specified virtual address (v) in the
2080 *	target physical map with the protection requested.
2081 *
2082 *	If specified, the page will be wired down, meaning
2083 *	that the related pte can not be reclaimed.
2084 *
2085 *	NB:  This is the only routine which MAY NOT lazy-evaluate
2086 *	or lose information.  That is, this routine must actually
2087 *	insert this page into the given map NOW.
2088 */
2089void
2090pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot,
2091	   boolean_t wired)
2092{
2093	register unsigned *pte;
2094	vm_offset_t opa;
2095	vm_offset_t origpte, newpte;
2096	vm_page_t mpte;
2097
2098	if (pmap == NULL)
2099		return;
2100
2101	va &= PG_FRAME;
2102#ifdef PMAP_DIAGNOSTIC
2103	if (va > VM_MAX_KERNEL_ADDRESS)
2104		panic("pmap_enter: toobig");
2105	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
2106		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
2107#endif
2108
2109	mpte = NULL;
2110	/*
2111	 * In the case that a page table page is not
2112	 * resident, we are creating it here.
2113	 */
2114	if (va < UPT_MIN_ADDRESS) {
2115		mpte = pmap_allocpte(pmap, va);
2116	}
2117#if 0 && defined(PMAP_DIAGNOSTIC)
2118	else {
2119		vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va);
2120		if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) {
2121			panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n",
2122				pmap->pm_pdir[PTDPTDI], origpte, va);
2123		}
2124		if (smp_active) {
2125			pdeaddr = (vm_offset_t *) IdlePTDS[cpuid];
2126			if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) {
2127				if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr))
2128					printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr);
2129				printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr);
2130				panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n",
2131					pmap->pm_pdir[PTDPTDI], newpte, origpte, va);
2132			}
2133		}
2134	}
2135#endif
2136
2137	pte = pmap_pte(pmap, va);
2138
2139#if !defined(MAX_PERF)
2140	/*
2141	 * Page Directory table entry not valid, we need a new PT page
2142	 */
2143	if (pte == NULL) {
2144		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%lx\n",
2145			pmap->pm_pdir[PTDPTDI], va);
2146	}
2147#endif
2148
2149	origpte = *(vm_offset_t *)pte;
2150	pa &= PG_FRAME;
2151	opa = origpte & PG_FRAME;
2152
2153#if !defined(MAX_PERF)
2154	if (origpte & PG_PS)
2155		panic("pmap_enter: attempted pmap_enter on 4MB page");
2156#endif
2157
2158	/*
2159	 * Mapping has not changed, must be protection or wiring change.
2160	 */
2161	if (origpte && (opa == pa)) {
2162		/*
2163		 * Wiring change, just update stats. We don't worry about
2164		 * wiring PT pages as they remain resident as long as there
2165		 * are valid mappings in them. Hence, if a user page is wired,
2166		 * the PT page will be also.
2167		 */
2168		if (wired && ((origpte & PG_W) == 0))
2169			pmap->pm_stats.wired_count++;
2170		else if (!wired && (origpte & PG_W))
2171			pmap->pm_stats.wired_count--;
2172
2173#if defined(PMAP_DIAGNOSTIC)
2174		if (pmap_nw_modified((pt_entry_t) origpte)) {
2175			printf("pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, origpte);
2176		}
2177#endif
2178
2179		/*
2180		 * Remove extra pte reference
2181		 */
2182		if (mpte)
2183			mpte->hold_count--;
2184
2185		if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
2186			if ((origpte & PG_RW) == 0) {
2187				*pte |= PG_RW;
2188				invltlb_1pg(va);
2189			}
2190			return;
2191		}
2192
2193		/*
2194		 * We might be turning off write access to the page,
2195		 * so we go ahead and sense modify status.
2196		 */
2197		if (origpte & PG_MANAGED) {
2198			if ((origpte & PG_M) && pmap_track_modified(va)) {
2199				pv_table_t *ppv;
2200				ppv = pa_to_pvh(opa);
2201				ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2202			}
2203			pa |= PG_MANAGED;
2204		}
2205		goto validate;
2206	}
2207	/*
2208	 * Mapping has changed, invalidate old range and fall through to
2209	 * handle validating new mapping.
2210	 */
2211	if (opa) {
2212		int err;
2213		err = pmap_remove_pte(pmap, pte, va);
2214#if !defined(MAX_PERF)
2215		if (err)
2216			panic("pmap_enter: pte vanished, va: 0x%x", va);
2217#endif
2218	}
2219
2220	/*
2221	 * Enter on the PV list if part of our managed memory.  Note that we
2222	 * raise IPL while manipulating pv_table since pmap_enter can be
2223	 * called at interrupt time.
2224	 */
2225	if (pmap_is_managed(pa)) {
2226		pmap_insert_entry(pmap, va, mpte, pa);
2227		pa |= PG_MANAGED;
2228	}
2229
2230	/*
2231	 * Increment counters
2232	 */
2233	pmap->pm_stats.resident_count++;
2234	if (wired)
2235		pmap->pm_stats.wired_count++;
2236
2237validate:
2238	/*
2239	 * Now validate mapping with desired protection/wiring.
2240	 */
2241	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2242
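	/*
	 * PG_W marks the entry wired so that pmap_remove_pages() leaves
	 * it in place and the wired statistics stay correct; PG_U makes
	 * user addresses (everything below the recursive page-table
	 * region) accessible from user mode; pgeflag adds PG_G on
	 * processors with global-page support, so kernel entries survive
	 * %cr3 reloads.
	 */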
2243	if (wired)
2244		newpte |= PG_W;
2245	if (va < UPT_MIN_ADDRESS)
2246		newpte |= PG_U;
2247	if (pmap == kernel_pmap)
2248		newpte |= pgeflag;
2249
2250	/*
2251	 * if the mapping or permission bits are different, we need
2252	 * to update the pte.
2253	 */
2254	if ((origpte & ~(PG_M|PG_A)) != newpte) {
2255		*pte = newpte | PG_A;
2256		if (origpte)
2257			invltlb_1pg(va);
2258	}
2259}
2260
2261/*
2262 * this code makes some *MAJOR* assumptions:
2263 * 1. Current pmap & pmap exist.
2264 * 2. Not wired.
2265 * 3. Read access.
2266 * 4. No page table pages.
2267 * 5. TLB flush is deferred to the calling procedure.
2268 * 6. Page IS managed.
2269 * but is *MUCH* faster than pmap_enter...
2270 */
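/*
 * Within this file the callers are pmap_object_init_pt() and
 * pmap_prefault(); both chain the returned page-table page through
 * successive calls, e.g.
 *
 *	mpte = pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(p), mpte);
 *
 * so that repeated entries into the same 4MB region reuse the hint
 * instead of looking the page-table page up again.
 */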
2271
2272static vm_page_t
2273pmap_enter_quick(pmap, va, pa, mpte)
2274	register pmap_t pmap;
2275	vm_offset_t va;
2276	register vm_offset_t pa;
2277	vm_page_t mpte;
2278{
2279	register unsigned *pte;
2280
2281	/*
2282	 * In the case that a page table page is not
2283	 * resident, we are creating it here.
2284	 */
2285	if (va < UPT_MIN_ADDRESS) {
2286		unsigned ptepindex;
2287		vm_offset_t ptepa;
2288
2289		/*
2290		 * Calculate pagetable page index
2291		 */
2292		ptepindex = va >> PDRSHIFT;
2293		if (mpte && (mpte->pindex == ptepindex)) {
2294			mpte->hold_count++;
2295		} else {
2296retry:
2297			/*
2298			 * Get the page directory entry
2299			 */
2300			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2301
2302			/*
2303			 * If the page table page is mapped, we just increment
2304			 * the hold count, and activate it.
2305			 */
2306			if (ptepa) {
2307#if !defined(MAX_PERF)
2308				if (ptepa & PG_PS)
2309					panic("pmap_enter_quick: unexpected mapping into 4MB page");
2310#endif
2311				if (pmap->pm_ptphint &&
2312					(pmap->pm_ptphint->pindex == ptepindex)) {
2313					mpte = pmap->pm_ptphint;
2314				} else {
2315					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2316					pmap->pm_ptphint = mpte;
2317				}
2318				if (mpte == NULL)
2319					goto retry;
2320				mpte->hold_count++;
2321			} else {
2322				mpte = _pmap_allocpte(pmap, ptepindex);
2323			}
2324		}
2325	} else {
2326		mpte = NULL;
2327	}
2328
2329	/*
2330	 * This call to vtopte makes the assumption that we are
2331	 * entering the page into the current pmap.  In order to support
2332	 * quick entry into any pmap, one would likely use pmap_pte_quick.
2333	 * But that isn't as quick as vtopte.
2334	 */
2335	pte = (unsigned *)vtopte(va);
2336	if (*pte) {
2337		if (mpte)
2338			pmap_unwire_pte_hold(pmap, mpte);
2339		return 0;
2340	}
2341
2342	/*
2343	 * Enter on the PV list if part of our managed memory.  Note that we
2344	 * raise IPL while manipulating pv_table since pmap_enter can be
2345	 * called at interrupt time.
2346	 */
2347	pmap_insert_entry(pmap, va, mpte, pa);
2348
2349	/*
2350	 * Increment counters
2351	 */
2352	pmap->pm_stats.resident_count++;
2353
2354	/*
2355	 * Now validate mapping with RO protection
2356	 */
2357	*pte = pa | PG_V | PG_U | PG_MANAGED;
2358
2359	return mpte;
2360}
2361
2362#define MAX_INIT_PT (96)
2363/*
2364 * pmap_object_init_pt preloads the ptes for a given object
2365 * into the specified pmap.  This eliminates the blast of soft
2366 * faults on process startup and immediately after an mmap.
2367 */
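/*
 * A hedged usage sketch (the caller and argument names are assumptions,
 * not taken from this file): the mmap path typically invokes this right
 * after installing a map entry, along the lines of
 *
 *	pmap_object_init_pt(map->pmap, addr, object,
 *	    OFF_TO_IDX(offset), size, 1);
 *
 * with a non-zero `limit' so that only modestly sized, mostly resident
 * objects (see MAX_INIT_PT above) are preloaded.
 */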
2368void
2369pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
2370	pmap_t pmap;
2371	vm_offset_t addr;
2372	vm_object_t object;
2373	vm_pindex_t pindex;
2374	vm_size_t size;
2375	int limit;
2376{
2377	vm_offset_t tmpidx;
2378	int psize;
2379	vm_page_t p, mpte;
2380	int objpgs;
2381
2382	if (!pmap)
2383		return;
2384
2385	/*
2386	 * This code maps large physical mmap regions into the
2387	 * processor address space.  Note that some shortcuts
2388	 * are taken, but the code works.
2389	 */
2390	if (pseflag &&
2391		(object->type == OBJT_DEVICE) &&
2392		((addr & (NBPDR - 1)) == 0) &&
2393		((size & (NBPDR - 1)) == 0) ) {
2394		int i;
2395		int s;
2396		vm_page_t m[1];
2397		unsigned int ptepindex;
2398		int npdes;
2399		vm_offset_t ptepa;
2400
2401		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
2402			return;
2403
2404retry:
2405		p = vm_page_lookup(object, pindex);
2406		if (p && vm_page_sleep(p, "init4p", NULL))
2407			goto retry;
2408
2409		if (p == NULL) {
2410			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
2411			if (p == NULL)
2412				return;
2413			m[0] = p;
2414
2415			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
2416				vm_page_free(p);
2417				return;
2418			}
2419
2420			p = vm_page_lookup(object, pindex);
2421			PAGE_WAKEUP(p);
2422		}
2423
2424		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
2425		if (ptepa & (NBPDR - 1)) {
2426			return;
2427		}
2428
2429		p->valid = VM_PAGE_BITS_ALL;
2430
2431		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
2432		npdes = size >> PDRSHIFT;
2433		for (i = 0; i < npdes; i++) {
2434			pmap->pm_pdir[ptepindex] =
2435				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
2436			ptepa += NBPDR;
2437			ptepindex += 1;
2438		}
2439		p->flags |= PG_MAPPED;
2440		invltlb();
2441		return;
2442	}
2443
2444	psize = i386_btop(size);
2445
2446	if ((object->type != OBJT_VNODE) ||
2447		(limit && (psize > MAX_INIT_PT) &&
2448			(object->resident_page_count > MAX_INIT_PT))) {
2449		return;
2450	}
2451
2452	if (psize + pindex > object->size)
2453		psize = object->size - pindex;
2454
2455	mpte = NULL;
2456	/*
2457	 * if we are processing a major portion of the object, then scan the
2458	 * entire thing.
2459	 */
2460	if (psize > (object->size >> 2)) {
2461		objpgs = psize;
2462
2463		for (p = TAILQ_FIRST(&object->memq);
2464		    ((objpgs > 0) && (p != NULL));
2465		    p = TAILQ_NEXT(p, listq)) {
2466
2467			tmpidx = p->pindex;
2468			if (tmpidx < pindex) {
2469				continue;
2470			}
2471			tmpidx -= pindex;
2472			if (tmpidx >= psize) {
2473				continue;
2474			}
2475			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2476			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2477				if ((p->queue - p->pc) == PQ_CACHE)
2478					vm_page_deactivate(p);
2479				p->flags |= PG_BUSY;
2480				mpte = pmap_enter_quick(pmap,
2481					addr + i386_ptob(tmpidx),
2482					VM_PAGE_TO_PHYS(p), mpte);
2483				p->flags |= PG_MAPPED;
2484				PAGE_WAKEUP(p);
2485			}
2486			objpgs -= 1;
2487		}
2488	} else {
2489		/*
2490		 * otherwise, look the pages up one by one.
2491		 */
2492		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
2493			p = vm_page_lookup(object, tmpidx + pindex);
2494			if (p &&
2495			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2496			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2497				if ((p->queue - p->pc) == PQ_CACHE)
2498					vm_page_deactivate(p);
2499				p->flags |= PG_BUSY;
2500				mpte = pmap_enter_quick(pmap,
2501					addr + i386_ptob(tmpidx),
2502					VM_PAGE_TO_PHYS(p), mpte);
2503				p->flags |= PG_MAPPED;
2504				PAGE_WAKEUP(p);
2505			}
2506		}
2507	}
2508	return;
2509}
2510
2511/*
2512 * pmap_prefault provides a quick way of clustering
2513 * page faults into a process's address space.  It is a "cousin"
2514 * of pmap_object_init_pt, except it runs at page fault time instead
2515 * of mmap time.
2516 */
2517#define PFBAK 4
2518#define PFFOR 4
2519#define PAGEORDER_SIZE (PFBAK+PFFOR)
2520
2521static int pmap_prefault_pageorder[] = {
2522	-PAGE_SIZE, PAGE_SIZE,
2523	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
2524	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
2525	-4 * PAGE_SIZE, 4 * PAGE_SIZE
2526};
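
/*
 * The order above is "nearest first": one page before and one page after
 * the faulting address, then two before/after, and so on out to
 * PFBAK/PFFOR pages, so the pages most likely to be touched next are
 * examined first.
 */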
2527
2528void
2529pmap_prefault(pmap, addra, entry)
2530	pmap_t pmap;
2531	vm_offset_t addra;
2532	vm_map_entry_t entry;
2533{
2534	int i;
2535	vm_offset_t starta;
2536	vm_offset_t addr;
2537	vm_pindex_t pindex;
2538	vm_page_t m, mpte;
2539	vm_object_t object;
2540
2541	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap))
2542		return;
2543
2544	object = entry->object.vm_object;
2545
2546	starta = addra - PFBAK * PAGE_SIZE;
2547	if (starta < entry->start) {
2548		starta = entry->start;
2549	} else if (starta > addra) {
2550		starta = 0;
2551	}
2552
2553	mpte = NULL;
2554	for (i = 0; i < PAGEORDER_SIZE; i++) {
2555		vm_object_t lobject;
2556		unsigned *pte;
2557
2558		addr = addra + pmap_prefault_pageorder[i];
2559		if (addr > addra + (PFFOR * PAGE_SIZE))
2560			addr = 0;
2561
2562		if (addr < starta || addr >= entry->end)
2563			continue;
2564
2565		if ((*pmap_pde(pmap, addr)) == 0)
2566			continue;
2567
2568		pte = (unsigned *) vtopte(addr);
2569		if (*pte)
2570			continue;
2571
2572		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2573		lobject = object;
2574		for (m = vm_page_lookup(lobject, pindex);
2575		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2576		    lobject = lobject->backing_object) {
2577			if (lobject->backing_object_offset & PAGE_MASK)
2578				break;
2579			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2580			m = vm_page_lookup(lobject->backing_object, pindex);
2581		}
2582
2583		/*
2584		 * give up when a page is not in memory
2585		 */
2586		if (m == NULL)
2587			break;
2588
2589		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2590		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2591
2592			if ((m->queue - m->pc) == PQ_CACHE) {
2593				vm_page_deactivate(m);
2594			}
2595			m->flags |= PG_BUSY;
2596			mpte = pmap_enter_quick(pmap, addr,
2597				VM_PAGE_TO_PHYS(m), mpte);
2598			m->flags |= PG_MAPPED;
2599			PAGE_WAKEUP(m);
2600		}
2601	}
2602}
2603
2604/*
2605 *	Routine:	pmap_change_wiring
2606 *	Function:	Change the wiring attribute for a map/virtual-address
2607 *			pair.
2608 *	In/out conditions:
2609 *			The mapping must already exist in the pmap.
2610 */
2611void
2612pmap_change_wiring(pmap, va, wired)
2613	register pmap_t pmap;
2614	vm_offset_t va;
2615	boolean_t wired;
2616{
2617	register unsigned *pte;
2618
2619	if (pmap == NULL)
2620		return;
2621
2622	pte = pmap_pte(pmap, va);
2623
2624	if (wired && !pmap_pte_w(pte))
2625		pmap->pm_stats.wired_count++;
2626	else if (!wired && pmap_pte_w(pte))
2627		pmap->pm_stats.wired_count--;
2628
2629	/*
2630	 * Wiring is not a hardware characteristic so there is no need to
2631	 * invalidate TLB.
2632	 */
2633	pmap_pte_set_w(pte, wired);
2634}
2635
2636
2637
2638/*
2639 *	Copy the range specified by src_addr/len
2640 *	from the source map to the range dst_addr/len
2641 *	in the destination map.
2642 *
2643 *	This routine is only advisory and need not do anything.
2644 */
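/*
 *	A hedged note (the caller is recalled from the VM code of this
 *	era, not quoted from this file): vmspace_fork() calls this at
 *	fork time, roughly as
 *
 *		pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
 *		    (old_entry->end - old_entry->start), old_entry->start);
 *
 *	to pre-populate the child's page tables and avoid a burst of
 *	soft faults immediately after the fork.
 */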
2645
2646void
2647pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2648	pmap_t dst_pmap, src_pmap;
2649	vm_offset_t dst_addr;
2650	vm_size_t len;
2651	vm_offset_t src_addr;
2652{
2653	vm_offset_t addr;
2654	vm_offset_t end_addr = src_addr + len;
2655	vm_offset_t pdnxt;
2656	unsigned src_frame, dst_frame;
2657
2658	if (dst_addr != src_addr)
2659		return;
2660
2661	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2662	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
2663		return;
2664	}
2665
2666	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2667	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
2668		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
2669		invltlb();
2670	}
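
	/*
	 * At this point the source (current) pmap's page tables are
	 * reachable through the normal recursive mapping and the
	 * destination pmap's through the alternate one (APTDpde), which
	 * is why the loop below reads PTEs via vtopte() and writes them
	 * via avtopte().
	 */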
2671
2672	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
2673		unsigned *src_pte, *dst_pte;
2674		vm_page_t dstmpte, srcmpte;
2675		vm_offset_t srcptepaddr;
2676		unsigned ptepindex;
2677
2678#if !defined(MAX_PERF)
2679		if (addr >= UPT_MIN_ADDRESS)
2680			panic("pmap_copy: invalid to pmap_copy page tables\n");
2681#endif
2682
2683		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2684		ptepindex = addr >> PDRSHIFT;
2685
2686		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2687		if (srcptepaddr == 0)
2688			continue;
2689
2690		if (srcptepaddr & PG_PS) {
2691			if (dst_pmap->pm_pdir[ptepindex] == 0) {
2692				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2693				dst_pmap->pm_stats.resident_count += NBPDR;
2694			}
2695			continue;
2696		}
2697
2698		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
2699		if ((srcmpte == NULL) ||
2700			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
2701			continue;
2702
2703		if (pdnxt > end_addr)
2704			pdnxt = end_addr;
2705
2706		src_pte = (unsigned *) vtopte(addr);
2707		dst_pte = (unsigned *) avtopte(addr);
2708		while (addr < pdnxt) {
2709			unsigned ptetemp;
2710			ptetemp = *src_pte;
2711			/*
2712			 * we only virtual copy managed pages
2713			 */
2714			if ((ptetemp & PG_MANAGED) != 0) {
2715				/*
2716				 * We have to check after allocpte for the
2717				 * pte still being around...  allocpte can
2718				 * block.
2719				 */
2720				dstmpte = pmap_allocpte(dst_pmap, addr);
2721				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
2722					/*
2723					 * Clear the modified and
2724					 * accessed (referenced) bits
2725					 * during the copy.
2726					 */
2727					*dst_pte = ptetemp & ~(PG_M | PG_A);
2728					dst_pmap->pm_stats.resident_count++;
2729					pmap_insert_entry(dst_pmap, addr,
2730						dstmpte,
2731						(ptetemp & PG_FRAME));
2732	 			} else {
2733					pmap_unwire_pte_hold(dst_pmap, dstmpte);
2734				}
2735				if (dstmpte->hold_count >= srcmpte->hold_count)
2736					break;
2737			}
2738			addr += PAGE_SIZE;
2739			src_pte++;
2740			dst_pte++;
2741		}
2742	}
2743}
2744
2745/*
2746 *	Routine:	pmap_kernel
2747 *	Function:
2748 *		Returns the physical map handle for the kernel.
2749 */
2750pmap_t
2751pmap_kernel()
2752{
2753	return (kernel_pmap);
2754}
2755
2756/*
2757 *	pmap_zero_page zeros the specified (machine independent)
2758 *	page by mapping the page into virtual memory and using
2759 *	bzero to clear its contents, one machine dependent page
2760 *	at a time.
2761 */
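/*
 *	The temporary mapping uses kernel PTEs reserved at bootstrap time
 *	(CMAP2/CADDR2 here, or the per-CPU prv_CMAP3/prv_CPAGE3 pair in
 *	the SMP case), so zeroing a page never has to allocate kernel
 *	virtual address space.
 */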
2762void
2763pmap_zero_page(phys)
2764	vm_offset_t phys;
2765{
2766#ifdef SMP
2767#if !defined(MAX_PERF)
2768	if (*(int *) prv_CMAP3)
2769		panic("pmap_zero_page: prv_CMAP3 busy");
2770#endif
2771
2772	*(int *) prv_CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2773	cpu_invlpg(&prv_CPAGE3);
2774
2775#if defined(I686_CPU)
2776	if (cpu_class == CPUCLASS_686)
2777		i686_pagezero(&prv_CPAGE3);
2778	else
2779#endif
2780		bzero(&prv_CPAGE3, PAGE_SIZE);
2781
2782	*(int *) prv_CMAP3 = 0;
2783#else
2784#if !defined(MAX_PERF)
2785	if (*(int *) CMAP2)
2786		panic("pmap_zero_page: CMAP busy");
2787#endif
2788
2789	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2790	invltlb_1pg(CADDR2);
2791
2792#if defined(I686_CPU)
2793	if (cpu_class == CPUCLASS_686)
2794		i686_pagezero(CADDR2);
2795	else
2796#endif
2797		bzero(CADDR2, PAGE_SIZE);
2798	*(int *) CMAP2 = 0;
2799#endif
2800}
2801
2802/*
2803 *	pmap_copy_page copies the specified (machine independent)
2804 *	page by mapping the page into virtual memory and using
2805 *	bcopy to copy the page, one machine dependent page at a
2806 *	time.
2807 */
2808void
2809pmap_copy_page(src, dst)
2810	vm_offset_t src;
2811	vm_offset_t dst;
2812{
2813#ifdef SMP
2814#if !defined(MAX_PERF)
2815	if (*(int *) prv_CMAP1)
2816		panic("pmap_copy_page: prv_CMAP1 busy");
2817	if (*(int *) prv_CMAP2)
2818		panic("pmap_copy_page: prv_CMAP2 busy");
2819#endif
2820
2821	*(int *) prv_CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2822	*(int *) prv_CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2823
2824	cpu_invlpg(&prv_CPAGE1);
2825	cpu_invlpg(&prv_CPAGE2);
2826
2827	bcopy(&prv_CPAGE1, &prv_CPAGE2, PAGE_SIZE);
2828
2829	*(int *) prv_CMAP1 = 0;
2830	*(int *) prv_CMAP2 = 0;
2831#else
2832#if !defined(MAX_PERF)
2833	if (*(int *) CMAP1 || *(int *) CMAP2)
2834		panic("pmap_copy_page: CMAP busy");
2835#endif
2836
2837	*(int *) CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2838	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2839
2840	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2841
2842	*(int *) CMAP1 = 0;
2843	*(int *) CMAP2 = 0;
2844#endif
2845}
2846
2847
2848/*
2849 *	Routine:	pmap_pageable
2850 *	Function:
2851 *		Make the specified pages (by pmap, offset)
2852 *		pageable (or not) as requested.
2853 *
2854 *		A page which is not pageable may not take
2855 *		a fault; therefore, its page table entry
2856 *		must remain valid for the duration.
2857 *
2858 *		This routine is merely advisory; pmap_enter
2859 *		will specify that these pages are to be wired
2860 *		down (or not) as appropriate.
2861 */
2862void
2863pmap_pageable(pmap, sva, eva, pageable)
2864	pmap_t pmap;
2865	vm_offset_t sva, eva;
2866	boolean_t pageable;
2867{
2868}
2869
2870/*
2871 * this routine returns true if a physical page resides
2872 * in the given pmap.
2873 */
2874boolean_t
2875pmap_page_exists(pmap, pa)
2876	pmap_t pmap;
2877	vm_offset_t pa;
2878{
2879	register pv_entry_t pv;
2880	pv_table_t *ppv;
2881	int s;
2882
2883	if (!pmap_is_managed(pa))
2884		return FALSE;
2885
2886	s = splvm();
2887
2888	ppv = pa_to_pvh(pa);
2889	/*
2890	 * Check the current mappings, returning immediately if one is found.
2891	 */
2892	for (pv = TAILQ_FIRST(&ppv->pv_list);
2893		pv;
2894		pv = TAILQ_NEXT(pv, pv_list)) {
2895		if (pv->pv_pmap == pmap) {
2896			splx(s);
2897			return TRUE;
2898		}
2899	}
2900	splx(s);
2901	return (FALSE);
2902}
2903
2904#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2905/*
2906 * Remove all pages from the specified address space;
2907 * this aids process exit speeds.  Also, this code
2908 * is special-cased for the current process only, but
2909 * can have the more generic (and slightly slower)
2910 * mode enabled.  This is much faster than pmap_remove
2911 * in the case of running down an entire address space.
2912 */
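/*
 * With PMAP_REMOVE_PAGES_CURPROC_ONLY defined (as it is above), the pmap
 * being torn down is the one currently loaded, so each pte can be found
 * directly through the recursive mapping with vtopte(); the generic path
 * would have to use the slower pmap_pte_quick() instead.
 */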
2913void
2914pmap_remove_pages(pmap, sva, eva)
2915	pmap_t pmap;
2916	vm_offset_t sva, eva;
2917{
2918	unsigned *pte, tpte;
2919	pv_table_t *ppv;
2920	pv_entry_t pv, npv;
2921	int s;
2922
2923#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2924	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) {
2925		printf("warning: pmap_remove_pages called with non-current pmap\n");
2926		return;
2927	}
2928#endif
2929
2930	s = splvm();
2931	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
2932		pv;
2933		pv = npv) {
2934
2935		if (pv->pv_va >= eva || pv->pv_va < sva) {
2936			npv = TAILQ_NEXT(pv, pv_plist);
2937			continue;
2938		}
2939
2940#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2941		pte = (unsigned *)vtopte(pv->pv_va);
2942#else
2943		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2944#endif
2945		tpte = *pte;
2946
2947/*
2948 * We cannot remove wired pages from a process' mapping at this time
2949 */
2950		if (tpte & PG_W) {
2951			npv = TAILQ_NEXT(pv, pv_plist);
2952			continue;
2953		}
2954		*pte = 0;
2955
2956		ppv = pa_to_pvh(tpte);
2957
2958		pv->pv_pmap->pm_stats.resident_count--;
2959
2960		/*
2961		 * Update the vm_page_t clean and reference bits.
2962		 */
2963		if (tpte & PG_M) {
2964			ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
2965		}
2966
2967
2968		npv = TAILQ_NEXT(pv, pv_plist);
2969		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2970
2971		ppv->pv_list_count--;
2972		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
2973		if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
2974			ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
2975		}
2976
2977		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2978		free_pv_entry(pv);
2979	}
2980	splx(s);
2981	invltlb();
2982}
2983
2984/*
2985 * pmap_testbit tests bits in pte's
2986 * note that the testbit/changebit routines are inline,
2987 * and a lot of things compile-time evaluate.
2988 */
2989static boolean_t
2990pmap_testbit(pa, bit)
2991	register vm_offset_t pa;
2992	int bit;
2993{
2994	register pv_entry_t pv;
2995	pv_table_t *ppv;
2996	unsigned *pte;
2997	int s;
2998
2999	if (!pmap_is_managed(pa))
3000		return FALSE;
3001
3002	ppv = pa_to_pvh(pa);
3003	if (TAILQ_FIRST(&ppv->pv_list) == NULL)
3004		return FALSE;
3005
3006	s = splvm();
3007
3008	for (pv = TAILQ_FIRST(&ppv->pv_list);
3009		pv;
3010		pv = TAILQ_NEXT(pv, pv_list)) {
3011
3012		/*
3013		 * if the bit being tested is the modified or accessed bit,
3014		 * then skip mappings in ranges (such as the kernel clean
3015		 * map) where modifications are not tracked.
3016		 */
3017		if (bit & (PG_A|PG_M)) {
3018			if (!pmap_track_modified(pv->pv_va))
3019				continue;
3020		}
3021
3022#if defined(PMAP_DIAGNOSTIC)
3023		if (!pv->pv_pmap) {
3024			printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va);
3025			continue;
3026		}
3027#endif
3028		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3029		if (*pte & bit) {
3030			splx(s);
3031			return TRUE;
3032		}
3033	}
3034	splx(s);
3035	return (FALSE);
3036}
3037
3038/*
3039 * this routine is used to modify bits in ptes
3040 */
3041static void
3042pmap_changebit(pa, bit, setem)
3043	vm_offset_t pa;
3044	int bit;
3045	boolean_t setem;
3046{
3047	register pv_entry_t pv;
3048	pv_table_t *ppv;
3049	register unsigned *pte;
3050	int changed;
3051	int s;
3052
3053	if (!pmap_is_managed(pa))
3054		return;
3055
3056	s = splvm();
3057	changed = 0;
3058	ppv = pa_to_pvh(pa);
3059
3060	/*
3061	 * Loop over all current mappings, setting/clearing as appropriate.
3062	 * If setting RO, do we need to clear the VAC?
3063	 */
3064	for (pv = TAILQ_FIRST(&ppv->pv_list);
3065		pv;
3066		pv = TAILQ_NEXT(pv, pv_list)) {
3067
3068		/*
3069		 * don't write protect pager mappings
3070		 */
3071		if (!setem && (bit == PG_RW)) {
3072			if (!pmap_track_modified(pv->pv_va))
3073				continue;
3074		}
3075
3076#if defined(PMAP_DIAGNOSTIC)
3077		if (!pv->pv_pmap) {
3078			printf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va);
3079			continue;
3080		}
3081#endif
3082
3083		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3084
3085		if (setem) {
3086			*(int *)pte |= bit;
3087			changed = 1;
3088		} else {
3089			vm_offset_t pbits = *(vm_offset_t *)pte;
3090			if (pbits & bit) {
3091				changed = 1;
3092				if (bit == PG_RW) {
3093					if (pbits & PG_M) {
3094						ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
3095					}
3096					*(int *)pte = pbits & ~(PG_M|PG_RW);
3097				} else {
3098					*(int *)pte = pbits & ~bit;
3099				}
3100			}
3101		}
3102	}
3103	splx(s);
3104	if (changed)
3105		invltlb();
3106}
3107
3108/*
3109 *      pmap_page_protect:
3110 *
3111 *      Lower the permission for all mappings to a given page.
3112 */
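/*
 *      Illustrative only (the wrapper named here is recalled from the
 *      VM code, not defined in this file): callers normally go through
 *      vm_page_protect(), which does roughly
 *
 *              pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
 *
 *      to make a page read-only in every pmap, or passes VM_PROT_NONE
 *      to remove all of its mappings.
 */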
3113void
3114pmap_page_protect(vm_offset_t phys, vm_prot_t prot)
3115{
3116	if ((prot & VM_PROT_WRITE) == 0) {
3117		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
3118			pmap_changebit(phys, PG_RW, FALSE);
3119		} else {
3120			pmap_remove_all(phys);
3121		}
3122	}
3123}
3124
3125vm_offset_t
3126pmap_phys_address(ppn)
3127	int ppn;
3128{
3129	return (i386_ptob(ppn));
3130}
3131
3132/*
3133 *	pmap_ts_referenced:
3134 *
3135 *	Return the count of reference bits for a page, clearing all of them.
3136 *
3137 */
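/*
 *	Note that the loop below rotates each pv entry to the tail of its
 *	list and stops early once more than four referenced mappings have
 *	been seen, so the return value is a small bounded count rather
 *	than an exact total.
 */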
3138int
3139pmap_ts_referenced(vm_offset_t pa)
3140{
3141	register pv_entry_t pv;
3142	pv_table_t *ppv;
3143	unsigned *pte;
3144	int s;
3145	int rtval = 0;
3146
3147	if (!pmap_is_managed(pa))
3148		return 0;
3149
3150	s = splvm();
3151
3152	ppv = pa_to_pvh(pa);
3153
3154	if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
3155		splx(s);
3156		return 0;
3157	}
3158
3159	/*
3160	 * Loop over the current mappings, counting and clearing the reference bits.
3161	 */
3162	for (pv = TAILQ_FIRST(&ppv->pv_list);
3163		pv;
3164		pv = TAILQ_NEXT(pv, pv_list)) {
3165
3166		TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
3167		/*
3168		 * don't count references from mappings in ranges
3169		 * (such as the kernel clean map) where modifications
3170		 * are not tracked.
3171		 */
3172		if (!pmap_track_modified(pv->pv_va)) {
3173			TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
3174			continue;
3175		}
3176
3177		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3178		if (pte == NULL) {
3179			TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
3180			continue;
3181		}
3182
3183		if (*pte & PG_A) {
3184			rtval++;
3185			*pte &= ~PG_A;
3186			if (rtval > 4) {
3187				TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
3188				break;
3189			}
3190		}
3191		TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list);
3192	}
3193
3194	splx(s);
3195	if (rtval) {
3196		invltlb();
3197	}
3198	return (rtval);
3199}
3200
3201/*
3202 *	pmap_is_modified:
3203 *
3204 *	Return whether or not the specified physical page was modified
3205 *	in any physical maps.
3206 */
3207boolean_t
3208pmap_is_modified(vm_offset_t pa)
3209{
3210	return pmap_testbit((pa), PG_M);
3211}
3212
3213/*
3214 *	Clear the modify bits on the specified physical page.
3215 */
3216void
3217pmap_clear_modify(vm_offset_t pa)
3218{
3219	pmap_changebit((pa), PG_M, FALSE);
3220}
3221
3222/*
3223 *	pmap_clear_reference:
3224 *
3225 *	Clear the reference bit on the specified physical page.
3226 */
3227void
3228pmap_clear_reference(vm_offset_t pa)
3229{
3230	pmap_changebit((pa), PG_A, FALSE);
3231}
3232
3233/*
3234 * Miscellaneous support routines follow
3235 */
3236
3237static void
3238i386_protection_init()
3239{
3240	register int *kp, prot;
3241
3242	kp = protection_codes;
3243	for (prot = 0; prot < 8; prot++) {
3244		switch (prot) {
3245		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3246			/*
3247			 * Read access is also 0. There isn't any execute bit,
3248			 * so just make it readable.
3249			 */
3250		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3251		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3252		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3253			*kp++ = 0;
3254			break;
3255		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3256		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3257		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3258		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3259			*kp++ = PG_RW;
3260			break;
3261		}
3262	}
3263}
3264
3265/*
3266 * Map a set of physical memory pages into the kernel virtual
3267 * address space. Return a pointer to where it is mapped. This
3268 * routine is intended to be used for mapping device memory,
3269 * NOT real memory.
3270 */
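/*
 * A hedged example (the variables are made up): a driver that needs a
 * memory-mapped register window at physical address `pa' of `size' bytes
 * would do
 *
 *	regs = pmap_mapdev(pa, size);
 *
 * and then access the device through the returned kernel virtual
 * address.
 */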
3271void *
3272pmap_mapdev(pa, size)
3273	vm_offset_t pa;
3274	vm_size_t size;
3275{
3276	vm_offset_t va, tmpva;
3277	unsigned *pte;
3278
3279	size = roundup(size, PAGE_SIZE);
3280
3281	va = kmem_alloc_pageable(kernel_map, size);
3282#if !defined(MAX_PERF)
3283	if (!va)
3284		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3285#endif
3286
3287	pa = pa & PG_FRAME;
3288	for (tmpva = va; size > 0;) {
3289		pte = (unsigned *)vtopte(tmpva);
3290		*pte = pa | PG_RW | PG_V | pgeflag;
3291		size -= PAGE_SIZE;
3292		tmpva += PAGE_SIZE;
3293		pa += PAGE_SIZE;
3294	}
3295	invltlb();
3296
3297	return ((void *) va);
3298}
3299
3300/*
3301 * perform the pmap work for mincore
3302 */
3303int
3304pmap_mincore(pmap, addr)
3305	pmap_t pmap;
3306	vm_offset_t addr;
3307{
3308
3309	unsigned *ptep, pte;
3310	vm_page_t m;
3311	int val = 0;
3312
3313	ptep = pmap_pte(pmap, addr);
3314	if (ptep == 0) {
3315		return 0;
3316	}
3317
3318	if ((pte = *ptep) != 0) {
3319		pv_table_t *ppv;
3320		vm_offset_t pa;
3321
3322		val = MINCORE_INCORE;
3323		if ((pte & PG_MANAGED) == 0)
3324			return val;
3325
3326		pa = pte & PG_FRAME;
3327
3328		ppv = pa_to_pvh((pa & PG_FRAME));
3329		m = ppv->pv_vm_page;
3330
3331		/*
3332		 * Modified by us
3333		 */
3334		if (pte & PG_M)
3335			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3336		/*
3337		 * Modified by someone
3338		 */
3339		else if (m->dirty || pmap_is_modified(pa))
3340			val |= MINCORE_MODIFIED_OTHER;
3341		/*
3342		 * Referenced by us
3343		 */
3344		if (pte & PG_A)
3345			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3346
3347		/*
3348		 * Referenced by someone
3349		 */
3350		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(pa)) {
3351			val |= MINCORE_REFERENCED_OTHER;
3352			m->flags |= PG_REFERENCED;
3353		}
3354	}
3355	return val;
3356}
3357
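/*
 * Activate the address space of the given process on the current CPU by
 * loading the physical address of its page directory into %cr3 (caching
 * it in the pcb so context switches reload the same value).
 */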
3358void
3359pmap_activate(struct proc *p)
3360{
3361#if defined(SWTCH_OPTIM_STATS)
3362	tlb_flush_count++;
3363#endif
3364	load_cr3(p->p_addr->u_pcb.pcb_cr3 =
3365		vtophys(p->p_vmspace->vm_pmap.pm_pdir));
3366}
3367
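/*
 * Suggest a placement for an mmap of a device object: rounding the hint
 * up to a 4MB (NBPDR) boundary allows pmap_object_init_pt() to use PG_PS
 * superpage mappings when the processor supports them; other objects and
 * small requests get the hint back unchanged.
 */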
3368vm_offset_t
3369pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) {
3370
3371	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3372		return addr;
3373	}
3374
3375	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3376	return addr;
3377}
3378
3379
3380#if defined(PMAP_DEBUG)
3381int pmap_pid_dump(int pid) {
3382	pmap_t pmap;
3383	struct proc *p;
3384	int npte = 0;
3385	int index;
3386	for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) {
3387		if (p->p_pid != pid)
3388			continue;
3389
3390		if (p->p_vmspace) {
3391			int i,j;
3392			index = 0;
3393			pmap = &p->p_vmspace->vm_pmap;
3394			for(i=0;i<1024;i++) {
3395				pd_entry_t *pde;
3396				unsigned *pte;
3397				unsigned base = i << PDRSHIFT;
3398
3399				pde = &pmap->pm_pdir[i];
3400				if (pde && pmap_pde_v(pde)) {
3401					for(j=0;j<1024;j++) {
3402						unsigned va = base + (j << PAGE_SHIFT);
3403						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
3404							if (index) {
3405								index = 0;
3406								printf("\n");
3407							}
3408							return npte;
3409						}
3410						pte = pmap_pte_quick( pmap, va);
3411						if (pte && pmap_pte_v(pte)) {
3412							vm_offset_t pa;
3413							vm_page_t m;
3414							pa = *(int *)pte;
3415							m = PHYS_TO_VM_PAGE((pa & PG_FRAME));
3416							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
3417								va, pa, m->hold_count, m->wire_count, m->flags);
3418							npte++;
3419							index++;
3420							if (index >= 2) {
3421								index = 0;
3422								printf("\n");
3423							} else {
3424								printf(" ");
3425							}
3426						}
3427					}
3428				}
3429			}
3430		}
3431	}
3432	return npte;
3433}
3434#endif
3435
3436#if defined(DEBUG)
3437
3438static void	pads __P((pmap_t pm));
3439static void	pmap_pvdump __P((vm_offset_t pa));
3440
3441/* print address space of pmap*/
3442static void
3443pads(pm)
3444	pmap_t pm;
3445{
3446	unsigned va, i, j;
3447	unsigned *ptep;
3448
3449	if (pm == kernel_pmap)
3450		return;
3451	for (i = 0; i < 1024; i++)
3452		if (pm->pm_pdir[i])
3453			for (j = 0; j < 1024; j++) {
3454				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3455				if (pm == kernel_pmap && va < KERNBASE)
3456					continue;
3457				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
3458					continue;
3459				ptep = pmap_pte_quick(pm, va);
3460				if (pmap_pte_v(ptep))
3461					printf("%x:%x ", va, *(int *) ptep);
3462			}
3463
3464}
3465
3466static void
3467pmap_pvdump(pa)
3468	vm_offset_t pa;
3469{
3470	pv_table_t *ppv;
3471	register pv_entry_t pv;
3472
3473	printf("pa %x", pa);
3474	ppv = pa_to_pvh(pa);
3475	for (pv = TAILQ_FIRST(&ppv->pv_list);
3476		pv;
3477		pv = TAILQ_NEXT(pv, pv_list)) {
3478#ifdef used_to_be
3479		printf(" -> pmap %x, va %x, flags %x",
3480		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
3481#endif
3482		printf(" -> pmap %x, va %x",
3483		    pv->pv_pmap, pv->pv_va);
3484		pads(pv->pv_pmap);
3485	}
3486	printf(" ");
3487}
3488#endif
3489