pmap.c revision 73862
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42 * $FreeBSD: head/sys/i386/i386/pmap.c 73862 2001-03-06 06:06:42Z jhb $
43 */
44
45/*
46 *	Manages physical address maps.
47 *
48 *	In addition to hardware address maps, this
49 *	module is called upon to provide software-use-only
50 *	maps which may or may not be stored in the same
51 *	form as hardware maps.  These pseudo-maps are
52 *	used to store intermediate results from copy
53 *	operations to and from address spaces.
54 *
55 *	Since the information managed by this module is
56 *	also stored by the logical address mapping module,
57 *	this module may throw away valid virtual-to-physical
58 *	mappings at almost any time.  However, invalidations
59 *	of virtual-to-physical mappings must be done as
60 *	requested.
61 *
62 *	In order to cope with hardware architectures which
63 *	make virtual-to-physical map invalidates expensive,
64 *	this module may delay invalidate or reduced-protection
65 *	operations until such time as they are actually
66 *	necessary.  This module is given full information as
67 *	to which processors are currently using which maps,
68 *	and to when physical maps must be made correct.
69 */
70
71#include "opt_disable_pse.h"
72#include "opt_pmap.h"
73#include "opt_msgbuf.h"
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/proc.h>
78#include <sys/msgbuf.h>
79#include <sys/vmmeter.h>
80#include <sys/mman.h>
81
82#include <vm/vm.h>
83#include <vm/vm_param.h>
84#include <sys/lock.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_page.h>
87#include <vm/vm_map.h>
88#include <vm/vm_object.h>
89#include <vm/vm_extern.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_pager.h>
92#include <vm/vm_zone.h>
93
94#include <sys/user.h>
95
96#include <machine/cputypes.h>
97#include <machine/md_var.h>
98#include <machine/specialreg.h>
99#if defined(SMP) || defined(APIC_IO)
100#include <machine/smp.h>
101#include <machine/apic.h>
102#include <machine/segments.h>
103#include <machine/tss.h>
104#include <machine/globaldata.h>
105#endif /* SMP || APIC_IO */
106
107#define PMAP_KEEP_PDIRS
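/*
 * PMAP_SHPGPERPROC is an estimate of shared pages per process; together
 * with maxproc it sizes the pv entry zone in pmap_init2().  It can be
 * overridden by the kernel configuration (normally via opt_pmap.h).
 */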
108#ifndef PMAP_SHPGPERPROC
109#define PMAP_SHPGPERPROC 200
110#endif
111
112#if defined(DIAGNOSTIC)
113#define PMAP_DIAGNOSTIC
114#endif
115
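/* Floor on the number of pv entries preallocated in pmap_init(). */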
116#define MINPV 2048
117
118#if !defined(PMAP_DIAGNOSTIC)
119#define PMAP_INLINE __inline
120#else
121#define PMAP_INLINE
122#endif
123
124/*
125 * Get PDEs and PTEs for user/kernel address space
126 */
127#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
128#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
129
130#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
131#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
132#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
133#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
134#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
135
136#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
137#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
138
139/*
140 * Given a map and a machine independent protection code,
141 * convert to a vax protection code.
142 */
143#define pte_prot(m, p)	(protection_codes[p])
144static int protection_codes[8];
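/*
 * protection_codes[] maps VM_PROT_* combinations to i386 PTE protection
 * bits; it is filled in by i386_protection_init() from pmap_bootstrap().
 */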
145
146static struct pmap kernel_pmap_store;
147pmap_t kernel_pmap;
148LIST_HEAD(pmaplist, pmap);
149struct pmaplist allpmaps;
150
151vm_offset_t avail_start;	/* PA of first available physical page */
152vm_offset_t avail_end;		/* PA of last available physical page */
153vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
154vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
155static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
156static int pgeflag;		/* PG_G or-in */
157static int pseflag;		/* PG_PS or-in */
158
159static vm_object_t kptobj;
160
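/* Number of kernel page table pages in use; extended by pmap_growkernel(). */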
161static int nkpt;
162vm_offset_t kernel_vm_end;
163
164/*
165 * Data for the pv entry allocation mechanism
166 */
167static vm_zone_t pvzone;
168static struct vm_zone pvzone_store;
169static struct vm_object pvzone_obj;
170static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
171static int pmap_pagedaemon_waken = 0;
172static struct pv_entry *pvinit;
173
174/*
175 * All those kernel PT submaps that BSD is so fond of
176 */
177pt_entry_t *CMAP1 = 0;
178static pt_entry_t *CMAP2, *ptmmap;
179caddr_t CADDR1 = 0, ptvmmap = 0;
180static caddr_t CADDR2;
181static pt_entry_t *msgbufmap;
182struct msgbuf *msgbufp=0;
183
184/*
185 * Crashdump maps.
186 */
187static pt_entry_t *pt_crashdumpmap;
188static caddr_t crashdumpmap;
189
190#ifdef SMP
191extern pt_entry_t *SMPpt;
192#endif
193static pt_entry_t *PMAP1 = 0;
194static unsigned *PADDR1 = 0;
195
196static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
197static unsigned * get_ptbase __P((pmap_t pmap));
198static pv_entry_t get_pv_entry __P((void));
199static void	i386_protection_init __P((void));
200static __inline void	pmap_changebit __P((vm_page_t m, int bit, boolean_t setem));
201
202static void	pmap_remove_all __P((vm_page_t m));
203static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
204				      vm_page_t m, vm_page_t mpte));
205static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
206					vm_offset_t sva));
207static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
208static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m,
209					vm_offset_t va));
210static boolean_t pmap_testbit __P((vm_page_t m, int bit));
211static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
212		vm_page_t mpte, vm_page_t m));
213
214static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
215
216static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
217static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
218static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
219static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
220static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
221static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
222
223static unsigned pdir4mb;
224
225/*
226 *	Routine:	pmap_pte
227 *	Function:
228 *		Extract the page table entry associated
229 *		with the given map/virtual_address pair.
230 */
231
232PMAP_INLINE unsigned *
233pmap_pte(pmap, va)
234	register pmap_t pmap;
235	vm_offset_t va;
236{
237	unsigned *pdeaddr;
238
239	if (pmap) {
240		pdeaddr = (unsigned *) pmap_pde(pmap, va);
241		if (*pdeaddr & PG_PS)
242			return pdeaddr;
243		if (*pdeaddr) {
244			return get_ptbase(pmap) + i386_btop(va);
245		}
246	}
247	return (0);
248}
249
250/*
251 * Move the kernel virtual free pointer to the next
252 * 4MB.  This is used to help improve performance
253 * by using a large (4MB) page for much of the kernel
254 * (.text, .data, .bss)
255 */
256static vm_offset_t
257pmap_kmem_choose(vm_offset_t addr)
258{
259	vm_offset_t newaddr = addr;
260#ifndef DISABLE_PSE
261	if (cpu_feature & CPUID_PSE) {
262		newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
263	}
264#endif
265	return newaddr;
266}
267
268/*
269 *	Bootstrap the system enough to run with virtual memory.
270 *
271 *	On the i386 this is called after mapping has already been enabled
272 *	and just syncs the pmap module with what has already been done.
273 *	[We can't call it easily with mapping off since the kernel is not
274 *	mapped with PA == VA, hence we would have to relocate every address
275 *	from the linked base (virtual) address "KERNBASE" to the actual
276 *	(physical) address starting relative to 0]
277 */
278void
279pmap_bootstrap(firstaddr, loadaddr)
280	vm_offset_t firstaddr;
281	vm_offset_t loadaddr;
282{
283	vm_offset_t va;
284	pt_entry_t *pte;
285
286	avail_start = firstaddr;
287
288	/*
289	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
290	 * large. It should instead be correctly calculated in locore.s and
291	 * not based on 'first' (which is a physical address, not a virtual
292	 * address, for the start of unused physical memory). The kernel
293	 * page tables are NOT double mapped and thus should not be included
294	 * in this calculation.
295	 */
296	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
297	virtual_avail = pmap_kmem_choose(virtual_avail);
298
299	virtual_end = VM_MAX_KERNEL_ADDRESS;
300
301	/*
302	 * Initialize protection array.
303	 */
304	i386_protection_init();
305
306	/*
307	 * The kernel's pmap is statically allocated so we don't have to use
308	 * pmap_create, which is unlikely to work correctly at this part of
309	 * the boot sequence (XXX and which no longer exists).
310	 */
311	kernel_pmap = &kernel_pmap_store;
312
313	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
314	kernel_pmap->pm_count = 1;
315	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
316	TAILQ_INIT(&kernel_pmap->pm_pvlist);
317	LIST_INIT(&allpmaps);
318	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
319	nkpt = NKPT;
320
321	/*
322	 * Reserve some special page table entries/VA space for temporary
323	 * mapping of pages.
324	 */
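/*
 * SYSMAP(c, p, v, n) carves "n" pages out of the "va"/"pte" cursors
 * initialized just below: it assigns the current VA (cast to type "c")
 * to "v" and the current pte pointer to "p", then advances both by "n".
 */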
325#define	SYSMAP(c, p, v, n)	\
326	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
327
328	va = virtual_avail;
329	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
330
331	/*
332	 * CMAP1/CMAP2 are used for zeroing and copying pages.
333	 */
334	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
335	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
336
337	/*
338	 * Crashdump maps.
339	 */
340	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
341
342	/*
343	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
344	 * XXX ptmmap is not used.
345	 */
346	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
347
348	/*
349	 * msgbufp is used to map the system message buffer.
350	 * XXX msgbufmap is not used.
351	 */
352	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
353	       atop(round_page(MSGBUF_SIZE)))
354
355	/*
356	 * PMAP1/PADDR1 are used by pmap_pte_quick
357	 */
358	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
359
360	virtual_avail = va;
361
362	*(int *) CMAP1 = *(int *) CMAP2 = 0;
363	*(int *) PTD = 0;
364
365
366	pgeflag = 0;
367#if !defined(SMP)			/* XXX - see also mp_machdep.c */
368	if (cpu_feature & CPUID_PGE) {
369		pgeflag = PG_G;
370	}
371#endif
372
373/*
374 * Initialize the 4MB page size flag
375 */
376	pseflag = 0;
377/*
378 * The 4MB page version of the initial
379 * kernel page mapping.
380 */
381	pdir4mb = 0;
382
383#if !defined(DISABLE_PSE)
384	if (cpu_feature & CPUID_PSE) {
385		unsigned ptditmp;
386		/*
387		 * Note that we have enabled PSE mode
388		 */
389		pseflag = PG_PS;
390		ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
391		ptditmp &= ~(NBPDR - 1);
392		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
393		pdir4mb = ptditmp;
394
395#if !defined(SMP)
396		/*
397		 * Enable the PSE mode.
398		 */
399		load_cr4(rcr4() | CR4_PSE);
400
401		/*
402		 * We can do the mapping here for the single processor
403		 * case.  We simply ignore the old page table page from
404		 * now on.
405		 */
406		/*
407		 * For SMP, we still need 4K pages to bootstrap APs,
408		 * PSE will be enabled as soon as all APs are up.
409		 */
410		PTD[KPTDI] = (pd_entry_t) ptditmp;
411		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
412		invltlb();
413#endif
414	}
415#endif
416
417#ifdef SMP
418	if (cpu_apic_address == 0)
419		panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
420
421	/* local apic is mapped on last page */
422	SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
423	    (cpu_apic_address & PG_FRAME));
424#endif
425
426	invltlb();
427}
428
429#ifdef SMP
430/*
431 * Set 4mb pdir for mp startup
432 */
433void
434pmap_set_opt(void)
435{
436	if (pseflag && (cpu_feature & CPUID_PSE)) {
437		load_cr4(rcr4() | CR4_PSE);
438		if (pdir4mb && PCPU_GET(cpuid) == 0) {	/* only on BSP */
439			kernel_pmap->pm_pdir[KPTDI] =
440			    PTD[KPTDI] = (pd_entry_t)pdir4mb;
441			cpu_invltlb();
442		}
443	}
444}
445#endif
446
447/*
448 *	Initialize the pmap module.
449 *	Called by vm_init, to initialize any structures that the pmap
450 *	system needs to map virtual memory.
451 *	pmap_init has been enhanced to support, in a fairly consistent
452 *	way, discontiguous physical memory.
453 */
454void
455pmap_init(phys_start, phys_end)
456	vm_offset_t phys_start, phys_end;
457{
458	int i;
459	int initial_pvs;
460
461	/*
462	 * object for kernel page table pages
463	 */
464	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
465
466	/*
467	 * Allocate memory for random pmap data structures.  Includes the
468	 * pv_head_table.
469	 */
470
471	for(i = 0; i < vm_page_array_size; i++) {
472		vm_page_t m;
473
474		m = &vm_page_array[i];
475		TAILQ_INIT(&m->md.pv_list);
476		m->md.pv_list_count = 0;
477	}
478
479	/*
480	 * init the pv free list
481	 */
482	initial_pvs = vm_page_array_size;
483	if (initial_pvs < MINPV)
484		initial_pvs = MINPV;
485	pvzone = &pvzone_store;
486	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
487		initial_pvs * sizeof (struct pv_entry));
488	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
489	    vm_page_array_size);
490
491	/*
492	 * Now it is safe to enable pv_table recording.
493	 */
494	pmap_initialized = TRUE;
495}
496
497/*
498 * Initialize the address space (zone) for the pv_entries.  Set a
499 * high water mark so that the system can recover from excessive
500 * numbers of pv entries.
501 */
502void
503pmap_init2()
504{
505	pv_entry_max = PMAP_SHPGPERPROC * maxproc + vm_page_array_size;
506	pv_entry_high_water = 9 * (pv_entry_max / 10);
507	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
508}
509
510
511/***************************************************
512 * Low level helper routines.....
513 ***************************************************/
514
515#if defined(PMAP_DIAGNOSTIC)
516
517/*
518 * This code checks for non-writeable/modified pages.
519 * This should be an invalid condition.
520 */
521static int
522pmap_nw_modified(pt_entry_t ptea)
523{
524	int pte;
525
526	pte = (int) ptea;
527
528	if ((pte & (PG_M|PG_RW)) == PG_M)
529		return 1;
530	else
531		return 0;
532}
533#endif
534
535
536/*
537 * Return 1 if the modified bit should be tracked for the given VA,
538 * i.e. the VA lies outside the kernel's clean submap (clean_sva..clean_eva).
539 */
540static PMAP_INLINE int
541pmap_track_modified(vm_offset_t va)
542{
543	if ((va < clean_sva) || (va >= clean_eva))
544		return 1;
545	else
546		return 0;
547}
548
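/*
 * Invalidate a single page in the local TLB.  A plain 80386 (I386_CPU)
 * has no invlpg instruction, so fall back to flushing the entire TLB.
 */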
549static PMAP_INLINE void
550invltlb_1pg(vm_offset_t va)
551{
552#ifdef I386_CPU
553	invltlb();
554#else
555	invlpg(va);
556#endif
557}
558
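/*
 * pmap_TLB_invalidate() and pmap_TLB_invalidate_all() flush one page or
 * the whole TLB on every CPU that may be using the pmap: the local CPU
 * directly, and any remote CPUs in pm_active via an SMP TLB shootdown.
 */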
559static __inline void
560pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va)
561{
562#if defined(SMP)
563	if (pmap->pm_active & (1 << PCPU_GET(cpuid)))
564		cpu_invlpg((void *)va);
565	if (pmap->pm_active & PCPU_GET(other_cpus))
566		smp_invltlb();
567#else
568	if (pmap->pm_active)
569		invltlb_1pg(va);
570#endif
571}
572
573static __inline void
574pmap_TLB_invalidate_all(pmap_t pmap)
575{
576#if defined(SMP)
577	if (pmap->pm_active & (1 << PCPU_GET(cpuid)))
578		cpu_invltlb();
579	if (pmap->pm_active & PCPU_GET(other_cpus))
580		smp_invltlb();
581#else
582	if (pmap->pm_active)
583		invltlb();
584#endif
585}
586
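/*
 * Return a virtual address through which the page tables of "pmap" can
 * be accessed.  The current pmap (and the kernel pmap) are reached via
 * the recursive mapping at PTDPTDI (PTmap); any other pmap is installed
 * in the alternate recursive slot (APTDpde) and accessed via APTmap.
 */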
587static unsigned *
588get_ptbase(pmap)
589	pmap_t pmap;
590{
591	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
592
593	/* are we current address space or kernel? */
594	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
595		return (unsigned *) PTmap;
596	}
597	/* otherwise, we are alternate address space */
598	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
599		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
600#if defined(SMP)
601		/* The page directory is not shared between CPUs */
602		cpu_invltlb();
603#else
604		invltlb();
605#endif
606	}
607	return (unsigned *) APTmap;
608}
609
610/*
611 * Super fast pmap_pte routine best used when scanning
612 * the pv lists.  This eliminates many coarse-grained
613 * invltlb calls.  Note that many of the pv list
614 * scans are across different pmaps.  It is very wasteful
615 * to do an entire invltlb for checking a single mapping.
616 */
617
618static unsigned *
619pmap_pte_quick(pmap, va)
620	register pmap_t pmap;
621	vm_offset_t va;
622{
623	unsigned pde, newpf;
624	if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
625		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
626		unsigned index = i386_btop(va);
627		/* are we current address space or kernel? */
628		if ((pmap == kernel_pmap) ||
629			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
630			return (unsigned *) PTmap + index;
631		}
632		newpf = pde & PG_FRAME;
633		if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
634			* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
635			invltlb_1pg((vm_offset_t) PADDR1);
636		}
637		return PADDR1 + ((unsigned) index & (NPTEPG - 1));
638	}
639	return (0);
640}
641
642/*
643 *	Routine:	pmap_extract
644 *	Function:
645 *		Extract the physical page address associated
646 *		with the given map/virtual_address pair.
647 */
648vm_offset_t
649pmap_extract(pmap, va)
650	register pmap_t pmap;
651	vm_offset_t va;
652{
653	vm_offset_t rtval;
654	vm_offset_t pdirindex;
655	pdirindex = va >> PDRSHIFT;
656	if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) {
657		unsigned *pte;
658		if ((rtval & PG_PS) != 0) {
659			rtval &= ~(NBPDR - 1);
660			rtval |= va & (NBPDR - 1);
661			return rtval;
662		}
663		pte = get_ptbase(pmap) + i386_btop(va);
664		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
665		return rtval;
666	}
667	return 0;
668
669}
670
671/***************************************************
672 * Low level mapping routines.....
673 ***************************************************/
674
675/*
676 * add a wired page to the kva
677 * note that in order for the mapping to take effect -- you
678 * should do an invltlb after doing the pmap_kenter...
679 */
680PMAP_INLINE void
681pmap_kenter(va, pa)
682	vm_offset_t va;
683	register vm_offset_t pa;
684{
685	register unsigned *pte;
686	unsigned npte, opte;
687
688	npte = pa | PG_RW | PG_V | pgeflag;
689	pte = (unsigned *)vtopte(va);
690	opte = *pte;
691	*pte = npte;
692	/*if (opte)*/
693		invltlb_1pg(va);	/* XXX what about SMP? */
694}
695
696/*
697 * remove a page from the kernel pagetables
698 */
699PMAP_INLINE void
700pmap_kremove(va)
701	vm_offset_t va;
702{
703	register unsigned *pte;
704
705	pte = (unsigned *)vtopte(va);
706	*pte = 0;
707	invltlb_1pg(va);	/* XXX what about SMP? */
708}
709
710/*
711 *	Used to map a range of physical addresses into kernel
712 *	virtual address space.
713 *
714 *	The value passed in '*virt' is a suggested virtual address for
715 *	the mapping. Architectures which can support a direct-mapped
716 *	physical to virtual region can return the appropriate address
717 *	within that region, leaving '*virt' unchanged. Other
718 *	architectures should map the pages starting at '*virt' and
719 *	update '*virt' with the first usable address after the mapped
720 *	region.
721 */
722vm_offset_t
723pmap_map(virt, start, end, prot)
724	vm_offset_t *virt;
725	vm_offset_t start;
726	vm_offset_t end;
727	int prot;
728{
729	vm_offset_t sva = *virt;
730	vm_offset_t va = sva;
731	while (start < end) {
732		pmap_kenter(va, start);
733		va += PAGE_SIZE;
734		start += PAGE_SIZE;
735	}
736	*virt = va;
737	return (sva);
738}
739
740
741/*
742 * Add a list of wired pages to the kva
743 * this routine is only used for temporary
744 * kernel mappings that do not need to have
745 * page modification or references recorded.
746 * Note that old mappings are simply written
747 * over.  The page *must* be wired.
748 */
749void
750pmap_qenter(va, m, count)
751	vm_offset_t va;
752	vm_page_t *m;
753	int count;
754{
755	int i;
756
757	for (i = 0; i < count; i++) {
758		vm_offset_t tva = va + i * PAGE_SIZE;
759		pmap_kenter(tva, VM_PAGE_TO_PHYS(m[i]));
760	}
761}
762
763/*
764 * this routine jerks page mappings from the
765 * kernel -- it is meant only for temporary mappings.
766 */
767void
768pmap_qremove(va, count)
769	vm_offset_t va;
770	int count;
771{
772	vm_offset_t end_va;
773
774	end_va = va + count*PAGE_SIZE;
775
776	while (va < end_va) {
777		unsigned *pte;
778
779		pte = (unsigned *)vtopte(va);
780		*pte = 0;
781#ifdef SMP
782		cpu_invlpg((void *)va);
783#else
784		invltlb_1pg(va);
785#endif
786		va += PAGE_SIZE;
787	}
788#ifdef SMP
789	smp_invltlb();
790#endif
791}
792
793static vm_page_t
794pmap_page_lookup(object, pindex)
795	vm_object_t object;
796	vm_pindex_t pindex;
797{
798	vm_page_t m;
799retry:
800	m = vm_page_lookup(object, pindex);
801	if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
802		goto retry;
803	return m;
804}
805
806/*
807 * Create the UPAGES for a new process.
808 * This routine directly affects the fork perf for a process.
809 */
810void
811pmap_new_proc(p)
812	struct proc *p;
813{
814#ifdef I386_CPU
815	int updateneeded;
816#endif
817	int i;
818	vm_object_t upobj;
819	vm_page_t m;
820	struct user *up;
821	unsigned *ptek, oldpte;
822
823	/*
824	 * allocate object for the upages
825	 */
826	if ((upobj = p->p_upages_obj) == NULL) {
827		upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
828		p->p_upages_obj = upobj;
829	}
830
831	/* get a kernel virtual address for the UPAGES for this proc */
832	if ((up = p->p_addr) == NULL) {
833		up = (struct user *) kmem_alloc_nofault(kernel_map,
834				UPAGES * PAGE_SIZE);
835		if (up == NULL)
836			panic("pmap_new_proc: u_map allocation failed");
837		p->p_addr = up;
838	}
839
840	ptek = (unsigned *) vtopte((vm_offset_t) up);
841
842#ifdef I386_CPU
843	updateneeded = 0;
844#endif
845	for(i=0;i<UPAGES;i++) {
846		/*
847		 * Get a kernel stack page
848		 */
849		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
850
851		/*
852		 * Wire the page
853		 */
854		m->wire_count++;
855		cnt.v_wire_count++;
856
857		oldpte = *(ptek + i);
858		/*
859		 * Enter the page into the kernel address space.
860		 */
861		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
862		if (oldpte) {
863#ifdef I386_CPU
864			updateneeded = 1;
865#else
866			invlpg((vm_offset_t) up + i * PAGE_SIZE);
867#endif
868		}
869
870		vm_page_wakeup(m);
871		vm_page_flag_clear(m, PG_ZERO);
872		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
873		m->valid = VM_PAGE_BITS_ALL;
874	}
875#ifdef I386_CPU
876	if (updateneeded)
877		invltlb();
878#endif
879}
880
881/*
882 * Dispose of the UPAGES for a process that has exited.
883 * This routine directly impacts the exit perf of a process.
884 */
885void
886pmap_dispose_proc(p)
887	struct proc *p;
888{
889	int i;
890	vm_object_t upobj;
891	vm_page_t m;
892	unsigned *ptek, oldpte;
893
894	upobj = p->p_upages_obj;
895
896	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
897	for(i=0;i<UPAGES;i++) {
898
899		if ((m = vm_page_lookup(upobj, i)) == NULL)
900			panic("pmap_dispose_proc: upage already missing???");
901
902		vm_page_busy(m);
903
904		oldpte = *(ptek + i);
905		*(ptek + i) = 0;
906#ifndef I386_CPU
907		invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
908#endif
909		vm_page_unwire(m, 0);
910		vm_page_free(m);
911	}
912#ifdef I386_CPU
913	invltlb();
914#endif
915}
916
917/*
918 * Allow the UPAGES for a process to be prejudicially paged out.
919 */
920void
921pmap_swapout_proc(p)
922	struct proc *p;
923{
924	int i;
925	vm_object_t upobj;
926	vm_page_t m;
927
928	upobj = p->p_upages_obj;
929	/*
930	 * let the upages be paged
931	 */
932	for(i=0;i<UPAGES;i++) {
933		if ((m = vm_page_lookup(upobj, i)) == NULL)
934			panic("pmap_swapout_proc: upage already missing???");
935		vm_page_dirty(m);
936		vm_page_unwire(m, 0);
937		pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i);
938	}
939}
940
941/*
942 * Bring the UPAGES for a specified process back in.
943 */
944void
945pmap_swapin_proc(p)
946	struct proc *p;
947{
948	int i,rv;
949	vm_object_t upobj;
950	vm_page_t m;
951
952	upobj = p->p_upages_obj;
953	for(i=0;i<UPAGES;i++) {
954
955		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
956
957		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
958			VM_PAGE_TO_PHYS(m));
959
960		if (m->valid != VM_PAGE_BITS_ALL) {
961			rv = vm_pager_get_pages(upobj, &m, 1, 0);
962			if (rv != VM_PAGER_OK)
963				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
964			m = vm_page_lookup(upobj, i);
965			m->valid = VM_PAGE_BITS_ALL;
966		}
967
968		vm_page_wire(m);
969		vm_page_wakeup(m);
970		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
971	}
972}
973
974/***************************************************
975 * Page table page management routines.....
976 ***************************************************/
977
978/*
979 * This routine unholds page table pages, and if the hold count
980 * drops to zero, then it decrements the wire count.
981 */
982static int
983_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
984
985	while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
986		;
987
988	if (m->hold_count == 0) {
989		vm_offset_t pteva;
990		/*
991		 * unmap the page table page
992		 */
993		pmap->pm_pdir[m->pindex] = 0;
994		--pmap->pm_stats.resident_count;
995		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
996			(((unsigned) PTDpde) & PG_FRAME)) {
997			/*
998			 * Do an invltlb to make the invalidated mapping
999			 * take effect immediately.
1000			 */
1001			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
1002			pmap_TLB_invalidate(pmap, pteva);
1003		}
1004
1005		if (pmap->pm_ptphint == m)
1006			pmap->pm_ptphint = NULL;
1007
1008		/*
1009		 * If the page is finally unwired, simply free it.
1010		 */
1011		--m->wire_count;
1012		if (m->wire_count == 0) {
1013
1014			vm_page_flash(m);
1015			vm_page_busy(m);
1016			vm_page_free_zero(m);
1017			--cnt.v_wire_count;
1018		}
1019		return 1;
1020	}
1021	return 0;
1022}
1023
1024static PMAP_INLINE int
1025pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
1026{
1027	vm_page_unhold(m);
1028	if (m->hold_count == 0)
1029		return _pmap_unwire_pte_hold(pmap, m);
1030	else
1031		return 0;
1032}
1033
1034/*
1035 * After removing a page table entry, this routine is used to
1036 * conditionally free the page, and manage the hold/wire counts.
1037 */
1038static int
1039pmap_unuse_pt(pmap, va, mpte)
1040	pmap_t pmap;
1041	vm_offset_t va;
1042	vm_page_t mpte;
1043{
1044	unsigned ptepindex;
1045	if (va >= UPT_MIN_ADDRESS)
1046		return 0;
1047
1048	if (mpte == NULL) {
1049		ptepindex = (va >> PDRSHIFT);
1050		if (pmap->pm_ptphint &&
1051			(pmap->pm_ptphint->pindex == ptepindex)) {
1052			mpte = pmap->pm_ptphint;
1053		} else {
1054			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1055			pmap->pm_ptphint = mpte;
1056		}
1057	}
1058
1059	return pmap_unwire_pte_hold(pmap, mpte);
1060}
1061
1062void
1063pmap_pinit0(pmap)
1064	struct pmap *pmap;
1065{
1066	pmap->pm_pdir =
1067		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1068	pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
1069	pmap->pm_count = 1;
1070	pmap->pm_active = 0;
1071	pmap->pm_ptphint = NULL;
1072	TAILQ_INIT(&pmap->pm_pvlist);
1073	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1074	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
1075}
1076
1077/*
1078 * Initialize a preallocated and zeroed pmap structure,
1079 * such as one in a vmspace structure.
1080 */
1081void
1082pmap_pinit(pmap)
1083	register struct pmap *pmap;
1084{
1085	vm_page_t ptdpg;
1086
1087	/*
1088	 * No need to allocate page table space yet but we do need a valid
1089	 * page directory table.
1090	 */
1091	if (pmap->pm_pdir == NULL)
1092		pmap->pm_pdir =
1093			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1094
1095	/*
1096	 * allocate object for the ptes
1097	 */
1098	if (pmap->pm_pteobj == NULL)
1099		pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
1100
1101	/*
1102	 * allocate the page directory page
1103	 */
1104	ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI,
1105			VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1106
1107	ptdpg->wire_count = 1;
1108	++cnt.v_wire_count;
1109
1110
1111	vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
1112	ptdpg->valid = VM_PAGE_BITS_ALL;
1113
1114	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
1115	if ((ptdpg->flags & PG_ZERO) == 0)
1116		bzero(pmap->pm_pdir, PAGE_SIZE);
1117
1118	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
1119	/* Wire in kernel global address entries. */
1120	/* XXX copies current process, does not fill in MPPTDI */
1121	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
1122#ifdef SMP
1123	pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
1124#endif
1125
1126	/* install self-referential address mapping entry */
1127	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
1128		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
1129
1130	pmap->pm_count = 1;
1131	pmap->pm_active = 0;
1132	pmap->pm_ptphint = NULL;
1133	TAILQ_INIT(&pmap->pm_pvlist);
1134	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1135}
1136
1137/*
1138 * Wire in kernel global address entries.  To avoid a race condition
1139 * between pmap initialization and pmap_growkernel, this procedure
1140 * should be called after the vmspace is attached to the process
1141 * but before this pmap is activated.
1142 */
1143void
1144pmap_pinit2(pmap)
1145	struct pmap *pmap;
1146{
1147	/* XXX: Remove this stub when no longer called */
1148}
1149
1150static int
1151pmap_release_free_page(pmap, p)
1152	struct pmap *pmap;
1153	vm_page_t p;
1154{
1155	unsigned *pde = (unsigned *) pmap->pm_pdir;
1156	/*
1157	 * This code optimizes the case of freeing non-busy
1158	 * page-table pages.  Those pages are zero now, and
1159	 * might as well be placed directly into the zero queue.
1160	 */
1161	if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
1162		return 0;
1163
1164	vm_page_busy(p);
1165
1166	/*
1167	 * Remove the page table page from the process's address space.
1168	 */
1169	pde[p->pindex] = 0;
1170	pmap->pm_stats.resident_count--;
1171
1172	if (p->hold_count)  {
1173		panic("pmap_release: freeing held page table page");
1174	}
1175	/*
1176	 * Page directory pages need to have the kernel
1177	 * stuff cleared, so they can go into the zero queue also.
1178	 */
1179	if (p->pindex == PTDPTDI) {
1180		bzero(pde + KPTDI, nkpt * PTESIZE);
1181#ifdef SMP
1182		pde[MPPTDI] = 0;
1183#endif
1184		pde[APTDPTDI] = 0;
1185		pmap_kremove((vm_offset_t) pmap->pm_pdir);
1186	}
1187
1188	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
1189		pmap->pm_ptphint = NULL;
1190
1191	p->wire_count--;
1192	cnt.v_wire_count--;
1193	vm_page_free_zero(p);
1194	return 1;
1195}
1196
1197/*
1198 * this routine is called if the page table page is not
1199 * mapped correctly.
1200 */
1201static vm_page_t
1202_pmap_allocpte(pmap, ptepindex)
1203	pmap_t	pmap;
1204	unsigned ptepindex;
1205{
1206	vm_offset_t pteva, ptepa;
1207	vm_page_t m;
1208
1209	/*
1210	 * Find or fabricate a new pagetable page
1211	 */
1212	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
1213			VM_ALLOC_ZERO | VM_ALLOC_RETRY);
1214
1215	KASSERT(m->queue == PQ_NONE,
1216		("_pmap_allocpte: %p->queue != PQ_NONE", m));
1217
1218	if (m->wire_count == 0)
1219		cnt.v_wire_count++;
1220	m->wire_count++;
1221
1222	/*
1223	 * Increment the hold count for the page table page
1224	 * (denoting a new mapping.)
1225	 */
1226	m->hold_count++;
1227
1228	/*
1229	 * Map the pagetable page into the process address space, if
1230	 * it isn't already there.
1231	 */
1232
1233	pmap->pm_stats.resident_count++;
1234
1235	ptepa = VM_PAGE_TO_PHYS(m);
1236	pmap->pm_pdir[ptepindex] =
1237		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
1238
1239	/*
1240	 * Set the page table hint
1241	 */
1242	pmap->pm_ptphint = m;
1243
1244	/*
1245	 * Try to use the new mapping, but if we cannot, then
1246	 * do it with the routine that maps the page explicitly.
1247	 */
1248	if ((m->flags & PG_ZERO) == 0) {
1249		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
1250			(((unsigned) PTDpde) & PG_FRAME)) {
1251			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
1252			bzero((caddr_t) pteva, PAGE_SIZE);
1253		} else {
1254			pmap_zero_page(ptepa);
1255		}
1256	}
1257
1258	m->valid = VM_PAGE_BITS_ALL;
1259	vm_page_flag_clear(m, PG_ZERO);
1260	vm_page_flag_set(m, PG_MAPPED);
1261	vm_page_wakeup(m);
1262
1263	return m;
1264}
1265
1266static vm_page_t
1267pmap_allocpte(pmap, va)
1268	pmap_t	pmap;
1269	vm_offset_t va;
1270{
1271	unsigned ptepindex;
1272	vm_offset_t ptepa;
1273	vm_page_t m;
1274
1275	/*
1276	 * Calculate pagetable page index
1277	 */
1278	ptepindex = va >> PDRSHIFT;
1279
1280	/*
1281	 * Get the page directory entry
1282	 */
1283	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1284
1285	/*
1286	 * This supports switching from a 4MB page to a
1287	 * normal 4K page.
1288	 */
1289	if (ptepa & PG_PS) {
1290		pmap->pm_pdir[ptepindex] = 0;
1291		ptepa = 0;
1292		invltlb();
1293	}
1294
1295	/*
1296	 * If the page table page is mapped, we just increment the
1297	 * hold count, and activate it.
1298	 */
1299	if (ptepa) {
1300		/*
1301		 * In order to get the page table page, try the
1302		 * hint first.
1303		 */
1304		if (pmap->pm_ptphint &&
1305			(pmap->pm_ptphint->pindex == ptepindex)) {
1306			m = pmap->pm_ptphint;
1307		} else {
1308			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
1309			pmap->pm_ptphint = m;
1310		}
1311		m->hold_count++;
1312		return m;
1313	}
1314	/*
1315	 * Here if the pte page isn't mapped, or if it has been deallocated.
1316	 */
1317	return _pmap_allocpte(pmap, ptepindex);
1318}
1319
1320
1321/***************************************************
1322 * Pmap allocation/deallocation routines.
1323 ***************************************************/
1324
1325/*
1326 * Release any resources held by the given physical map.
1327 * Called when a pmap initialized by pmap_pinit is being released.
1328 * Should only be called if the map contains no valid mappings.
1329 */
1330void
1331pmap_release(pmap)
1332	register struct pmap *pmap;
1333{
1334	vm_page_t p,n,ptdpg;
1335	vm_object_t object = pmap->pm_pteobj;
1336	int curgeneration;
1337
1338#if defined(DIAGNOSTIC)
1339	if (object->ref_count != 1)
1340		panic("pmap_release: pteobj reference count != 1");
1341#endif
1342
1343	ptdpg = NULL;
1344	LIST_REMOVE(pmap, pm_list);
1345retry:
1346	curgeneration = object->generation;
1347	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
1348		n = TAILQ_NEXT(p, listq);
1349		if (p->pindex == PTDPTDI) {
1350			ptdpg = p;
1351			continue;
1352		}
1353		while (1) {
1354			if (!pmap_release_free_page(pmap, p) &&
1355				(object->generation != curgeneration))
1356				goto retry;
1357		}
1358	}
1359
1360	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
1361		goto retry;
1362}
1363
1364/*
1365 * grow the number of kernel page table entries, if needed
1366 */
1367void
1368pmap_growkernel(vm_offset_t addr)
1369{
1370	struct pmap *pmap;
1371	int s;
1372	vm_offset_t ptppaddr;
1373	vm_page_t nkpg;
1374	pd_entry_t newpdir;
1375
1376	s = splhigh();
1377	if (kernel_vm_end == 0) {
1378		kernel_vm_end = KERNBASE;
1379		nkpt = 0;
1380		while (pdir_pde(PTD, kernel_vm_end)) {
1381			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1382			nkpt++;
1383		}
1384	}
1385	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1386	while (kernel_vm_end < addr) {
1387		if (pdir_pde(PTD, kernel_vm_end)) {
1388			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1389			continue;
1390		}
1391
1392		/*
1393		 * This index is bogus, but out of the way
1394		 */
1395		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
1396		if (!nkpg)
1397			panic("pmap_growkernel: no memory to grow kernel");
1398
1399		nkpt++;
1400
1401		vm_page_wire(nkpg);
1402		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1403		pmap_zero_page(ptppaddr);
1404		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
1405		pdir_pde(PTD, kernel_vm_end) = newpdir;
1406
1407		LIST_FOREACH(pmap, &allpmaps, pm_list) {
1408			*pmap_pde(pmap, kernel_vm_end) = newpdir;
1409		}
1410		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1411	}
1412	splx(s);
1413}
1414
1415/*
1416 *	Retire the given physical map from service.
1417 *	Should only be called if the map contains
1418 *	no valid mappings.
1419 */
1420void
1421pmap_destroy(pmap)
1422	register pmap_t pmap;
1423{
1424	int count;
1425
1426	if (pmap == NULL)
1427		return;
1428
1429	count = --pmap->pm_count;
1430	if (count == 0) {
1431		pmap_release(pmap);
1432		panic("destroying a pmap is not yet implemented");
1433	}
1434}
1435
1436/*
1437 *	Add a reference to the specified pmap.
1438 */
1439void
1440pmap_reference(pmap)
1441	pmap_t pmap;
1442{
1443	if (pmap != NULL) {
1444		pmap->pm_count++;
1445	}
1446}
1447
1448/***************************************************
1449 * Page management routines.
1450 ***************************************************/
1451
1452/*
1453 * free the pv_entry back to the free list
1454 */
1455static PMAP_INLINE void
1456free_pv_entry(pv)
1457	pv_entry_t pv;
1458{
1459	pv_entry_count--;
1460	zfree(pvzone, pv);
1461}
1462
1463/*
1464 * get a new pv_entry, allocating a block from the system
1465 * when needed.
1466 * the memory allocation is performed bypassing the malloc code
1467 * because of the possibility of allocations at interrupt time.
1468 */
1469static pv_entry_t
1470get_pv_entry(void)
1471{
1472	pv_entry_count++;
1473	if (pv_entry_high_water &&
1474		(pv_entry_count > pv_entry_high_water) &&
1475		(pmap_pagedaemon_waken == 0)) {
1476		pmap_pagedaemon_waken = 1;
1477		wakeup (&vm_pages_needed);
1478	}
1479	return zalloc(pvzone);
1480}
1481
1482/*
1483 * This routine is very drastic, but can save the system
1484 * in a pinch.
1485 */
1486void
1487pmap_collect()
1488{
1489	int i;
1490	vm_page_t m;
1491	static int warningdone=0;
1492
1493	if (pmap_pagedaemon_waken == 0)
1494		return;
1495
1496	if (warningdone < 5) {
1497		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1498		warningdone++;
1499	}
1500
1501	for(i = 0; i < vm_page_array_size; i++) {
1502		m = &vm_page_array[i];
1503		if (m->wire_count || m->hold_count || m->busy ||
1504		    (m->flags & PG_BUSY))
1505			continue;
1506		pmap_remove_all(m);
1507	}
1508	pmap_pagedaemon_waken = 0;
1509}
1510
1511
1512/*
1513 * Remove the pv entry for (pmap, va):  search whichever of the page's
1514 * pv list or the pmap's pv list is likely to be shorter, unlink the
1515 * entry from both lists, drop the page table page reference, and
1516 * free the now unused entry.
1517 */
1518
1519static int
1520pmap_remove_entry(pmap, m, va)
1521	struct pmap *pmap;
1522	vm_page_t m;
1523	vm_offset_t va;
1524{
1525	pv_entry_t pv;
1526	int rtval;
1527	int s;
1528
1529	s = splvm();
1530	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1531		for (pv = TAILQ_FIRST(&m->md.pv_list);
1532			pv;
1533			pv = TAILQ_NEXT(pv, pv_list)) {
1534			if (pmap == pv->pv_pmap && va == pv->pv_va)
1535				break;
1536		}
1537	} else {
1538		for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1539			pv;
1540			pv = TAILQ_NEXT(pv, pv_plist)) {
1541			if (va == pv->pv_va)
1542				break;
1543		}
1544	}
1545
1546	rtval = 0;
1547	if (pv) {
1548
1549		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1550		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1551		m->md.pv_list_count--;
1552		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1553			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1554
1555		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1556		free_pv_entry(pv);
1557	}
1558
1559	splx(s);
1560	return rtval;
1561}
1562
1563/*
1564 * Create a pv entry for page at pa for
1565 * (pmap, va).
1566 */
1567static void
1568pmap_insert_entry(pmap, va, mpte, m)
1569	pmap_t pmap;
1570	vm_offset_t va;
1571	vm_page_t mpte;
1572	vm_page_t m;
1573{
1574
1575	int s;
1576	pv_entry_t pv;
1577
1578	s = splvm();
1579	pv = get_pv_entry();
1580	pv->pv_va = va;
1581	pv->pv_pmap = pmap;
1582	pv->pv_ptem = mpte;
1583
1584	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1585	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1586	m->md.pv_list_count++;
1587
1588	splx(s);
1589}
1590
1591/*
1592 * pmap_remove_pte: do the things to unmap a page in a process
1593 */
1594static int
1595pmap_remove_pte(pmap, ptq, va)
1596	struct pmap *pmap;
1597	unsigned *ptq;
1598	vm_offset_t va;
1599{
1600	unsigned oldpte;
1601	vm_page_t m;
1602
1603	oldpte = atomic_readandclear_int(ptq);
1604	if (oldpte & PG_W)
1605		pmap->pm_stats.wired_count -= 1;
1606	/*
1607	 * Machines that don't support invlpg, also don't support
1608	 * PG_G.
1609	 */
1610	if (oldpte & PG_G)
1611		invlpg(va);
1612	pmap->pm_stats.resident_count -= 1;
1613	if (oldpte & PG_MANAGED) {
1614		m = PHYS_TO_VM_PAGE(oldpte);
1615		if (oldpte & PG_M) {
1616#if defined(PMAP_DIAGNOSTIC)
1617			if (pmap_nw_modified((pt_entry_t) oldpte)) {
1618				printf(
1619	"pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1620				    va, oldpte);
1621			}
1622#endif
1623			if (pmap_track_modified(va))
1624				vm_page_dirty(m);
1625		}
1626		if (oldpte & PG_A)
1627			vm_page_flag_set(m, PG_REFERENCED);
1628		return pmap_remove_entry(pmap, m, va);
1629	} else {
1630		return pmap_unuse_pt(pmap, va, NULL);
1631	}
1632
1633	return 0;
1634}
1635
1636/*
1637 * Remove a single page from a process address space
1638 */
1639static void
1640pmap_remove_page(pmap, va)
1641	struct pmap *pmap;
1642	register vm_offset_t va;
1643{
1644	register unsigned *ptq;
1645
1646	/*
1647	 * if there is no pte for this address, just skip it!!!
1648	 */
1649	if (*pmap_pde(pmap, va) == 0) {
1650		return;
1651	}
1652
1653	/*
1654	 * get a local va for mappings for this pmap.
1655	 */
1656	ptq = get_ptbase(pmap) + i386_btop(va);
1657	if (*ptq) {
1658		(void) pmap_remove_pte(pmap, ptq, va);
1659		pmap_TLB_invalidate(pmap, va);
1660	}
1661	return;
1662}
1663
1664/*
1665 *	Remove the given range of addresses from the specified map.
1666 *
1667 *	It is assumed that the start and end are properly
1668 *	rounded to the page size.
1669 */
1670void
1671pmap_remove(pmap, sva, eva)
1672	struct pmap *pmap;
1673	register vm_offset_t sva;
1674	register vm_offset_t eva;
1675{
1676	register unsigned *ptbase;
1677	vm_offset_t pdnxt;
1678	vm_offset_t ptpaddr;
1679	vm_offset_t sindex, eindex;
1680	int anyvalid;
1681
1682	if (pmap == NULL)
1683		return;
1684
1685	if (pmap->pm_stats.resident_count == 0)
1686		return;
1687
1688	/*
1689	 * Special handling for removing a single page: this is a very
1690	 * common operation, and we can short circuit a fair amount
1691	 * of code.
1692	 */
1693	if (((sva + PAGE_SIZE) == eva) &&
1694		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1695		pmap_remove_page(pmap, sva);
1696		return;
1697	}
1698
1699	anyvalid = 0;
1700
1701	/*
1702	 * Get a local virtual address for the mappings that are being
1703	 * worked with.
1704	 */
1705	ptbase = get_ptbase(pmap);
1706
1707	sindex = i386_btop(sva);
1708	eindex = i386_btop(eva);
1709
1710	for (; sindex < eindex; sindex = pdnxt) {
1711		unsigned pdirindex;
1712
1713		/*
1714		 * Calculate index for next page table.
1715		 */
1716		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1717		if (pmap->pm_stats.resident_count == 0)
1718			break;
1719
1720		pdirindex = sindex / NPDEPG;
1721		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1722			pmap->pm_pdir[pdirindex] = 0;
1723			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1724			anyvalid++;
1725			continue;
1726		}
1727
1728		/*
1729		 * Weed out invalid mappings. Note: we assume that the page
1730		 * directory table is always allocated, and in kernel virtual.
1731		 */
1732		if (ptpaddr == 0)
1733			continue;
1734
1735		/*
1736		 * Limit our scan to either the end of the va represented
1737		 * by the current page table page, or to the end of the
1738		 * range being removed.
1739		 */
1740		if (pdnxt > eindex) {
1741			pdnxt = eindex;
1742		}
1743
1744		for ( ;sindex != pdnxt; sindex++) {
1745			vm_offset_t va;
1746			if (ptbase[sindex] == 0) {
1747				continue;
1748			}
1749			va = i386_ptob(sindex);
1750
1751			anyvalid++;
1752			if (pmap_remove_pte(pmap,
1753				ptbase + sindex, va))
1754				break;
1755		}
1756	}
1757
1758	if (anyvalid)
1759		pmap_TLB_invalidate_all(pmap);
1760}
1761
1762/*
1763 *	Routine:	pmap_remove_all
1764 *	Function:
1765 *		Removes this physical page from
1766 *		all physical maps in which it resides.
1767 *		Reflects back modify bits to the pager.
1768 *
1769 *	Notes:
1770 *		Original versions of this routine were very
1771 *		inefficient because they iteratively called
1772 *		pmap_remove (slow...)
1773 */
1774
1775static void
1776pmap_remove_all(m)
1777	vm_page_t m;
1778{
1779	register pv_entry_t pv;
1780	register unsigned *pte, tpte;
1781	int s;
1782
1783#if defined(PMAP_DIAGNOSTIC)
1784	/*
1785	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1786	 * pages!
1787	 */
1788	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1789		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m));
1790	}
1791#endif
1792
1793	s = splvm();
1794	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1795		pv->pv_pmap->pm_stats.resident_count--;
1796
1797		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
1798
1799		tpte = atomic_readandclear_int(pte);
1800		if (tpte & PG_W)
1801			pv->pv_pmap->pm_stats.wired_count--;
1802
1803		if (tpte & PG_A)
1804			vm_page_flag_set(m, PG_REFERENCED);
1805
1806		/*
1807		 * Update the vm_page_t clean and reference bits.
1808		 */
1809		if (tpte & PG_M) {
1810#if defined(PMAP_DIAGNOSTIC)
1811			if (pmap_nw_modified((pt_entry_t) tpte)) {
1812				printf(
1813	"pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1814				    pv->pv_va, tpte);
1815			}
1816#endif
1817			if (pmap_track_modified(pv->pv_va))
1818				vm_page_dirty(m);
1819		}
1820		pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
1821
1822		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1823		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1824		m->md.pv_list_count--;
1825		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1826		free_pv_entry(pv);
1827	}
1828
1829	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1830
1831	splx(s);
1832}
1833
1834/*
1835 *	Set the physical protection on the
1836 *	specified range of this map as requested.
1837 */
1838void
1839pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1840{
1841	register unsigned *ptbase;
1842	vm_offset_t pdnxt, ptpaddr;
1843	vm_pindex_t sindex, eindex;
1844	int anychanged;
1845
1846	if (pmap == NULL)
1847		return;
1848
1849	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1850		pmap_remove(pmap, sva, eva);
1851		return;
1852	}
1853
1854	if (prot & VM_PROT_WRITE)
1855		return;
1856
1857	anychanged = 0;
1858
1859	ptbase = get_ptbase(pmap);
1860
1861	sindex = i386_btop(sva);
1862	eindex = i386_btop(eva);
1863
1864	for (; sindex < eindex; sindex = pdnxt) {
1865
1866		unsigned pdirindex;
1867
1868		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1869
1870		pdirindex = sindex / NPDEPG;
1871		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
1872			(unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
1873			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1874			anychanged++;
1875			continue;
1876		}
1877
1878		/*
1879		 * Weed out invalid mappings. Note: we assume that the page
1880		 * directory table is always allocated, and in kernel virtual.
1881		 */
1882		if (ptpaddr == 0)
1883			continue;
1884
1885		if (pdnxt > eindex) {
1886			pdnxt = eindex;
1887		}
1888
1889		for (; sindex != pdnxt; sindex++) {
1890
1891			unsigned pbits;
1892			vm_page_t m;
1893
1894			pbits = ptbase[sindex];
1895
1896			if (pbits & PG_MANAGED) {
1897				m = NULL;
1898				if (pbits & PG_A) {
1899					m = PHYS_TO_VM_PAGE(pbits);
1900					vm_page_flag_set(m, PG_REFERENCED);
1901					pbits &= ~PG_A;
1902				}
1903				if (pbits & PG_M) {
1904					if (pmap_track_modified(i386_ptob(sindex))) {
1905						if (m == NULL)
1906							m = PHYS_TO_VM_PAGE(pbits);
1907						vm_page_dirty(m);
1908						pbits &= ~PG_M;
1909					}
1910				}
1911			}
1912
1913			pbits &= ~PG_RW;
1914
1915			if (pbits != ptbase[sindex]) {
1916				ptbase[sindex] = pbits;
1917				anychanged = 1;
1918			}
1919		}
1920	}
1921	if (anychanged)
1922		pmap_TLB_invalidate_all(pmap);
1923}
1924
1925/*
1926 *	Insert the given physical page (p) at
1927 *	the specified virtual address (v) in the
1928 *	target physical map with the protection requested.
1929 *
1930 *	If specified, the page will be wired down, meaning
1931 *	that the related pte can not be reclaimed.
1932 *
1933 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1934 *	or lose information.  That is, this routine must actually
1935 *	insert this page into the given map NOW.
1936 */
1937void
1938pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1939	   boolean_t wired)
1940{
1941	vm_offset_t pa;
1942	register unsigned *pte;
1943	vm_offset_t opa;
1944	vm_offset_t origpte, newpte;
1945	vm_page_t mpte;
1946
1947	if (pmap == NULL)
1948		return;
1949
1950	va &= PG_FRAME;
1951#ifdef PMAP_DIAGNOSTIC
1952	if (va > VM_MAX_KERNEL_ADDRESS)
1953		panic("pmap_enter: toobig");
1954	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
1955		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
1956#endif
1957
1958	mpte = NULL;
1959	/*
1960	 * In the case that a page table page is not
1961	 * resident, we are creating it here.
1962	 */
1963	if (va < UPT_MIN_ADDRESS) {
1964		mpte = pmap_allocpte(pmap, va);
1965	}
1966#if 0 && defined(PMAP_DIAGNOSTIC)
1967	else {
1968		vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va);
1969		if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) {
1970			panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n",
1971				pmap->pm_pdir[PTDPTDI], origpte, va);
1972		}
1973		if (smp_active) {
1974			pdeaddr = (vm_offset_t *) IdlePTDS[PCPU_GET(cpuid)];
1975			if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) {
1976				if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr))
1977					printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr);
1978				printf("cpuid: %d, pdeaddr: 0x%x\n", PCPU_GET(cpuid), pdeaddr);
1979				panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n",
1980					pmap->pm_pdir[PTDPTDI], newpte, origpte, va);
1981			}
1982		}
1983	}
1984#endif
1985
1986	pte = pmap_pte(pmap, va);
1987
1988	/*
1989	 * Page Directory table entry not valid, we need a new PT page
1990	 */
1991	if (pte == NULL) {
1992		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n",
1993			(void *)pmap->pm_pdir[PTDPTDI], va);
1994	}
1995
1996	pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
1997	origpte = *(vm_offset_t *)pte;
1998	opa = origpte & PG_FRAME;
1999
2000	if (origpte & PG_PS)
2001		panic("pmap_enter: attempted pmap_enter on 4MB page");
2002
2003	/*
2004	 * Mapping has not changed, must be protection or wiring change.
2005	 */
2006	if (origpte && (opa == pa)) {
2007		/*
2008		 * Wiring change, just update stats. We don't worry about
2009		 * wiring PT pages as they remain resident as long as there
2010		 * are valid mappings in them. Hence, if a user page is wired,
2011		 * the PT page will be also.
2012		 */
2013		if (wired && ((origpte & PG_W) == 0))
2014			pmap->pm_stats.wired_count++;
2015		else if (!wired && (origpte & PG_W))
2016			pmap->pm_stats.wired_count--;
2017
2018#if defined(PMAP_DIAGNOSTIC)
2019		if (pmap_nw_modified((pt_entry_t) origpte)) {
2020			printf(
2021	"pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
2022			    va, origpte);
2023		}
2024#endif
2025
2026		/*
2027		 * Remove extra pte reference
2028		 */
2029		if (mpte)
2030			mpte->hold_count--;
2031
2032		if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
2033			if ((origpte & PG_RW) == 0) {
2034				*pte |= PG_RW;
2035#ifdef SMP
2036				cpu_invlpg((void *)va);
2037				if (pmap->pm_active & PCPU_GET(other_cpus))
2038					smp_invltlb();
2039#else
2040				invltlb_1pg(va);
2041#endif
2042			}
2043			return;
2044		}
2045
2046		/*
2047		 * We might be turning off write access to the page,
2048		 * so we go ahead and sense modify status.
2049		 */
2050		if (origpte & PG_MANAGED) {
2051			if ((origpte & PG_M) && pmap_track_modified(va)) {
2052				vm_page_t om;
2053				om = PHYS_TO_VM_PAGE(opa);
2054				vm_page_dirty(om);
2055			}
2056			pa |= PG_MANAGED;
2057		}
2058		goto validate;
2059	}
2060	/*
2061	 * Mapping has changed, invalidate old range and fall through to
2062	 * handle validating new mapping.
2063	 */
2064	if (opa) {
2065		int err;
2066		err = pmap_remove_pte(pmap, pte, va);
2067		if (err)
2068			panic("pmap_enter: pte vanished, va: 0x%x", va);
2069	}
2070
2071	/*
2072	 * Enter on the PV list if part of our managed memory. Note that we
2073	 * raise IPL while manipulating pv_table since pmap_enter can be
2074	 * called at interrupt time.
2075	 */
2076	if (pmap_initialized &&
2077	    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
2078		pmap_insert_entry(pmap, va, mpte, m);
2079		pa |= PG_MANAGED;
2080	}
2081
2082	/*
2083	 * Increment counters
2084	 */
2085	pmap->pm_stats.resident_count++;
2086	if (wired)
2087		pmap->pm_stats.wired_count++;
2088
2089validate:
2090	/*
2091	 * Now validate mapping with desired protection/wiring.
2092	 */
2093	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2094
2095	if (wired)
2096		newpte |= PG_W;
2097	if (va < UPT_MIN_ADDRESS)
2098		newpte |= PG_U;
2099	if (pmap == kernel_pmap)
2100		newpte |= pgeflag;
2101
2102	/*
2103	 * if the mapping or permission bits are different, we need
2104	 * to update the pte.
2105	 */
2106	if ((origpte & ~(PG_M|PG_A)) != newpte) {
2107		*pte = newpte | PG_A;
2108		/*if (origpte)*/ {
2109#ifdef SMP
2110			cpu_invlpg((void *)va);
2111			if (pmap->pm_active & PCPU_GET(other_cpus))
2112				smp_invltlb();
2113#else
2114			invltlb_1pg(va);
2115#endif
2116		}
2117	}
2118}
2119
2120/*
2121 * this code makes some *MAJOR* assumptions:
2122 * 1. The given pmap is the current pmap and is non-NULL.
2123 * 2. Not wired.
2124 * 3. Read access.
2125 * 4. No page table pages.
2126 * 5. Tlbflush is deferred to calling procedure.
2127 * 6. Page IS managed.
2128 * but is *MUCH* faster than pmap_enter...
2129 */
2130
2131static vm_page_t
2132pmap_enter_quick(pmap, va, m, mpte)
2133	register pmap_t pmap;
2134	vm_offset_t va;
2135	vm_page_t m;
2136	vm_page_t mpte;
2137{
2138	unsigned *pte;
2139	vm_offset_t pa;
2140
2141	/*
2142	 * In the case that a page table page is not
2143	 * resident, we are creating it here.
2144	 */
2145	if (va < UPT_MIN_ADDRESS) {
2146		unsigned ptepindex;
2147		vm_offset_t ptepa;
2148
2149		/*
2150		 * Calculate pagetable page index
2151		 */
2152		ptepindex = va >> PDRSHIFT;
2153		if (mpte && (mpte->pindex == ptepindex)) {
2154			mpte->hold_count++;
2155		} else {
2156retry:
2157			/*
2158			 * Get the page directory entry
2159			 */
2160			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
2161
2162			/*
2163			 * If the page table page is mapped, we just increment
2164			 * the hold count, and activate it.
2165			 */
2166			if (ptepa) {
2167				if (ptepa & PG_PS)
2168					panic("pmap_enter_quick: unexpected mapping into 4MB page");
2169				if (pmap->pm_ptphint &&
2170					(pmap->pm_ptphint->pindex == ptepindex)) {
2171					mpte = pmap->pm_ptphint;
2172				} else {
2173					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
2174					pmap->pm_ptphint = mpte;
2175				}
2176				if (mpte == NULL)
2177					goto retry;
2178				mpte->hold_count++;
2179			} else {
2180				mpte = _pmap_allocpte(pmap, ptepindex);
2181			}
2182		}
2183	} else {
2184		mpte = NULL;
2185	}
2186
2187	/*
2188	 * This call to vtopte makes the assumption that we are
2189	 * entering the page into the current pmap.  In order to support
2190	 * quick entry into any pmap, one would likely use pmap_pte_quick.
2191	 * But that isn't as quick as vtopte.
2192	 */
2193	pte = (unsigned *)vtopte(va);
2194	if (*pte) {
2195		if (mpte)
2196			pmap_unwire_pte_hold(pmap, mpte);
2197		return 0;
2198	}
2199
2200	/*
2201	 * Enter on the PV list if part of our managed memory. Note that we
2202	 * raise IPL while manipulating pv_table since pmap_enter can be
2203	 * called at interrupt time.
2204	 */
2205	if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
2206		pmap_insert_entry(pmap, va, mpte, m);
2207
2208	/*
2209	 * Increment counters
2210	 */
2211	pmap->pm_stats.resident_count++;
2212
2213	pa = VM_PAGE_TO_PHYS(m);
2214
2215	/*
2216	 * Now validate mapping with RO protection
2217	 */
2218	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
2219		*pte = pa | PG_V | PG_U;
2220	else
2221		*pte = pa | PG_V | PG_U | PG_MANAGED;
2222
2223	return mpte;
2224}
2225
2226/*
2227 * Make a temporary mapping for a physical address.  This is only intended
2228 * to be used for panic dumps.
2229 */
2230void *
2231pmap_kenter_temporary(vm_offset_t pa, int i)
2232{
2233	pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa);
2234	return ((void *)crashdumpmap);
2235}
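/*
 * Illustrative use only (write_page_to_dump_device() is a hypothetical
 * consumer, not a routine in this file): a crash dump path can walk
 * physical memory one page at a time through the crashdumpmap window:
 *
 *	char *va;
 *	vm_offset_t pa;
 *
 *	for (pa = 0; pa < ptoa(Maxmem); pa += PAGE_SIZE) {
 *		va = pmap_kenter_temporary(pa, 0);
 *		write_page_to_dump_device(va);
 *	}
 *
 * Reusing index 0 simply overwrites the previous temporary mapping on
 * each iteration.
 */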
2236
2237#define MAX_INIT_PT (96)
2238/*
2239 * pmap_object_init_pt preloads the ptes for a given object
2240 * into the specified pmap.  This eliminates the blast of soft
2241 * faults on process startup and immediately after an mmap.
2242 */
2243void
2244pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
2245	pmap_t pmap;
2246	vm_offset_t addr;
2247	vm_object_t object;
2248	vm_pindex_t pindex;
2249	vm_size_t size;
2250	int limit;
2251{
2252	vm_offset_t tmpidx;
2253	int psize;
2254	vm_page_t p, mpte;
2255	int objpgs;
2256
2257	if (pmap == NULL || object == NULL)
2258		return;
2259
2260	/*
2261	 * This code maps large physical mmap regions into the
2262	 * processor address space.  Note that some shortcuts
2263	 * are taken, but the code works.
2264	 */
2265	if (pseflag &&
2266		(object->type == OBJT_DEVICE) &&
2267		((addr & (NBPDR - 1)) == 0) &&
2268		((size & (NBPDR - 1)) == 0) ) {
2269		int i;
2270		vm_page_t m[1];
2271		unsigned int ptepindex;
2272		int npdes;
2273		vm_offset_t ptepa;
2274
2275		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
2276			return;
2277
2278retry:
2279		p = vm_page_lookup(object, pindex);
2280		if (p && vm_page_sleep_busy(p, FALSE, "init4p"))
2281			goto retry;
2282
2283		if (p == NULL) {
2284			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
2285			if (p == NULL)
2286				return;
2287			m[0] = p;
2288
2289			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
2290				vm_page_free(p);
2291				return;
2292			}
2293
2294			p = vm_page_lookup(object, pindex);
2295			vm_page_wakeup(p);
2296		}
2297
2298		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
2299		if (ptepa & (NBPDR - 1)) {
2300			return;
2301		}
2302
2303		p->valid = VM_PAGE_BITS_ALL;
2304
2305		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
2306		npdes = size >> PDRSHIFT;
2307		for(i=0;i<npdes;i++) {
2308			pmap->pm_pdir[ptepindex] =
2309				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
2310			ptepa += NBPDR;
2311			ptepindex += 1;
2312		}
2313		vm_page_flag_set(p, PG_MAPPED);
2314		invltlb();
2315		return;
2316	}
2317
2318	psize = i386_btop(size);
2319
2320	if ((object->type != OBJT_VNODE) ||
2321		(limit && (psize > MAX_INIT_PT) &&
2322			(object->resident_page_count > MAX_INIT_PT))) {
2323		return;
2324	}
2325
2326	if (psize + pindex > object->size) {
2327		if (object->size < pindex)
2328			return;
2329		psize = object->size - pindex;
2330	}
2331
2332	mpte = NULL;
2333	/*
2334	 * if we are processing a major portion of the object, then scan the
2335	 * entire thing.
2336	 */
2337	if (psize > (object->resident_page_count >> 2)) {
2338		objpgs = psize;
2339
2340		for (p = TAILQ_FIRST(&object->memq);
2341		    ((objpgs > 0) && (p != NULL));
2342		    p = TAILQ_NEXT(p, listq)) {
2343
2344			tmpidx = p->pindex;
2345			if (tmpidx < pindex) {
2346				continue;
2347			}
2348			tmpidx -= pindex;
2349			if (tmpidx >= psize) {
2350				continue;
2351			}
2352			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2353				(p->busy == 0) &&
2354			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2355				if ((p->queue - p->pc) == PQ_CACHE)
2356					vm_page_deactivate(p);
2357				vm_page_busy(p);
2358				mpte = pmap_enter_quick(pmap,
2359					addr + i386_ptob(tmpidx), p, mpte);
2360				vm_page_flag_set(p, PG_MAPPED);
2361				vm_page_wakeup(p);
2362			}
2363			objpgs -= 1;
2364		}
2365	} else {
2366		/*
2367		 * else lookup the pages one-by-one.
2368		 */
2369		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
2370			p = vm_page_lookup(object, tmpidx + pindex);
2371			if (p &&
2372			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2373				(p->busy == 0) &&
2374			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2375				if ((p->queue - p->pc) == PQ_CACHE)
2376					vm_page_deactivate(p);
2377				vm_page_busy(p);
2378				mpte = pmap_enter_quick(pmap,
2379					addr + i386_ptob(tmpidx), p, mpte);
2380				vm_page_flag_set(p, PG_MAPPED);
2381				vm_page_wakeup(p);
2382			}
2383		}
2384	}
2385	return;
2386}
2387
2388/*
2389 * pmap_prefault provides a quick way of clustering
2390 * page faults into a process's address space.  It is a "cousin"
2391 * of pmap_object_init_pt, except it runs at page fault time instead
2392 * of mmap time.
2393 */
2394#define PFBAK 4
2395#define PFFOR 4
2396#define PAGEORDER_SIZE (PFBAK+PFFOR)
2397
2398static int pmap_prefault_pageorder[] = {
2399	-PAGE_SIZE, PAGE_SIZE,
2400	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
2401	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
2402	-4 * PAGE_SIZE, 4 * PAGE_SIZE
2403};
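/*
 * The table above enumerates the page offsets that are probed around the
 * faulting address, nearest first: -1, +1, -2, +2, -3, +3, -4 and +4
 * pages.  For example, a fault at addra = 0x08049000 (with 4K pages)
 * causes pmap_prefault() below to consider 0x08048000, 0x0804a000,
 * 0x08047000, 0x0804b000, and so on, subject to the map entry bounds
 * checked in the loop.
 */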
2404
2405void
2406pmap_prefault(pmap, addra, entry)
2407	pmap_t pmap;
2408	vm_offset_t addra;
2409	vm_map_entry_t entry;
2410{
2411	int i;
2412	vm_offset_t starta;
2413	vm_offset_t addr;
2414	vm_pindex_t pindex;
2415	vm_page_t m, mpte;
2416	vm_object_t object;
2417
2418	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace)))
2419		return;
2420
2421	object = entry->object.vm_object;
2422
2423	starta = addra - PFBAK * PAGE_SIZE;
2424	if (starta < entry->start) {
2425		starta = entry->start;
2426	} else if (starta > addra) {
2427		starta = 0;
2428	}
2429
2430	mpte = NULL;
2431	for (i = 0; i < PAGEORDER_SIZE; i++) {
2432		vm_object_t lobject;
2433		unsigned *pte;
2434
2435		addr = addra + pmap_prefault_pageorder[i];
2436		if (addr > addra + (PFFOR * PAGE_SIZE))
2437			addr = 0;
2438
2439		if (addr < starta || addr >= entry->end)
2440			continue;
2441
2442		if ((*pmap_pde(pmap, addr)) == 0)
2443			continue;
2444
2445		pte = (unsigned *) vtopte(addr);
2446		if (*pte)
2447			continue;
2448
2449		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2450		lobject = object;
2451		for (m = vm_page_lookup(lobject, pindex);
2452		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2453		    lobject = lobject->backing_object) {
2454			if (lobject->backing_object_offset & PAGE_MASK)
2455				break;
2456			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2457			m = vm_page_lookup(lobject->backing_object, pindex);
2458		}
2459
2460		/*
2461		 * Give up when a page is not in memory.
2462		 */
2463		if (m == NULL)
2464			break;
2465
2466		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2467			(m->busy == 0) &&
2468		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2469
2470			if ((m->queue - m->pc) == PQ_CACHE) {
2471				vm_page_deactivate(m);
2472			}
2473			vm_page_busy(m);
2474			mpte = pmap_enter_quick(pmap, addr, m, mpte);
2475			vm_page_flag_set(m, PG_MAPPED);
2476			vm_page_wakeup(m);
2477		}
2478	}
2479}
2480
2481/*
2482 *	Routine:	pmap_change_wiring
2483 *	Function:	Change the wiring attribute for a map/virtual-address
2484 *			pair.
2485 *	In/out conditions:
2486 *			The mapping must already exist in the pmap.
2487 */
2488void
2489pmap_change_wiring(pmap, va, wired)
2490	register pmap_t pmap;
2491	vm_offset_t va;
2492	boolean_t wired;
2493{
2494	register unsigned *pte;
2495
2496	if (pmap == NULL)
2497		return;
2498
2499	pte = pmap_pte(pmap, va);
2500
2501	if (wired && !pmap_pte_w(pte))
2502		pmap->pm_stats.wired_count++;
2503	else if (!wired && pmap_pte_w(pte))
2504		pmap->pm_stats.wired_count--;
2505
2506	/*
2507	 * Wiring is not a hardware characteristic so there is no need to
2508	 * invalidate TLB.
2509	 */
2510	pmap_pte_set_w(pte, wired);
2511}
2512
2513
2514
2515/*
2516 *	Copy the range specified by src_addr/len
2517 *	from the source map to the range dst_addr/len
2518 *	in the destination map.
2519 *
2520 *	This routine is only advisory and need not do anything.
2521 */
2522
2523void
2524pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2525	pmap_t dst_pmap, src_pmap;
2526	vm_offset_t dst_addr;
2527	vm_size_t len;
2528	vm_offset_t src_addr;
2529{
2530	vm_offset_t addr;
2531	vm_offset_t end_addr = src_addr + len;
2532	vm_offset_t pdnxt;
2533	unsigned src_frame, dst_frame;
2534	vm_page_t m;
2535
2536	if (dst_addr != src_addr)
2537		return;
2538
2539	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2540	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
2541		return;
2542	}
2543
2544	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
2545	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
2546		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
2547#if defined(SMP)
2548		/* The page directory is not shared between CPUs */
2549		cpu_invltlb();
2550#else
2551		invltlb();
2552#endif
2553	}
2554
2555	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
2556		unsigned *src_pte, *dst_pte;
2557		vm_page_t dstmpte, srcmpte;
2558		vm_offset_t srcptepaddr;
2559		unsigned ptepindex;
2560
2561		if (addr >= UPT_MIN_ADDRESS)
2562			panic("pmap_copy: invalid to pmap_copy page tables");
2563
2564		/*
2565		 * Don't let optional prefaulting of pages make us go
2566		 * way below the low water mark of free pages or way
2567		 * above high water mark of used pv entries.
2568		 */
2569		if (cnt.v_free_count < cnt.v_free_reserved ||
2570		    pv_entry_count > pv_entry_high_water)
2571			break;
2572
2573		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2574		ptepindex = addr >> PDRSHIFT;
2575
2576		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
2577		if (srcptepaddr == 0)
2578			continue;
2579
2580		if (srcptepaddr & PG_PS) {
2581			if (dst_pmap->pm_pdir[ptepindex] == 0) {
2582				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
2583				dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2584			}
2585			continue;
2586		}
2587
2588		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
2589		if ((srcmpte == NULL) ||
2590			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
2591			continue;
2592
2593		if (pdnxt > end_addr)
2594			pdnxt = end_addr;
2595
2596		src_pte = (unsigned *) vtopte(addr);
2597		dst_pte = (unsigned *) avtopte(addr);
2598		while (addr < pdnxt) {
2599			unsigned ptetemp;
2600			ptetemp = *src_pte;
2601			/*
2602			 * We only virtual-copy managed pages.
2603			 */
2604			if ((ptetemp & PG_MANAGED) != 0) {
2605				/*
2606				 * We have to check after allocpte for the
2607				 * pte still being around...  allocpte can
2608				 * block.
2609				 */
2610				dstmpte = pmap_allocpte(dst_pmap, addr);
2611				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
2612					/*
2613					 * Clear the modified and
2614					 * accessed (referenced) bits
2615					 * during the copy.
2616					 */
2617					m = PHYS_TO_VM_PAGE(ptetemp);
2618					*dst_pte = ptetemp & ~(PG_M | PG_A);
2619					dst_pmap->pm_stats.resident_count++;
2620					pmap_insert_entry(dst_pmap, addr,
2621						dstmpte, m);
2622	 			} else {
2623					pmap_unwire_pte_hold(dst_pmap, dstmpte);
2624				}
2625				if (dstmpte->hold_count >= srcmpte->hold_count)
2626					break;
2627			}
2628			addr += PAGE_SIZE;
2629			src_pte++;
2630			dst_pte++;
2631		}
2632	}
2633}
2634
2635/*
2636 *	Routine:	pmap_kernel
2637 *	Function:
2638 *		Returns the physical map handle for the kernel.
2639 */
2640pmap_t
2641pmap_kernel()
2642{
2643	return (kernel_pmap);
2644}
2645
2646/*
2647 *	pmap_zero_page zeros the specified hardware page by mapping
2648 *	the page into KVM and using bzero to clear its contents.
2649 */
2650void
2651pmap_zero_page(phys)
2652	vm_offset_t phys;
2653{
2654
2655	if (*(int *) CMAP2)
2656		panic("pmap_zero_page: CMAP2 busy");
2657
2658	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2659	invltlb_1pg((vm_offset_t)CADDR2);
2660
2661#if defined(I686_CPU)
2662	if (cpu_class == CPUCLASS_686)
2663		i686_pagezero(CADDR2);
2664	else
2665#endif
2666		bzero(CADDR2, PAGE_SIZE);
2667	*(int *) CMAP2 = 0;
2668}
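/*
 * CMAP2 is a kernel pte reserved at bootstrap time and CADDR2 is the
 * virtual address it maps; pmap_copy_page() below uses CMAP1/CADDR1 the
 * same way.  Writing the pte and invalidating that single TLB entry
 * gives a temporary kernel window onto an arbitrary physical page
 * without having to allocate any KVA here.
 */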
2669
2670/*
2671 *	pmap_zero_page_area zeros the specified hardware page by mapping
2672 *	the page into KVM and using bzero to clear its contents.
2673 *
2674 *	off and size may not cover an area beyond a single hardware page.
2675 */
2676void
2677pmap_zero_page_area(phys, off, size)
2678	vm_offset_t phys;
2679	int off;
2680	int size;
2681{
2682
2683	if (*(int *) CMAP2)
2684		panic("pmap_zero_page_area: CMAP2 busy");
2685
2686	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
2687	invltlb_1pg((vm_offset_t)CADDR2);
2688
2689#if defined(I686_CPU)
2690	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
2691		i686_pagezero(CADDR2);
2692	else
2693#endif
2694		bzero((char *)CADDR2 + off, size);
2695	*(int *) CMAP2 = 0;
2696}
2697
2698/*
2699 *	pmap_copy_page copies the specified (machine independent)
2700 *	page by mapping the page into virtual memory and using
2701 *	bcopy to copy the page, one machine dependent page at a
2702 *	time.
2703 */
2704void
2705pmap_copy_page(src, dst)
2706	vm_offset_t src;
2707	vm_offset_t dst;
2708{
2709
2710	if (*(int *) CMAP1)
2711		panic("pmap_copy_page: CMAP1 busy");
2712	if (*(int *) CMAP2)
2713		panic("pmap_copy_page: CMAP2 busy");
2714
2715	*(int *) CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
2716	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
2717#ifdef I386_CPU
2718	invltlb();
2719#else
2720	invlpg((u_int)CADDR1);
2721	invlpg((u_int)CADDR2);
2722#endif
2723
2724	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2725
2726	*(int *) CMAP1 = 0;
2727	*(int *) CMAP2 = 0;
2728}
2729
2730
2731/*
2732 *	Routine:	pmap_pageable
2733 *	Function:
2734 *		Make the specified pages (by pmap, offset)
2735 *		pageable (or not) as requested.
2736 *
2737 *		A page which is not pageable may not take
2738 *		a fault; therefore, its page table entry
2739 *		must remain valid for the duration.
2740 *
2741 *		This routine is merely advisory; pmap_enter
2742 *		will specify that these pages are to be wired
2743 *		down (or not) as appropriate.
2744 */
2745void
2746pmap_pageable(pmap, sva, eva, pageable)
2747	pmap_t pmap;
2748	vm_offset_t sva, eva;
2749	boolean_t pageable;
2750{
2751}
2752
2753/*
2754 * this routine returns true if a physical page resides
2755 * in the given pmap.
2756 */
2757boolean_t
2758pmap_page_exists(pmap, m)
2759	pmap_t pmap;
2760	vm_page_t m;
2761{
2762	register pv_entry_t pv;
2763	int s;
2764
2765	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2766		return FALSE;
2767
2768	s = splvm();
2769
2770	/*
2771	 * Check the page's current mappings, returning immediately if one
2771	 * belongs to the given pmap.
2772	 */
2773	for (pv = TAILQ_FIRST(&m->md.pv_list);
2774		pv;
2775		pv = TAILQ_NEXT(pv, pv_list)) {
2776		if (pv->pv_pmap == pmap) {
2777			splx(s);
2778			return TRUE;
2779		}
2780	}
2781	splx(s);
2782	return (FALSE);
2783}
2784
2785#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2786/*
2787 * Remove all pages from the specified address space;
2788 * this aids process exit speeds.  Also, this code
2789 * is special-cased for the current process only, but
2790 * can have the more generic (and slightly slower)
2791 * mode enabled.  This is much faster than pmap_remove
2792 * in the case of running down an entire address space.
2793 */
2794void
2795pmap_remove_pages(pmap, sva, eva)
2796	pmap_t pmap;
2797	vm_offset_t sva, eva;
2798{
2799	unsigned *pte, tpte;
2800	pv_entry_t pv, npv;
2801	int s;
2802	vm_page_t m;
2803
2804#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2805	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) {
2806		printf("warning: pmap_remove_pages called with non-current pmap\n");
2807		return;
2808	}
2809#endif
2810
2811	s = splvm();
2812	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
2813		pv;
2814		pv = npv) {
2815
2816		if (pv->pv_va >= eva || pv->pv_va < sva) {
2817			npv = TAILQ_NEXT(pv, pv_plist);
2818			continue;
2819		}
2820
2821#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2822		pte = (unsigned *)vtopte(pv->pv_va);
2823#else
2824		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2825#endif
2826		tpte = *pte;
2827
2828/*
2829 * We cannot remove wired pages from a process' mapping at this time
2830 */
2831		if (tpte & PG_W) {
2832			npv = TAILQ_NEXT(pv, pv_plist);
2833			continue;
2834		}
2835		*pte = 0;
2836
2837		m = PHYS_TO_VM_PAGE(tpte);
2838
2839		KASSERT(m < &vm_page_array[vm_page_array_size],
2840			("pmap_remove_pages: bad tpte %x", tpte));
2841
2842		pv->pv_pmap->pm_stats.resident_count--;
2843
2844		/*
2845		 * Update the vm_page_t clean and reference bits.
2846		 */
2847		if (tpte & PG_M) {
2848			vm_page_dirty(m);
2849		}
2850
2851
2852		npv = TAILQ_NEXT(pv, pv_plist);
2853		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2854
2855		m->md.pv_list_count--;
2856		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2857		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
2858			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
2859		}
2860
2861		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2862		free_pv_entry(pv);
2863	}
2864	splx(s);
2865	pmap_TLB_invalidate_all(pmap);
2866}
2867
2868/*
2869 * pmap_testbit tests bits in pte's.
2870 * Note that the testbit/changebit routines are inline,
2871 * and a lot of the logic evaluates at compile time.
2872 */
2873static boolean_t
2874pmap_testbit(m, bit)
2875	vm_page_t m;
2876	int bit;
2877{
2878	pv_entry_t pv;
2879	unsigned *pte;
2880	int s;
2881
2882	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2883		return FALSE;
2884
2885	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2886		return FALSE;
2887
2888	s = splvm();
2889
2890	for (pv = TAILQ_FIRST(&m->md.pv_list);
2891		pv;
2892		pv = TAILQ_NEXT(pv, pv_list)) {
2893
2894		/*
2895		 * If the bit being tested is the modified or accessed
2896		 * bit, only consider mappings whose modify status is
2897		 * tracked; addresses in the clean submap are skipped.
2898		 */
2899		if (bit & (PG_A|PG_M)) {
2900			if (!pmap_track_modified(pv->pv_va))
2901				continue;
2902		}
2903
2904#if defined(PMAP_DIAGNOSTIC)
2905		if (!pv->pv_pmap) {
2906			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
2907			continue;
2908		}
2909#endif
2910		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2911		if (*pte & bit) {
2912			splx(s);
2913			return TRUE;
2914		}
2915	}
2916	splx(s);
2917	return (FALSE);
2918}
2919
2920/*
2921 * this routine is used to modify bits in ptes
2922 */
2923static __inline void
2924pmap_changebit(m, bit, setem)
2925	vm_page_t m;
2926	int bit;
2927	boolean_t setem;
2928{
2929	register pv_entry_t pv;
2930	register unsigned *pte;
2931	int s;
2932
2933	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2934		return;
2935
2936	s = splvm();
2937
2938	/*
2939	 * Loop over all current mappings, setting/clearing as appropriate.
2940	 * (If setting RO, do we need to clear the VAC?)
2941	 */
2942	for (pv = TAILQ_FIRST(&m->md.pv_list);
2943		pv;
2944		pv = TAILQ_NEXT(pv, pv_list)) {
2945
2946		/*
2947		 * don't write protect pager mappings
2948		 */
2949		if (!setem && (bit == PG_RW)) {
2950			if (!pmap_track_modified(pv->pv_va))
2951				continue;
2952		}
2953
2954#if defined(PMAP_DIAGNOSTIC)
2955		if (!pv->pv_pmap) {
2956			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
2957			continue;
2958		}
2959#endif
2960
2961		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2962
2963		if (setem) {
2964			*(int *)pte |= bit;
2965			pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
2966		} else {
2967			vm_offset_t pbits = *(vm_offset_t *)pte;
2968			if (pbits & bit) {
2969				if (bit == PG_RW) {
2970					if (pbits & PG_M) {
2971						vm_page_dirty(m);
2972					}
2973					*(int *)pte = pbits & ~(PG_M|PG_RW);
2974				} else {
2975					*(int *)pte = pbits & ~bit;
2976				}
2977				pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
2978			}
2979		}
2980	}
2981	splx(s);
2982}
2983
2984/*
2985 *      pmap_page_protect:
2986 *
2987 *      Lower the permission for all mappings to a given page.
2988 */
2989void
2990pmap_page_protect(vm_page_t m, vm_prot_t prot)
2991{
2992	if ((prot & VM_PROT_WRITE) == 0) {
2993		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2994			pmap_changebit(m, PG_RW, FALSE);
2995		} else {
2996			pmap_remove_all(m);
2997		}
2998	}
2999}
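/*
 * The effect, by requested protection, is:
 *
 *	prot includes VM_PROT_WRITE	-> no change (nothing to lower)
 *	VM_PROT_READ and/or EXECUTE	-> clear PG_RW on every mapping
 *	VM_PROT_NONE			-> remove every mapping of the page
 */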
3000
3001vm_offset_t
3002pmap_phys_address(ppn)
3003	int ppn;
3004{
3005	return (i386_ptob(ppn));
3006}
3007
3008/*
3009 *	pmap_ts_referenced:
3010 *
3011 *	Return the count of reference bits for a page, clearing all of them.
3012 */
3013int
3014pmap_ts_referenced(vm_page_t m)
3015{
3016	register pv_entry_t pv, pvf, pvn;
3017	unsigned *pte;
3018	int s;
3019	int rtval = 0;
3020
3021	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
3022		return (rtval);
3023
3024	s = splvm();
3025
3026	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
3027
3028		pvf = pv;
3029
3030		do {
3031			pvn = TAILQ_NEXT(pv, pv_list);
3032
3033			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
3034
3035			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
3036
3037			if (!pmap_track_modified(pv->pv_va))
3038				continue;
3039
3040			pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3041
3042			if (pte && (*pte & PG_A)) {
3043				*pte &= ~PG_A;
3044
3045				pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
3046
3047				rtval++;
3048				if (rtval > 4) {
3049					break;
3050				}
3051			}
3052		} while ((pv = pvn) != NULL && pv != pvf);
3053	}
3054	splx(s);
3055
3056	return (rtval);
3057}
3058
3059/*
3060 *	pmap_is_modified:
3061 *
3062 *	Return whether or not the specified physical page was modified
3063 *	in any physical maps.
3064 */
3065boolean_t
3066pmap_is_modified(vm_page_t m)
3067{
3068	return pmap_testbit(m, PG_M);
3069}
3070
3071/*
3072 *	Clear the modify bits on the specified physical page.
3073 */
3074void
3075pmap_clear_modify(vm_page_t m)
3076{
3077	pmap_changebit(m, PG_M, FALSE);
3078}
3079
3080/*
3081 *	pmap_clear_reference:
3082 *
3083 *	Clear the reference bit on the specified physical page.
3084 */
3085void
3086pmap_clear_reference(vm_page_t m)
3087{
3088	pmap_changebit(m, PG_A, FALSE);
3089}
3090
3091/*
3092 * Miscellaneous support routines follow
3093 */
3094
3095static void
3096i386_protection_init()
3097{
3098	register int *kp, prot;
3099
3100	kp = protection_codes;
3101	for (prot = 0; prot < 8; prot++) {
3102		switch (prot) {
3103		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3104			/*
3105			 * Read access is also 0. There isn't any execute bit,
3106			 * so just make it readable.
3107			 */
3108		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3109		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3110		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3111			*kp++ = 0;
3112			break;
3113		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3114		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3115		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3116		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3117			*kp++ = PG_RW;
3118			break;
3119		}
3120	}
3121}
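/*
 * The resulting protection_codes[] table, indexed by the VM_PROT_*
 * combination, is simply:
 *
 *	index	protection			code
 *	0	NONE				0
 *	1	READ				0
 *	2	WRITE				PG_RW
 *	3	READ|WRITE			PG_RW
 *	4	EXECUTE				0
 *	5	READ|EXECUTE			0
 *	6	WRITE|EXECUTE			PG_RW
 *	7	READ|WRITE|EXECUTE		PG_RW
 *
 * i.e. any protection that includes VM_PROT_WRITE maps to PG_RW and
 * everything else maps to read-only, since there is no i386 execute bit.
 */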
3122
3123/*
3124 * Map a set of physical memory pages into the kernel virtual
3125 * address space. Return a pointer to where it is mapped. This
3126 * routine is intended to be used for mapping device memory,
3127 * NOT real memory.
3128 */
3129void *
3130pmap_mapdev(pa, size)
3131	vm_offset_t pa;
3132	vm_size_t size;
3133{
3134	vm_offset_t va, tmpva, offset;
3135	unsigned *pte;
3136
3137	offset = pa & PAGE_MASK;
3138	size = roundup(offset + size, PAGE_SIZE);
3139
3140	va = kmem_alloc_pageable(kernel_map, size);
3141	if (!va)
3142		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3143
3144	pa = pa & PG_FRAME;
3145	for (tmpva = va; size > 0;) {
3146		pte = (unsigned *)vtopte(tmpva);
3147		*pte = pa | PG_RW | PG_V | pgeflag;
3148		size -= PAGE_SIZE;
3149		tmpva += PAGE_SIZE;
3150		pa += PAGE_SIZE;
3151	}
3152	invltlb();
3153
3154	return ((void *)(va + offset));
3155}
3156
3157void
3158pmap_unmapdev(va, size)
3159	vm_offset_t va;
3160	vm_size_t size;
3161{
3162	vm_offset_t base, offset;
3163
3164	base = va & PG_FRAME;
3165	offset = va & PAGE_MASK;
3166	size = roundup(offset + size, PAGE_SIZE);
3167	kmem_free(kernel_map, base, size);
3168}
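/*
 * Illustrative use only (the physical address is made up): a device
 * driver mapping a 4K register window might do something like
 *
 *	volatile u_int32_t *regs;
 *
 *	regs = (volatile u_int32_t *)pmap_mapdev(0xfebff000, PAGE_SIZE);
 *	... access regs[0], regs[1], ... ...
 *	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
 *
 * The pointer returned by pmap_mapdev() already includes the sub-page
 * offset of `pa', so callers pass it back to pmap_unmapdev() unchanged.
 */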
3169
3170/*
3171 * perform the pmap work for mincore
3172 */
3173int
3174pmap_mincore(pmap, addr)
3175	pmap_t pmap;
3176	vm_offset_t addr;
3177{
3178
3179	unsigned *ptep, pte;
3180	vm_page_t m;
3181	int val = 0;
3182
3183	ptep = pmap_pte(pmap, addr);
3184	if (ptep == 0) {
3185		return 0;
3186	}
3187
3188	if ((pte = *ptep) != 0) {
3189		vm_offset_t pa;
3190
3191		val = MINCORE_INCORE;
3192		if ((pte & PG_MANAGED) == 0)
3193			return val;
3194
3195		pa = pte & PG_FRAME;
3196
3197		m = PHYS_TO_VM_PAGE(pa);
3198
3199		/*
3200		 * Modified by us
3201		 */
3202		if (pte & PG_M)
3203			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3204		/*
3205		 * Modified by someone
3206		 */
3207		else if (m->dirty || pmap_is_modified(m))
3208			val |= MINCORE_MODIFIED_OTHER;
3209		/*
3210		 * Referenced by us
3211		 */
3212		if (pte & PG_A)
3213			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3214
3215		/*
3216		 * Referenced by someone
3217		 */
3218		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
3219			val |= MINCORE_REFERENCED_OTHER;
3220			vm_page_flag_set(m, PG_REFERENCED);
3221		}
3222	}
3223	return val;
3224}
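/*
 * For example, a resident managed page that has been both written and
 * referenced through this pmap comes back as
 *
 *	MINCORE_INCORE | MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER |
 *	MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER
 *
 * while a resident unmanaged page reports just MINCORE_INCORE.
 */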
3225
3226void
3227pmap_activate(struct proc *p)
3228{
3229	pmap_t	pmap;
3230
3231	pmap = vmspace_pmap(p->p_vmspace);
3232#if defined(SMP)
3233	pmap->pm_active |= 1 << PCPU_GET(cpuid);
3234#else
3235	pmap->pm_active |= 1;
3236#endif
3237#if defined(SWTCH_OPTIM_STATS)
3238	tlb_flush_count++;
3239#endif
3240	load_cr3(p->p_addr->u_pcb.pcb_cr3 = vtophys(pmap->pm_pdir));
3241}
3242
3243vm_offset_t
3244pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3245{
3246
3247	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3248		return addr;
3249	}
3250
3251	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3252	return addr;
3253}
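/*
 * Example of the rounding above: with NBPDR = 4MB, a hint address of
 * 0x12345678 becomes (0x12345678 + 0x003fffff) & ~0x003fffff ==
 * 0x12400000, i.e. the next 4MB boundary, which lets large OBJT_DEVICE
 * mappings use 4MB page directory entries.
 */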
3254
3255
3256#if defined(PMAP_DEBUG)
3257int pmap_pid_dump(int pid)
3258{
3259	pmap_t pmap;
3260	struct proc *p;
3261	int npte = 0;
3262	int index;
3263	ALLPROC_LOCK(AP_SHARED);
3264	LIST_FOREACH(p, &allproc, p_list) {
3265		if (p->p_pid != pid)
3266			continue;
3267
3268		if (p->p_vmspace) {
3269			int i,j;
3270			index = 0;
3271			pmap = vmspace_pmap(p->p_vmspace);
3272			for(i=0;i<1024;i++) {
3273				pd_entry_t *pde;
3274				unsigned *pte;
3275				unsigned base = i << PDRSHIFT;
3276
3277				pde = &pmap->pm_pdir[i];
3278				if (pde && pmap_pde_v(pde)) {
3279					for(j=0;j<1024;j++) {
3280						unsigned va = base + (j << PAGE_SHIFT);
3281						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
3282							if (index) {
3283								index = 0;
3284								printf("\n");
3285							}
3286							ALLPROC_LOCK(AP_RELEASE);
3287							return npte;
3288						}
3289						pte = pmap_pte_quick( pmap, va);
3290						if (pte && pmap_pte_v(pte)) {
3291							vm_offset_t pa;
3292							vm_page_t m;
3293							pa = *(int *)pte;
3294							m = PHYS_TO_VM_PAGE(pa);
3295							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
3296								va, pa, m->hold_count, m->wire_count, m->flags);
3297							npte++;
3298							index++;
3299							if (index >= 2) {
3300								index = 0;
3301								printf("\n");
3302							} else {
3303								printf(" ");
3304							}
3305						}
3306					}
3307				}
3308			}
3309		}
3310	}
3311	ALLPROC_LOCK(AP_RELEASE);
3312	return npte;
3313}
3314#endif
3315
3316#if defined(DEBUG)
3317
3318static void	pads __P((pmap_t pm));
3319void		pmap_pvdump __P((vm_offset_t pa));
3320
3321/* print address space of pmap */
3322static void
3323pads(pm)
3324	pmap_t pm;
3325{
3326	unsigned va, i, j;
3327	unsigned *ptep;
3328
3329	if (pm == kernel_pmap)
3330		return;
3331	for (i = 0; i < 1024; i++)
3332		if (pm->pm_pdir[i])
3333			for (j = 0; j < 1024; j++) {
3334				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3335				if (pm == kernel_pmap && va < KERNBASE)
3336					continue;
3337				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
3338					continue;
3339				ptep = pmap_pte_quick(pm, va);
3340				if (pmap_pte_v(ptep))
3341					printf("%x:%x ", va, *(int *) ptep);
3342			}
3343
3344}
3345
3346void
3347pmap_pvdump(pa)
3348	vm_offset_t pa;
3349{
3350	register pv_entry_t pv;
3351	vm_page_t m;
3352
3353	printf("pa %x", pa);
3354	m = PHYS_TO_VM_PAGE(pa);
3355	for (pv = TAILQ_FIRST(&m->md.pv_list);
3356		pv;
3357		pv = TAILQ_NEXT(pv, pv_list)) {
3358#ifdef used_to_be
3359		printf(" -> pmap %p, va %x, flags %x",
3360		    (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags);
3361#endif
3362		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3363		pads(pv->pv_pmap);
3364	}
3365	printf(" ");
3366}
3367#endif
3368