pmap.c revision 16057
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42 *	$Id: pmap.c,v 1.96 1996/05/31 00:37:48 dyson Exp $
43 */
44
45/*
46 * Derived from hp300 version by Mike Hibler, this version by William
47 * Jolitz uses a recursive map [a pde points to the page directory] to
48 * map the page tables using the pagetables themselves. This is done to
49 * reduce the impact on kernel virtual memory for lots of sparse address
50 * space, and to reduce the cost of memory to each process.
51 *
52 *	Derived from: hp300/@(#)pmap.c	7.1 (Berkeley) 12/5/90
53 */
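
/*
 * A minimal, compiled-out sketch of what the recursive mapping buys us:
 * because one pde points back at the page directory itself, every pte of
 * the current address space appears in a single linear array at PTmap,
 * indexed by virtual page number.  vtopte() and get_ptbase() below rely
 * on exactly this layout.  (example_vtopte is an illustrative name only.)
 */
#if 0
static unsigned *
example_vtopte(vm_offset_t va)
{
	/* pte for va, found through the self-referential directory entry */
	return (unsigned *) PTmap + i386_btop(va);
}
#endif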
54
55/*
56 *	Manages physical address maps.
57 *
58 *	In addition to hardware address maps, this
59 *	module is called upon to provide software-use-only
60 *	maps which may or may not be stored in the same
61 *	form as hardware maps.  These pseudo-maps are
62 *	used to store intermediate results from copy
63 *	operations to and from address spaces.
64 *
65 *	Since the information managed by this module is
66 *	also stored by the logical address mapping module,
67 *	this module may throw away valid virtual-to-physical
68 *	mappings at almost any time.  However, invalidations
69 *	of virtual-to-physical mappings must be done as
70 *	requested.
71 *
72 *	In order to cope with hardware architectures which
73 *	make virtual-to-physical map invalidates expensive,
74 *	this module may delay invalidate or reduced protection
75 *	operations until such time as they are actually
76 *	necessary.  This module is given full information as
77 *	to which processors are currently using which maps,
78 *	and to when physical maps must be made correct.
79 */
80
81#include <sys/param.h>
82#include <sys/systm.h>
83#include <sys/proc.h>
84#include <sys/malloc.h>
85#include <sys/msgbuf.h>
86#include <sys/queue.h>
87#include <sys/vmmeter.h>
88#include <sys/mman.h>
89
90#include <vm/vm.h>
91#include <vm/vm_param.h>
92#include <vm/vm_prot.h>
93#include <vm/lock.h>
94#include <vm/vm_kern.h>
95#include <vm/vm_page.h>
96#include <vm/vm_map.h>
97#include <vm/vm_object.h>
98#include <vm/vm_extern.h>
99#include <vm/vm_pageout.h>
100
101#include <machine/pcb.h>
102#include <machine/cputypes.h>
103#include <machine/md_var.h>
104
105#include <i386/isa/isa.h>
106
107#define PMAP_KEEP_PDIRS
108
109#if defined(DIAGNOSTIC)
110#define PMAP_DIAGNOSTIC
111#endif
112
113static void	init_pv_entries __P((int));
114
115/*
116 * Get PDEs and PTEs for user/kernel address space
117 */
118#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
119#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
120
121#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
122#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
123#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
124#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
125#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
126
127#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
128#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
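
/*
 * A small, compiled-out sketch of how the accessor macros above are meant
 * to be combined with pmap_pte() (defined further down): look the pte up
 * once, then test and update its bits in place.  The function name is
 * illustrative only.
 */
#if 0
static void
example_wire_pte(pmap_t pmap, vm_offset_t va)
{
	unsigned *pte = pmap_pte(pmap, va);

	/* mark the mapping wired if it is valid and not already wired */
	if (pte && pmap_pte_v(pte) && !pmap_pte_w(pte))
		pmap_pte_set_w(pte, 1);
}
#endif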
129
130/*
131 * Given a map and a machine independent protection code,
132 * convert to a vax protection code.
133 */
134#define pte_prot(m, p)	(protection_codes[p])
135static int protection_codes[8];
136
137static struct pmap kernel_pmap_store;
138pmap_t kernel_pmap;
139
140vm_offset_t avail_start;	/* PA of first available physical page */
141vm_offset_t avail_end;		/* PA of last available physical page */
142vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
143vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
144static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
145static vm_offset_t vm_first_phys;
146
147static int nkpt;
148static vm_page_t nkpg;
149vm_offset_t kernel_vm_end;
150
151extern vm_offset_t clean_sva, clean_eva;
152extern int cpu_class;
153
154#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2)
155
156/*
157 * Data for the pv entry allocation mechanism
158 */
159static int pv_freelistcnt;
160static pv_entry_t pv_freelist;
161static vm_offset_t pvva;
162static int npvvapg;
163
164/*
165 * All those kernel PT submaps that BSD is so fond of
166 */
167pt_entry_t *CMAP1;
168static pt_entry_t *CMAP2, *ptmmap;
169static pv_entry_t *pv_table;
170caddr_t CADDR1, ptvmmap;
171static caddr_t CADDR2;
172static pt_entry_t *msgbufmap;
173struct msgbuf *msgbufp;
174
175static void	free_pv_entry __P((pv_entry_t pv));
176static __inline unsigned * get_ptbase __P((pmap_t pmap));
177static pv_entry_t get_pv_entry __P((void));
178static void	i386_protection_init __P((void));
179static void	pmap_alloc_pv_entry __P((void));
180static void	pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem));
181static void	pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
182				      vm_offset_t pa));
183static int	pmap_is_managed __P((vm_offset_t pa));
184static void	pmap_remove_all __P((vm_offset_t pa));
185static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
186static __inline int pmap_remove_entry __P((struct pmap *pmap, pv_entry_t *pv,
187					vm_offset_t va));
188static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
189					vm_offset_t sva));
190static boolean_t
191		pmap_testbit __P((vm_offset_t pa, int bit));
192static __inline void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
193		vm_page_t mpte, vm_offset_t pa));
194
195static __inline vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
196static void pmap_remove_pte_mapping __P((vm_offset_t pa));
197static __inline int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
198static vm_page_t _pmap_allocpte __P((pmap_t pmap, vm_offset_t va, int ptepindex));
199
200#define PDSTACKMAX 16
201static vm_offset_t pdstack[PDSTACKMAX];
202static int pdstackptr;
203
204#if defined(PMAP_DIAGNOSTIC)
205
206/*
207 * This code checks for non-writeable/modified pages.
208 * This should be an invalid condition.
209 */
210static int
211pmap_nw_modified(pt_entry_t ptea) {
212	int pte;
213
214	pte = (int) ptea;
215
216	if ((pte & (PG_M|PG_RW)) == PG_M)
217		return 1;
218	else
219		return 0;
220}
221#endif
222
223/*
224 * The routines below are finer-grained forms of pmap_update.  They eliminate
225 * the gratuitous TLB flushes on non-i386 architectures.
226 */
227static __inline void
228pmap_update_1pg( vm_offset_t va) {
229#if defined(I386_CPU)
230	if (cpu_class == CPUCLASS_386)
231		pmap_update();
232	else
233#endif
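		/* ".byte 0xf,0x1,0x38" hand-encodes invlpg (%eax); "a" (va) puts va in %eax */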
234		__asm __volatile(".byte 0xf,0x1,0x38": :"a" (va));
235}
236
237static __inline void
238pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) {
239#if defined(I386_CPU)
240	if (cpu_class == CPUCLASS_386) {
241		pmap_update();
242	} else
243#endif
244	{
245		__asm __volatile(".byte 0xf,0x1,0x38": :"a" (va1));
246		__asm __volatile(".byte 0xf,0x1,0x38": :"a" (va2));
247	}
248}
249
250static __inline __pure unsigned *
251get_ptbase(pmap)
252	pmap_t pmap;
253{
254	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
255
256	/* are we current address space or kernel? */
257	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
258		return (unsigned *) PTmap;
259	}
260	/* otherwise, we are alternate address space */
261	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
262		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
263		pmap_update();
264	}
265	return (unsigned *) APTmap;
266}
267
268/*
269 *	Routine:	pmap_pte
270 *	Function:
271 *		Extract the page table entry associated
272 *		with the given map/virtual_address pair.
273 */
274
275__inline unsigned * __pure
276pmap_pte(pmap, va)
277	register pmap_t pmap;
278	vm_offset_t va;
279{
280	if (pmap && *pmap_pde(pmap, va)) {
281		return get_ptbase(pmap) + i386_btop(va);
282	}
283	return (0);
284}
285
286/*
287 *	Routine:	pmap_extract
288 *	Function:
289 *		Extract the physical page address associated
290 *		with the given map/virtual_address pair.
291 */
292vm_offset_t __pure
293pmap_extract(pmap, va)
294	register pmap_t pmap;
295	vm_offset_t va;
296{
297	if (pmap && *pmap_pde(pmap, va)) {
298		unsigned *pte;
299		pte = get_ptbase(pmap) + i386_btop(va);
300		return ((*pte & PG_FRAME) | (va & PAGE_MASK));
301	}
302	return 0;
303
304}
305
306/*
307 * Add a list of wired pages to the kva.
308 * This routine is only used for temporary
309 * kernel mappings that do not need to have
310 * page modification or references recorded.
311 * Note that old mappings are simply written
312 * over.  The page *must* be wired.
313 */
314void
315pmap_qenter(va, m, count)
316	vm_offset_t va;
317	vm_page_t *m;
318	int count;
319{
320	int i;
321	register unsigned *pte;
322
323	for (i = 0; i < count; i++) {
324		vm_offset_t tva = va + i * PAGE_SIZE;
325		unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V;
326		unsigned opte;
327		pte = (unsigned *)vtopte(tva);
328		opte = *pte;
329		*pte = npte;
330		if (opte)
331			pmap_update_1pg(tva);
332	}
333}
334/*
335 * this routine jerks page mappings from the
336 * kernel -- it is meant only for temporary mappings.
337 */
338void
339pmap_qremove(va, count)
340	vm_offset_t va;
341	int count;
342{
343	int i;
344	register unsigned *pte;
345
346	for (i = 0; i < count; i++) {
347		pte = (unsigned *)vtopte(va);
348		*pte = 0;
349		pmap_update_1pg(va);
350		va += PAGE_SIZE;
351	}
352}
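
/*
 * A compiled-out sketch of the intended pmap_qenter/pmap_qremove pairing
 * for temporary kernel mappings of wired pages.  The function name and
 * parameters are illustrative only.
 */
#if 0
static void
example_temporary_mapping(vm_offset_t kva, vm_page_t *m, int count)
{
	pmap_qenter(kva, m, count);	/* map the wired pages at kva */
	/* ... access the pages through kva ... */
	pmap_qremove(kva, count);	/* and jerk the mappings back out */
}
#endif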
353
354/*
355 * add a wired page to the kva
356 * note that in order for the mapping to take effect -- you
357 * should do a pmap_update after doing the pmap_kenter...
358 */
359__inline void
360pmap_kenter(va, pa)
361	vm_offset_t va;
362	register vm_offset_t pa;
363{
364	register unsigned *pte;
365	unsigned npte, opte;
366
367	npte = pa | PG_RW | PG_V;
368	pte = (unsigned *)vtopte(va);
369	opte = *pte;
370	*pte = npte;
371	if (opte)
372		pmap_update_1pg(va);
373}
374
375/*
376 * remove a page from the kernel pagetables
377 */
378__inline void
379pmap_kremove(va)
380	vm_offset_t va;
381{
382	register unsigned *pte;
383
384	pte = (unsigned *)vtopte(va);
385	*pte = 0;
386	pmap_update_1pg(va);
387}
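
/*
 * A compiled-out sketch of the pmap_kenter/pmap_kremove pair, which works
 * on a raw physical address rather than a vm_page_t (this is the form
 * pmap_pinit() and pmap_alloc_pv_entry() below use).  The function name
 * is illustrative only.
 */
#if 0
static void
example_kernel_mapping(vm_offset_t kva, vm_offset_t pa)
{
	pmap_kenter(kva, pa);		/* establish a wired kva -> pa mapping */
	/* ... use the mapping ... */
	pmap_kremove(kva);		/* and remove it again */
}
#endif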
388
389/*
390 * determine if a page is managed (memory vs. device)
391 */
392static __inline __pure int
393pmap_is_managed(pa)
394	vm_offset_t pa;
395{
396	int i;
397
398	if (!pmap_initialized)
399		return 0;
400
401	for (i = 0; phys_avail[i + 1]; i += 2) {
402		if (pa < phys_avail[i + 1] && pa >= phys_avail[i])
403			return 1;
404	}
405	return 0;
406}
407
408static __inline int
409pmap_unwire_pte_hold(vm_page_t m) {
410	vm_page_unhold(m);
411	if (m->hold_count == 0) {
412		--m->wire_count;
413		if (m->wire_count == 0) {
414			--cnt.v_wire_count;
415			m->dirty = 0;
416			vm_page_deactivate(m);
417		}
418		return 1;
419	}
420	return 0;
421}
422
423#if !defined(PMAP_DIAGNOSTIC)
424__inline
425#endif
426int
427pmap_unuse_pt(pmap, va, mpte)
428	pmap_t pmap;
429	vm_offset_t va;
430	vm_page_t mpte;
431{
432	if (va >= UPT_MIN_ADDRESS)
433		return 0;
434
435	if (mpte == NULL) {
436		vm_offset_t ptepa;
437		ptepa = ((vm_offset_t) *pmap_pde(pmap, va)) /* & PG_FRAME */;
438#if defined(PMAP_DIAGNOSTIC)
439		if (!ptepa)
440			panic("pmap_unuse_pt: pagetable page missing, va: 0x%x", va);
441#endif
442		mpte = PHYS_TO_VM_PAGE(ptepa);
443	}
444
445#if defined(PMAP_DIAGNOSTIC)
446	if (mpte->hold_count == 0) {
447		panic("pmap_unuse_pt: hold count < 0, va: 0x%x", va);
448	}
449#endif
450
451/*
452 * We don't free page-table-pages anymore because it can have a negative
453 * impact on perf at times.  Now we just deactivate, and it'll get cleaned
454 * up if needed...  Also, if the page ends up getting used, it will be
455 * brought back into the process address space by pmap_allocpte and be
456 * reactivated.
457 */
458	return pmap_unwire_pte_hold(mpte);
459}
460
461/*
462 *	Bootstrap the system enough to run with virtual memory.
463 *
464 *	On the i386 this is called after mapping has already been enabled
465 *	and just syncs the pmap module with what has already been done.
466 *	[We can't call it easily with mapping off since the kernel is not
467 *	mapped with PA == VA, hence we would have to relocate every address
468 *	from the linked base (virtual) address "KERNBASE" to the actual
469 *	(physical) address starting relative to 0]
470 */
471void
472pmap_bootstrap(firstaddr, loadaddr)
473	vm_offset_t firstaddr;
474	vm_offset_t loadaddr;
475{
476	vm_offset_t va;
477	pt_entry_t *pte;
478
479	avail_start = firstaddr;
480
481	/*
482	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
483	 * large. It should instead be correctly calculated in locore.s and
484	 * not based on 'first' (which is a physical address, not a virtual
485	 * address, for the start of unused physical memory). The kernel
486	 * page tables are NOT double mapped and thus should not be included
487	 * in this calculation.
488	 */
489	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
490	virtual_end = VM_MAX_KERNEL_ADDRESS;
491
492	/*
493	 * Initialize protection array.
494	 */
495	i386_protection_init();
496
497	/*
498	 * The kernel's pmap is statically allocated so we don't have to use
499	 * pmap_create, which is unlikely to work correctly at this part of
500	 * the boot sequence (XXX and which no longer exists).
501	 */
502	kernel_pmap = &kernel_pmap_store;
503
504	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD);
505
506	kernel_pmap->pm_count = 1;
507	nkpt = NKPT;
508
509	/*
510	 * Reserve some special page table entries/VA space for temporary
511	 * mapping of pages.
512	 */
513#define	SYSMAP(c, p, v, n)	\
514	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
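	/*
	 * For example, SYSMAP(caddr_t, CMAP1, CADDR1, 1) below expands to
	 * roughly: CADDR1 = (caddr_t)va; va += PAGE_SIZE; CMAP1 = pte; pte += 1;
	 * i.e. it hands out one page of KVA together with the pte that maps it.
	 */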
515
516	va = virtual_avail;
517	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
518
519	/*
520	 * CMAP1/CMAP2 are used for zeroing and copying pages.
521	 */
522	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
523	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
524
525	/*
526	 * ptmmap is used for reading arbitrary physical pages via /dev/mem.
527	 */
528	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
529
530	/*
531	 * msgbufmap is used to map the system message buffer.
532	 */
533	SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 1)
534
535	virtual_avail = va;
536
537	*(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0;
538	pmap_update();
539
540}
541
542/*
543 *	Initialize the pmap module.
544 *	Called by vm_init, to initialize any structures that the pmap
545 *	system needs to map virtual memory.
546 *	pmap_init has been enhanced to support discontiguous physical
547 *	memory in a fairly consistent way.
548 */
549void
550pmap_init(phys_start, phys_end)
551	vm_offset_t phys_start, phys_end;
552{
553	vm_offset_t addr;
554	vm_size_t npg, s;
555	int i;
556
557	/*
558	 * calculate the number of pv_entries needed
559	 */
560	vm_first_phys = phys_avail[0];
561	for (i = 0; phys_avail[i + 1]; i += 2);
562	npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE;
563
564	/*
565	 * Allocate memory for random pmap data structures.  Includes the
566	 * pv_head_table.
567	 */
568	s = (vm_size_t) (sizeof(struct pv_entry *) * npg);
569	s = round_page(s);
570	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
571	pv_table = (pv_entry_t *) addr;
572
573	/*
574	 * init the pv free list
575	 */
576	init_pv_entries(npg);
577	/*
578	 * Now it is safe to enable pv_table recording.
579	 */
580	pmap_initialized = TRUE;
581}
582
583/*
584 *	Used to map a range of physical addresses into kernel
585 *	virtual address space.
586 *
587 *	For now, VM is already on, we only need to map the
588 *	specified memory.
589 */
590vm_offset_t
591pmap_map(virt, start, end, prot)
592	vm_offset_t virt;
593	vm_offset_t start;
594	vm_offset_t end;
595	int prot;
596{
597	while (start < end) {
598		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
599		virt += PAGE_SIZE;
600		start += PAGE_SIZE;
601	}
602	return (virt);
603}
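
/*
 * A compiled-out sketch of a typical boot-time pmap_map() call: given a
 * starting KVA and a physical range, it returns the next unused KVA.
 * The physical range shown here is illustrative only.
 */
#if 0
static vm_offset_t
example_map_phys_range(vm_offset_t kva)
{
	/* map 64KB of physical memory starting at 16MB, read/write */
	return pmap_map(kva, 16 * 1024 * 1024, 16 * 1024 * 1024 + 64 * 1024,
	    VM_PROT_READ | VM_PROT_WRITE);
}
#endif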
604
605/*
606 * Initialize a preallocated and zeroed pmap structure,
607 * such as one in a vmspace structure.
608 */
609void
610pmap_pinit(pmap)
611	register struct pmap *pmap;
612{
613	vm_page_t ptdpg;
614	/*
615	 * No need to allocate page table space yet but we do need a valid
616	 * page directory table.
617	 */
618
619	if (pdstackptr > 0) {
620		--pdstackptr;
621		pmap->pm_pdir =
622			(pd_entry_t *)pdstack[pdstackptr];
623	} else {
624		pmap->pm_pdir =
625			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
626	}
627
628	/*
629	 * allocate object for the ptes
630	 */
631	pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
632
633	/*
634	 * allocate the page directory page
635	 */
636retry:
637	ptdpg = vm_page_alloc( pmap->pm_pteobj, PTDPTDI, VM_ALLOC_ZERO);
638	if (ptdpg == NULL) {
639		VM_WAIT;
640		goto retry;
641	}
642	vm_page_wire(ptdpg);
643	ptdpg->flags &= ~(PG_MAPPED|PG_BUSY);	/* not mapped normally */
644	ptdpg->valid = VM_PAGE_BITS_ALL;
645
646	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
647	if ((ptdpg->flags & PG_ZERO) == 0)
648		bzero(pmap->pm_pdir, PAGE_SIZE);
649
650	/* wire in kernel global address entries */
651	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
652
653	/* install self-referential address mapping entry */
654	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
655		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW;
656
657	pmap->pm_count = 1;
658}
659
660static __inline int
661pmap_release_free_page(pmap, p)
662	struct pmap *pmap;
663	vm_page_t p;
664{
665	int s;
666	/*
667	 * This code optimizes the case of freeing non-busy
668	 * page-table pages.  Those pages are zero now, and
669	 * might as well be placed directly into the zero queue.
670	 */
671	s = splvm();
672	if (p->flags & PG_BUSY) {
673		p->flags |= PG_WANTED;
674		tsleep(p, PVM, "pmaprl", 0);
675		splx(s);
676		return 0;
677	}
678
679	if (p->flags & PG_MAPPED) {
680		p->flags &= ~PG_MAPPED;
681		pmap_remove_pte_mapping(VM_PAGE_TO_PHYS(p));
682	}
683
684#if defined(PMAP_DIAGNOSTIC)
685	if (p->hold_count)
686		panic("pmap_release: freeing held page table page");
687#endif
688	/*
689	 * Page directory pages need to have the kernel
690	 * stuff cleared, so they can go into the zero queue also.
691	 */
692	if (p->pindex == PTDPTDI) {
693		unsigned *pde = (unsigned *) pmap->pm_pdir;
694		bzero(pde + KPTDI, nkpt * PTESIZE);
695		pde[APTDPTDI] = 0;
696		pde[PTDPTDI] = 0;
697		pmap_kremove((vm_offset_t) pmap->pm_pdir);
698	}
699
700	vm_page_free(p);
701	TAILQ_REMOVE(&vm_page_queue_free, p, pageq);
702	TAILQ_INSERT_HEAD(&vm_page_queue_zero, p, pageq);
703	p->queue = PQ_ZERO;
704	splx(s);
705	++vm_page_zero_count;
706	return 1;
707}
708
709/*
710 * Release any resources held by the given physical map.
711 * Called when a pmap initialized by pmap_pinit is being released.
712 * Should only be called if the map contains no valid mappings.
713 */
714void
715pmap_release(pmap)
716	register struct pmap *pmap;
717{
718	vm_page_t p,n,ptdpg;
719	vm_object_t object = pmap->pm_pteobj;
720
721	ptdpg = NULL;
722retry:
723	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
724		n = TAILQ_NEXT(p, listq);
725		if (p->pindex == PTDPTDI) {
726			ptdpg = p;
727			continue;
728		}
729		if (!pmap_release_free_page(pmap, p))
730			goto retry;
731	}
732	pmap_release_free_page(pmap, ptdpg);
733
734	vm_object_deallocate(object);
735	if (pdstackptr < PDSTACKMAX) {
736		pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir;
737		++pdstackptr;
738	} else {
739		kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE);
740	}
741}
742
743/*
744 * grow the number of kernel page table entries, if needed
745 */
746
747void
748pmap_growkernel(vm_offset_t addr)
749{
750	struct proc *p;
751	struct pmap *pmap;
752	int s;
753
754	s = splhigh();
755	if (kernel_vm_end == 0) {
756		kernel_vm_end = KERNBASE;
757		nkpt = 0;
758		while (pdir_pde(PTD, kernel_vm_end)) {
759			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
760			++nkpt;
761		}
762	}
763	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
764	while (kernel_vm_end < addr) {
765		if (pdir_pde(PTD, kernel_vm_end)) {
766			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
767			continue;
768		}
769		++nkpt;
770		if (!nkpg) {
771			nkpg = vm_page_alloc(kernel_object, 0, VM_ALLOC_SYSTEM);
772			if (!nkpg)
773				panic("pmap_growkernel: no memory to grow kernel");
774			vm_page_wire(nkpg);
775			vm_page_remove(nkpg);
776			pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
777		}
778		pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW);
779		nkpg = NULL;
780
781		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
782			if (p->p_vmspace) {
783				pmap = &p->p_vmspace->vm_pmap;
784				*pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
785			}
786		}
787		*pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
788		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
789	}
790	splx(s);
791}
792
793/*
794 *	Retire the given physical map from service.
795 *	Should only be called if the map contains
796 *	no valid mappings.
797 */
798void
799pmap_destroy(pmap)
800	register pmap_t pmap;
801{
802	int count;
803
804	if (pmap == NULL)
805		return;
806
807	count = --pmap->pm_count;
808	if (count == 0) {
809		pmap_release(pmap);
810		free((caddr_t) pmap, M_VMPMAP);
811	}
812}
813
814/*
815 *	Add a reference to the specified pmap.
816 */
817void
818pmap_reference(pmap)
819	pmap_t pmap;
820{
821	if (pmap != NULL) {
822		pmap->pm_count++;
823	}
824}
825
826/*
827 * free the pv_entry back to the free list
828 */
829static __inline void
830free_pv_entry(pv)
831	pv_entry_t pv;
832{
833	++pv_freelistcnt;
834	pv->pv_next = pv_freelist;
835	pv_freelist = pv;
836}
837
838/*
839 * get a new pv_entry, allocating a block from the system
840 * when needed.
841 * the memory allocation is performed bypassing the malloc code
842 * because of the possibility of allocations at interrupt time.
843 */
844static __inline pv_entry_t
845get_pv_entry()
846{
847	pv_entry_t tmp;
848
849	/*
850	 * get more pv_entry pages if needed
851	 */
852	if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) {
853		pmap_alloc_pv_entry();
854	}
855	/*
856	 * get a pv_entry off of the free list
857	 */
858	--pv_freelistcnt;
859	tmp = pv_freelist;
860	pv_freelist = tmp->pv_next;
861	return tmp;
862}
863
864/*
865 * this *strange* allocation routine *statistically* eliminates the
866 * *possibility* of a malloc failure (*FATAL*) for a pv_entry_t data structure.
867 * also -- this code is MUCH MUCH faster than the malloc equiv...
868 */
869static void
870pmap_alloc_pv_entry()
871{
872	/*
873	 * do we have any pre-allocated map-pages left?
874	 */
875	if (npvvapg) {
876		vm_page_t m;
877
878		/*
879		 * allocate a physical page out of the vm system
880		 */
881		m = vm_page_alloc(kernel_object,
882		    OFF_TO_IDX(pvva - vm_map_min(kernel_map)),
883		    VM_ALLOC_INTERRUPT);
884		if (m) {
885			int newentries;
886			int i;
887			pv_entry_t entry;
888
889			newentries = (PAGE_SIZE / sizeof(struct pv_entry));
890			/*
891			 * wire the page
892			 */
893			vm_page_wire(m);
894			m->flags &= ~PG_BUSY;
895			/*
896			 * let the kernel see it
897			 */
898			pmap_kenter(pvva, VM_PAGE_TO_PHYS(m));
899
900			entry = (pv_entry_t) pvva;
901			/*
902			 * update the allocation pointers
903			 */
904			pvva += PAGE_SIZE;
905			--npvvapg;
906
907			/*
908			 * free the entries into the free list
909			 */
910			for (i = 0; i < newentries; i++) {
911				free_pv_entry(entry);
912				entry++;
913			}
914		}
915	}
916	if (!pv_freelist)
917		panic("get_pv_entry: cannot get a pv_entry_t");
918}
919
920/*
921 * init the pv_entry allocation system
922 */
923#define PVSPERPAGE 64
924void
925init_pv_entries(npg)
926	int npg;
927{
928	/*
929	 * allocate enough kvm space for PVSPERPAGE entries per page (lots);
930	 * kvm space is fairly cheap, be generous!!!  (the system can panic if
931	 * this is too small.)
932	 */
933	npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry)
934		+ PAGE_SIZE - 1) / PAGE_SIZE;
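	/*
	 * For example, with 8MB of managed memory (npg = 2048), a 4096 byte
	 * PAGE_SIZE and (assuming) a 16 byte struct pv_entry, this reserves
	 * 2048 * 64 * 16 bytes, i.e. roughly 512 pages worth of pageable KVA;
	 * the backing pages are only allocated later, on demand, by
	 * pmap_alloc_pv_entry().
	 */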
935	pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE);
936	/*
937	 * get the first batch of entries
938	 */
939	pmap_alloc_pv_entry();
940}
941
942/*
943 * Remove the pv_entry for the given (pmap, va) from the pv list of the
944 * physical page: search the list, unlink the matching entry, and free
945 * the now unused entry.  The return value is whatever pmap_unuse_pt()
946 * reports for the underlying page table page.
947 */
948static __inline int
949pmap_remove_entry(pmap, ppv, va)
950	struct pmap *pmap;
951	pv_entry_t *ppv;
952	vm_offset_t va;
953{
954	pv_entry_t npv;
955	int s;
956
957	s = splvm();
958	for (npv = *ppv; npv; (ppv = &npv->pv_next, npv = *ppv)) {
959		if (pmap == npv->pv_pmap && va == npv->pv_va) {
960			int rtval = pmap_unuse_pt(pmap, va, npv->pv_ptem);
961			*ppv = npv->pv_next;
962			free_pv_entry(npv);
963			splx(s);
964			return rtval;
965		}
966	}
967	splx(s);
968	return 0;
969}
970
971/*
972 * pmap_remove_pte: do the things to unmap a page in a process
973 */
974static
975#if !defined(PMAP_DIAGNOSTIC)
976__inline
977#endif
978int
979pmap_remove_pte(pmap, ptq, va)
980	struct pmap *pmap;
981	unsigned *ptq;
982	vm_offset_t va;
983{
984	unsigned oldpte;
985	pv_entry_t *ppv;
986	int rtval;
987
988	oldpte = *ptq;
989	*ptq = 0;
990	if (oldpte & PG_W)
991		pmap->pm_stats.wired_count -= 1;
992	pmap->pm_stats.resident_count -= 1;
993	if (oldpte & PG_MANAGED) {
994		if (oldpte & PG_M) {
995#if defined(PMAP_DIAGNOSTIC)
996			if (pmap_nw_modified((pt_entry_t) oldpte)) {
997				printf("pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", va, oldpte);
998			}
999#endif
1000			if (va < clean_sva || va >= clean_eva) {
1001				PHYS_TO_VM_PAGE(oldpte)->dirty = VM_PAGE_BITS_ALL;
1002			}
1003		}
1004		ppv = pa_to_pvh(oldpte);
1005		rtval = pmap_remove_entry(pmap, ppv, va);
1006#if defined(notyet)
1007		if (*ppv == NULL) {
1008			PHYS_TO_VM_PAGE(oldpte)->flags &= ~PG_MAPPED;
1009		}
1010#endif
1011		return rtval;
1012	} else {
1013		return pmap_unuse_pt(pmap, va, NULL);
1014	}
1015
1016	return 0;
1017}
1018
1019/*
1020 * Remove a single page from a process address space
1021 */
1022static __inline void
1023pmap_remove_page(pmap, va)
1024	struct pmap *pmap;
1025	register vm_offset_t va;
1026{
1027	register unsigned *ptq;
1028
1029	/*
1030	 * if there is no pte for this address, just skip it!!!
1031	 */
1032	if (*pmap_pde(pmap, va) == 0) {
1033		return;
1034	}
1035
1036	/*
1037	 * get a local va for mappings for this pmap.
1038	 */
1039	ptq = get_ptbase(pmap) + i386_btop(va);
1040	if (*ptq) {
1041		(void) pmap_remove_pte(pmap, ptq, va);
1042		pmap_update_1pg(va);
1043	}
1044	return;
1045}
1046
1047/*
1048 *	Remove the given range of addresses from the specified map.
1049 *
1050 *	It is assumed that the start and end are properly
1051 *	rounded to the page size.
1052 */
1053void
1054pmap_remove(pmap, sva, eva)
1055	struct pmap *pmap;
1056	register vm_offset_t sva;
1057	register vm_offset_t eva;
1058{
1059	register unsigned *ptbase;
1060	vm_offset_t pdnxt;
1061	vm_offset_t ptpaddr;
1062	vm_offset_t sindex, eindex;
1063	vm_page_t mpte;
1064	int anyvalid;
1065
1066	if (pmap == NULL)
1067		return;
1068
1069	/*
1070	 * special handling for removing a single page: this is a very
1071	 * common operation and one where it is easy to short-circuit
1072	 * some code.
1073	 */
1074	if ((sva + PAGE_SIZE) == eva) {
1075		pmap_remove_page(pmap, sva);
1076		return;
1077	}
1078
1079	anyvalid = 0;
1080
1081	/*
1082	 * Get a local virtual address for the mappings that are being
1083	 * worked with.
1084	 */
1085	ptbase = get_ptbase(pmap);
1086
1087	sindex = i386_btop(sva);
1088	eindex = i386_btop(eva);
1089
1090	for (; sindex < eindex; sindex = pdnxt) {
1091
1092		/*
1093		 * Calculate index for next page table.
1094		 */
1095		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1096		ptpaddr = (vm_offset_t) *pmap_pde(pmap, i386_ptob(sindex));
1097
1098		/*
1099		 * Weed out invalid mappings. Note: we assume that the page
1100		 * directory table is always allocated, and in kernel virtual.
1101		 */
1102		if (ptpaddr == 0)
1103			continue;
1104
1105		if (sindex < i386_btop(UPT_MIN_ADDRESS)) {
1106		/*
1107		 * get the vm_page_t for the page table page
1108		 */
1109			mpte = PHYS_TO_VM_PAGE(ptpaddr);
1110
1111		/*
1112		 * if the pte isn't wired, just skip it.
1113		 */
1114			if (mpte->wire_count == 0)
1115				continue;
1116		}
1117
1118		/*
1119		 * Limit our scan to either the end of the va represented
1120		 * by the current page table page, or to the end of the
1121		 * range being removed.
1122		 */
1123		if (pdnxt > eindex) {
1124			pdnxt = eindex;
1125		}
1126
1127		for ( ;sindex != pdnxt; sindex++) {
1128			vm_offset_t va;
1129			if (ptbase[sindex] == 0) {
1130				continue;
1131			}
1132			va = i386_ptob(sindex);
1133			anyvalid = 1;
1134			if (pmap_remove_pte(pmap,
1135				ptbase + sindex, va))
1136				break;
1137		}
1138	}
1139
1140	if (anyvalid) {
1141		pmap_update();
1142	}
1143}
1144
1145
1146void
1147pmap_remove_pte_mapping(pa)
1148	vm_offset_t pa;
1149{
1150	register pv_entry_t pv, *ppv, npv;
1151	register unsigned *pte;
1152	vm_offset_t va;
1153	int anyvalid = 0;
1154
1155	ppv = pa_to_pvh(pa);
1156
1157	for (pv = *ppv; pv; pv=pv->pv_next) {
1158		unsigned tpte;
1159		struct pmap *pmap;
1160
1161		anyvalid = 1;
1162		pmap = pv->pv_pmap;
1163		pte = get_ptbase(pmap) + i386_btop(pv->pv_va);
1164		if ((tpte = *pte) != 0) {
1165			pmap->pm_stats.resident_count--;
1166			*pte = 0;
1167			if (tpte & PG_W)
1168				pmap->pm_stats.wired_count--;
1169		}
1170	}
1171
1172	if (anyvalid) {
1173		for (pv = *ppv; pv; pv = npv) {
1174			npv = pv->pv_next;
1175			free_pv_entry(pv);
1176		}
1177		*ppv = NULL;
1178	}
1179}
1180
1181/*
1182 *	Routine:	pmap_remove_all
1183 *	Function:
1184 *		Removes this physical page from
1185 *		all physical maps in which it resides.
1186 *		Reflects back modify bits to the pager.
1187 *
1188 *	Notes:
1189 *		Original versions of this routine were very
1190 *		inefficient because they iteratively called
1191 *		pmap_remove (slow...)
1192 */
1193static __inline void
1194pmap_remove_all(pa)
1195	vm_offset_t pa;
1196{
1197	register pv_entry_t pv, *ppv, npv;
1198	register unsigned *pte, *ptbase;
1199	vm_offset_t va;
1200	vm_page_t m;
1201	int s;
1202
1203#if defined(PMAP_DIAGNOSTIC)
1204	/*
1205	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1206	 * pages!
1207	 */
1208	if (!pmap_is_managed(pa)) {
1209		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", pa);
1210	}
1211#endif
1212
1213	m = PHYS_TO_VM_PAGE(pa);
1214	ppv = pa_to_pvh(pa);
1215
1216	s = splvm();
1217	for (pv = *ppv; pv; pv=pv->pv_next) {
1218		int tpte;
1219		struct pmap *pmap;
1220
1221		pmap = pv->pv_pmap;
1222		ptbase = get_ptbase(pmap);
1223		va = pv->pv_va;
1224		if (*pmap_pde(pmap, va) == 0)
1225			continue;
1226		pte = ptbase + i386_btop(va);
1227		if ((tpte = (int) *pte) != 0) {
1228			pmap->pm_stats.resident_count--;
1229			*pte = 0;
1230			if (tpte & PG_W)
1231				pmap->pm_stats.wired_count--;
1232			/*
1233			 * Update the vm_page_t clean and reference bits.
1234			 */
1235			if (tpte & PG_M) {
1236#if defined(PMAP_DIAGNOSTIC)
1237				if (pmap_nw_modified((pt_entry_t) tpte)) {
1238					printf("pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", va, tpte);
1239				}
1240#endif
1241				if (va < clean_sva || va >= clean_eva) {
1242					m->dirty = VM_PAGE_BITS_ALL;
1243				}
1244			}
1245		}
1246	}
1247
1248	for (pv = *ppv; pv; pv = npv) {
1249		npv = pv->pv_next;
1250		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1251		free_pv_entry(pv);
1252	}
1253	*ppv = NULL;
1254
1255	splx(s);
1256}
1257
1258/*
1259 *	Set the physical protection on the
1260 *	specified range of this map as requested.
1261 */
1262void
1263pmap_protect(pmap, sva, eva, prot)
1264	register pmap_t pmap;
1265	vm_offset_t sva, eva;
1266	vm_prot_t prot;
1267{
1268	register unsigned *pte;
1269	register vm_offset_t va;
1270	register unsigned *ptbase;
1271	vm_offset_t pdnxt;
1272	vm_offset_t ptpaddr;
1273	vm_offset_t sindex, eindex;
1274	vm_page_t mpte;
1275	int anyvalid;
1276
1277
1278	if (pmap == NULL)
1279		return;
1280
1281	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1282		pmap_remove(pmap, sva, eva);
1283		return;
1284	}
1285	if (prot & VM_PROT_WRITE)
1286		return;
1287
1288	anyvalid = 0;
1289
1290	ptbase = get_ptbase(pmap);
1291
1292	sindex = i386_btop(sva);
1293	eindex = i386_btop(eva);
1294
1295	for (; sindex < eindex; sindex = pdnxt) {
1296
1297		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1298		ptpaddr = (vm_offset_t) *pmap_pde(pmap, i386_ptob(sindex));
1299
1300		/*
1301		 * Weed out invalid mappings. Note: we assume that the page
1302		 * directory table is always allocated, and in kernel virtual.
1303		 */
1304		if (ptpaddr == 0)
1305			continue;
1306
1307		/*
1308		 * Don't look at kernel page table pages
1309		 */
1310		if (sindex < i386_btop(UPT_MIN_ADDRESS)) {
1311			mpte = PHYS_TO_VM_PAGE(ptpaddr);
1312
1313			if (mpte->wire_count == 0)
1314				continue;
1315		}
1316
1317		if (pdnxt > eindex) {
1318			pdnxt = eindex;
1319		}
1320
1321		for (; sindex != pdnxt; sindex++) {
1322
1323			unsigned pbits = ptbase[sindex];
1324
1325			if (pbits & PG_RW) {
1326				if (pbits & PG_M) {
1327					vm_page_t m = PHYS_TO_VM_PAGE(pbits);
1328					m->dirty = VM_PAGE_BITS_ALL;
1329				}
1330				ptbase[sindex] = pbits & ~(PG_M|PG_RW);
1331				anyvalid = 1;
1332			}
1333		}
1334	}
1335	if (anyvalid)
1336		pmap_update();
1337}
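
/*
 * A compiled-out sketch: write protecting a range is simply pmap_protect()
 * with a read-only protection, which clears PG_RW on every pte in the range
 * (recording any pending PG_M state in the vm_page first).  The function
 * name is illustrative only.
 */
#if 0
static void
example_write_protect(pmap_t pmap, vm_offset_t start, vm_size_t len)
{
	pmap_protect(pmap, start, start + len, VM_PROT_READ);
}
#endif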
1338
1339/*
1340 * Create a pv entry for page at pa for
1341 * (pmap, va).
1342 */
1343static __inline void
1344pmap_insert_entry(pmap, va, mpte, pa)
1345	pmap_t pmap;
1346	vm_offset_t va;
1347	vm_page_t mpte;
1348	vm_offset_t pa;
1349{
1350
1351	int s;
1352	pv_entry_t *ppv, pv;
1353
1354	s = splvm();
1355	pv = get_pv_entry();
1356	pv->pv_va = va;
1357	pv->pv_pmap = pmap;
1358	pv->pv_ptem = mpte;
1359
1360	ppv = pa_to_pvh(pa);
1361	if (*ppv)
1362		pv->pv_next = *ppv;
1363	else
1364		pv->pv_next = NULL;
1365	*ppv = pv;
1366	splx(s);
1367}
1368
1369/*
1370 * this routine is called if the page table page is not
1371 * mapped correctly.
1372 */
1373static vm_page_t
1374_pmap_allocpte(pmap, va, ptepindex)
1375	pmap_t	pmap;
1376	vm_offset_t va;
1377	int ptepindex;
1378{
1379	vm_offset_t pteva, ptepa;
1380	vm_page_t m;
1381	int s;
1382
1383	/*
1384	 * Find or fabricate a new pagetable page
1385	 */
1386retry:
1387	m = vm_page_lookup(pmap->pm_pteobj, ptepindex);
1388	if (m == NULL) {
1389		m = vm_page_alloc(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO);
1390		if (m == NULL) {
1391			VM_WAIT;
1392			goto retry;
1393		}
1394		if ((m->flags & PG_ZERO) == 0)
1395			pmap_zero_page(VM_PAGE_TO_PHYS(m));
1396		m->flags &= ~(PG_ZERO|PG_BUSY);
1397		m->valid = VM_PAGE_BITS_ALL;
1398	}
1399
1400	/*
1401	 * mark the object writeable
1402	 */
1403	pmap->pm_pteobj->flags |= OBJ_WRITEABLE;
1404
1405	if (m->hold_count == 0) {
1406		s = splvm();
1407		vm_page_unqueue(m);
1408		splx(s);
1409		++m->wire_count;
1410		++cnt.v_wire_count;
1411	}
1412
1413	/*
1414	 * Increment the hold count for the page table page
1415	 * (denoting a new mapping.)
1416	 */
1417	++m->hold_count;
1418
1419	/*
1420	 * Map the pagetable page into the process address space, if
1421	 * it isn't already there.
1422	 */
1423	pteva = ((vm_offset_t) vtopte(va)) & PG_FRAME;
1424	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1425	if (ptepa == 0) {
1426		pv_entry_t pv, *ppv;
1427
1428		pmap->pm_stats.resident_count++;
1429
1430		s = splvm();
1431		pv = get_pv_entry();
1432
1433		pv->pv_va = pteva;
1434		pv->pv_pmap = pmap;
1435		pv->pv_next = NULL;
1436		pv->pv_ptem = NULL;
1437
1438		ptepa = VM_PAGE_TO_PHYS(m);
1439		pmap->pm_pdir[ptepindex] =
1440			(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED);
1441		ppv = pa_to_pvh(ptepa);
1442#if defined(PMAP_DIAGNOSTIC)
1443		if (*ppv)
1444			panic("pmap_allocpte: page is already mapped");
1445#endif
1446		*ppv = pv;
1447		splx(s);
1448		m->flags |= PG_MAPPED;
1449	} else {
1450#if defined(PMAP_DIAGNOSTIC)
1451		if (VM_PAGE_TO_PHYS(m) != (ptepa & PG_FRAME))
1452			panic("pmap_allocpte: mismatch");
1453#endif
1454		pmap->pm_pdir[ptepindex] =
1455			(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_MANAGED);
1456		pmap_update_1pg(pteva);
1457		m->flags |= PG_MAPPED;
1458	}
1459	return m;
1460}
1461
1462static __inline vm_page_t
1463pmap_allocpte(pmap, va)
1464	pmap_t	pmap;
1465	vm_offset_t va;
1466{
1467	int ptepindex;
1468	vm_offset_t ptepa;
1469	vm_page_t m;
1470
1471	/*
1472	 * Calculate pagetable page index
1473	 */
1474	ptepindex = va >> PDRSHIFT;
1475
1476	/*
1477	 * Get the page directory entry
1478	 */
1479	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1480
1481	/*
1482	 * If the page table page is mapped, we just increment the
1483	 * hold count, and activate it.
1484	 */
1485	if ((ptepa & (PG_RW|PG_U|PG_V)) == (PG_RW|PG_U|PG_V)) {
1486		m = PHYS_TO_VM_PAGE(ptepa);
1487		if (m->hold_count == 0) {
1488			int s = splvm();
1489			vm_page_unqueue(m);
1490			splx(s);
1491			++m->wire_count;
1492			++cnt.v_wire_count;
1493		}
1494		++m->hold_count;
1495		return m;
1496	}
1497	return _pmap_allocpte(pmap, va, ptepindex);
1498}
1499
1500/*
1501 *	Insert the given physical page (p) at
1502 *	the specified virtual address (v) in the
1503 *	target physical map with the protection requested.
1504 *
1505 *	If specified, the page will be wired down, meaning
1506 *	that the related pte can not be reclaimed.
1507 *
1508 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1509 *	or lose information.  That is, this routine must actually
1510 *	insert this page into the given map NOW.
1511 */
1512void
1513pmap_enter(pmap, va, pa, prot, wired)
1514	register pmap_t pmap;
1515	vm_offset_t va;
1516	register vm_offset_t pa;
1517	vm_prot_t prot;
1518	boolean_t wired;
1519{
1520	register unsigned *pte;
1521	vm_offset_t opa;
1522	vm_offset_t origpte, newpte;
1523	vm_page_t mpte;
1524
1525	if (pmap == NULL)
1526		return;
1527
1528	va &= PG_FRAME;
1529	if (va > VM_MAX_KERNEL_ADDRESS)
1530		panic("pmap_enter: toobig");
1531
1532	mpte = NULL;
1533	/*
1534	 * In the case that a page table page is not
1535	 * resident, we are creating it here.
1536	 */
1537	if (va < UPT_MIN_ADDRESS)
1538		mpte = pmap_allocpte(pmap, va);
1539
1540	pte = pmap_pte(pmap, va);
1541	/*
1542	 * Page Directory table entry not valid, we need a new PT page
1543	 */
1544	if (pte == NULL) {
1545		printf("kernel page directory invalid pdir=%p, va=0x%x\n",
1546			(void *)pmap->pm_pdir[PTDPTDI], va);
1547		panic("invalid kernel page directory");
1548	}
1549
1550	origpte = *(vm_offset_t *)pte;
1551	pa &= PG_FRAME;
1552	opa = origpte & PG_FRAME;
1553
1554	/*
1555	 * Mapping has not changed, must be protection or wiring change.
1556	 */
1557	if (opa == pa) {
1558		/*
1559		 * Wiring change, just update stats. We don't worry about
1560		 * wiring PT pages as they remain resident as long as there
1561		 * are valid mappings in them. Hence, if a user page is wired,
1562		 * the PT page will be also.
1563		 */
1564		if (wired && ((origpte & PG_W) == 0))
1565			pmap->pm_stats.wired_count++;
1566		else if (!wired && (origpte & PG_W))
1567			pmap->pm_stats.wired_count--;
1568
1569#if defined(PMAP_DIAGNOSTIC)
1570		if (pmap_nw_modified((pt_entry_t) origpte)) {
1571			printf("pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", va, origpte);
1572		}
1573#endif
1574
1575		/*
1576		 * We might be turning off write access to the page,
1577		 * so we go ahead and sense modify status.
1578		 */
1579		if (origpte & PG_MANAGED) {
1580			vm_page_t m;
1581			if (origpte & PG_M) {
1582				m = PHYS_TO_VM_PAGE(pa);
1583				m->dirty = VM_PAGE_BITS_ALL;
1584			}
1585			pa |= PG_MANAGED;
1586		}
1587
1588		if (mpte)
1589			--mpte->hold_count;
1590
1591		goto validate;
1592	}
1593	/*
1594	 * Mapping has changed, invalidate old range and fall through to
1595	 * handle validating new mapping.
1596	 */
1597	if (opa)
1598		(void) pmap_remove_pte(pmap, pte, va);
1599
1600	/*
1601	 * Enter on the PV list if part of our managed memory.  Note that we
1602	 * raise IPL while manipulating pv_table since pmap_enter can be
1603	 * called at interrupt time.
1604	 */
1605	if (pmap_is_managed(pa)) {
1606		pmap_insert_entry(pmap, va, mpte, pa);
1607		pa |= PG_MANAGED;
1608	}
1609
1610	/*
1611	 * Increment counters
1612	 */
1613	pmap->pm_stats.resident_count++;
1614	if (wired)
1615		pmap->pm_stats.wired_count++;
1616
1617validate:
1618	/*
1619	 * Now validate mapping with desired protection/wiring.
1620	 */
1621	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
1622
1623	if (wired)
1624		newpte |= PG_W;
1625	if (va < UPT_MIN_ADDRESS)
1626		newpte |= PG_U;
1627
1628	/*
1629	 * if the mapping or permission bits are different, we need
1630	 * to update the pte.
1631	 */
1632	if ((origpte & ~(PG_M|PG_A)) != newpte) {
1633		*pte = newpte;
1634		if (origpte)
1635			pmap_update_1pg(va);
1636	}
1637}
1638
1639/*
1640 * this code makes some *MAJOR* assumptions:
1641 * 1. Current pmap & pmap exists.
1642 * 2. Not wired.
1643 * 3. Read access.
1644 * 4. No page table pages.
1645 * 5. Tlbflush is deferred to calling procedure.
1646 * 6. Page IS managed.
1647 * but is *MUCH* faster than pmap_enter...
1648 */
1649
1650static void
1651pmap_enter_quick(pmap, va, pa)
1652	register pmap_t pmap;
1653	vm_offset_t va;
1654	register vm_offset_t pa;
1655{
1656	register unsigned *pte;
1657	vm_page_t mpte;
1658
1659	mpte = NULL;
1660	/*
1661	 * In the case that a page table page is not
1662	 * resident, we are creating it here.
1663	 */
1664	if (va < UPT_MIN_ADDRESS)
1665		mpte = pmap_allocpte(pmap, va);
1666
1667	pte = (unsigned *)vtopte(va);
1668	if (*pte)
1669		(void) pmap_remove_pte(pmap, pte, va);
1670
1671	/*
1672	 * Enter on the PV list if part of our managed memory.  Note that we
1673	 * raise IPL while manipulating pv_table since pmap_enter can be
1674	 * called at interrupt time.
1675	 */
1676	pmap_insert_entry(pmap, va, mpte, pa);
1677
1678	/*
1679	 * Increment counters
1680	 */
1681	pmap->pm_stats.resident_count++;
1682
1683	/*
1684	 * Now validate mapping with RO protection
1685	 */
1686	*pte = pa | PG_V | PG_U | PG_MANAGED;
1687
1688	return;
1689}
1690
1691#define MAX_INIT_PT (96)
1692/*
1693 * pmap_object_init_pt preloads the ptes for a given object
1694 * into the specified pmap.  This eliminates the blast of soft
1695 * faults on process startup and immediately after an mmap.
1696 */
1697void
1698pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
1699	pmap_t pmap;
1700	vm_offset_t addr;
1701	vm_object_t object;
1702	vm_pindex_t pindex;
1703	vm_size_t size;
1704	int limit;
1705{
1706	vm_offset_t tmpidx;
1707	int psize;
1708	vm_page_t p;
1709	int objpgs;
1710
1711	psize = (size >> PAGE_SHIFT);
1712
1713	if (!pmap || (object->type != OBJT_VNODE) ||
1714		(limit && (psize > MAX_INIT_PT) &&
1715			(object->resident_page_count > MAX_INIT_PT))) {
1716		return;
1717	}
1718
1719	/*
1720	 * if we are processing a major portion of the object, then scan the
1721	 * entire thing.
1722	 */
1723	if (psize > (object->size >> 2)) {
1724		objpgs = psize;
1725
1726		for (p = TAILQ_FIRST(&object->memq);
1727		    ((objpgs > 0) && (p != NULL));
1728		    p = TAILQ_NEXT(p, listq)) {
1729
1730			tmpidx = p->pindex;
1731			if (tmpidx < pindex) {
1732				continue;
1733			}
1734			tmpidx -= pindex;
1735			if (tmpidx >= psize) {
1736				continue;
1737			}
1738			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1739			    (p->busy == 0) &&
1740			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1741				if (p->queue == PQ_CACHE)
1742					vm_page_deactivate(p);
1743				p->flags |= PG_BUSY;
1744				pmap_enter_quick(pmap,
1745					addr + (tmpidx << PAGE_SHIFT),
1746					VM_PAGE_TO_PHYS(p));
1747				p->flags |= PG_MAPPED;
1748				PAGE_WAKEUP(p);
1749			}
1750			objpgs -= 1;
1751		}
1752	} else {
1753		/*
1754		 * else lookup the pages one-by-one.
1755		 */
1756		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1757			p = vm_page_lookup(object, tmpidx + pindex);
1758			if (p &&
1759			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1760			    (p->busy == 0) &&
1761			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1762				p->flags |= PG_BUSY;
1763				pmap_enter_quick(pmap,
1764					addr + (tmpidx << PAGE_SHIFT),
1765					VM_PAGE_TO_PHYS(p));
1766				p->flags |= PG_MAPPED;
1767				PAGE_WAKEUP(p);
1768			}
1769		}
1770	}
1771	return;
1772}
1773
1774/*
1775 * pmap_prefault provides a quick way of clustering
1776 * page faults into a process's address space.  It is a "cousin"
1777 * of pmap_object_init_pt, except it runs at page fault time instead
1778 * of mmap time.
1779 */
1780#define PFBAK 2
1781#define PFFOR 2
1782#define PAGEORDER_SIZE (PFBAK+PFFOR)
1783
1784static int pmap_prefault_pageorder[] = {
1785	-PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE
1786};
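
/*
 * The order above is relative to the faulting address: one page back, one
 * page forward, two pages back, two pages forward -- so the pages nearest
 * the fault are tried first.
 */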
1787
1788void
1789pmap_prefault(pmap, addra, entry, object)
1790	pmap_t pmap;
1791	vm_offset_t addra;
1792	vm_map_entry_t entry;
1793	vm_object_t object;
1794{
1795	int i;
1796	vm_offset_t starta;
1797	vm_offset_t addr;
1798	vm_pindex_t pindex;
1799	vm_page_t m;
1800
1801	if (entry->object.vm_object != object)
1802		return;
1803
1804	if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap))
1805		return;
1806
1807	starta = addra - PFBAK * PAGE_SIZE;
1808	if (starta < entry->start) {
1809		starta = entry->start;
1810	} else if (starta > addra) {
1811		starta = 0;
1812	}
1813
1814	for (i = 0; i < PAGEORDER_SIZE; i++) {
1815		vm_object_t lobject;
1816		unsigned *pte;
1817
1818		addr = addra + pmap_prefault_pageorder[i];
1819		if (addr < starta || addr >= entry->end)
1820			continue;
1821
1822		if (*pmap_pde(pmap, addr) == 0)
1823			continue;
1824
1825		pte = (unsigned *) vtopte(addr);
1826		if (*pte)
1827			continue;
1828
1829		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1830		lobject = object;
1831		for (m = vm_page_lookup(lobject, pindex);
1832		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
1833		    lobject = lobject->backing_object) {
1834			if (lobject->backing_object_offset & PAGE_MASK)
1835				break;
1836			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
1837			m = vm_page_lookup(lobject->backing_object, pindex);
1838		}
1839
1840		/*
1841		 * give up when a page is not in memory
1842		 */
1843		if (m == NULL)
1844			break;
1845
1846		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1847		    (m->busy == 0) &&
1848		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1849
1850			if (m->queue == PQ_CACHE) {
1851				vm_page_deactivate(m);
1852			}
1853			m->flags |= PG_BUSY;
1854			pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m));
1855			m->flags |= PG_MAPPED;
1856			PAGE_WAKEUP(m);
1857		}
1858	}
1859}
1860
1861/*
1862 *	Routine:	pmap_change_wiring
1863 *	Function:	Change the wiring attribute for a map/virtual-address
1864 *			pair.
1865 *	In/out conditions:
1866 *			The mapping must already exist in the pmap.
1867 */
1868void
1869pmap_change_wiring(pmap, va, wired)
1870	register pmap_t pmap;
1871	vm_offset_t va;
1872	boolean_t wired;
1873{
1874	register unsigned *pte;
1875
1876	if (pmap == NULL)
1877		return;
1878
1879	pte = pmap_pte(pmap, va);
1880
1881	if (wired && !pmap_pte_w(pte))
1882		pmap->pm_stats.wired_count++;
1883	else if (!wired && pmap_pte_w(pte))
1884		pmap->pm_stats.wired_count--;
1885
1886	/*
1887	 * Wiring is not a hardware characteristic so there is no need to
1888	 * invalidate TLB.
1889	 */
1890	pmap_pte_set_w(pte, wired);
1891}
1892
1893
1894
1895/*
1896 *	Copy the range specified by src_addr/len
1897 *	from the source map to the range dst_addr/len
1898 *	in the destination map.
1899 *
1900 *	This routine is only advisory and need not do anything.
1901 */
1902void
1903pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
1904	pmap_t dst_pmap, src_pmap;
1905	vm_offset_t dst_addr;
1906	vm_size_t len;
1907	vm_offset_t src_addr;
1908{
1909	vm_offset_t addr;
1910	vm_offset_t end_addr = src_addr + len;
1911	vm_offset_t pdnxt;
1912	unsigned src_frame, dst_frame;
1913
1914	if (dst_addr != src_addr)
1915		return;
1916
1917	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
1918	if (src_frame != (((unsigned) PTDpde) & PG_FRAME))
1919		return;
1920
1921	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
1922	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
1923		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
1924		pmap_update();
1925	}
1926
1927	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
1928		unsigned *src_pte, *dst_pte;
1929		vm_page_t dstmpte, srcmpte;
1930		vm_offset_t srcptepaddr;
1931
1932		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
1933		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[addr >> PDRSHIFT];
1934		if (srcptepaddr == 0) {
1935			continue;
1936		}
1937
1938		srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
1939		if (srcmpte->hold_count == 0)
1940			continue;
1941
1942		if (pdnxt > end_addr)
1943			pdnxt = end_addr;
1944
1945		src_pte = (unsigned *) vtopte(addr);
1946		dst_pte = (unsigned *) avtopte(addr);
1947		while (addr < pdnxt) {
1948			unsigned ptetemp;
1949			ptetemp = *src_pte;
1950			/*
1951			 * we only virtual copy managed pages
1952			 */
1953			if ((ptetemp & PG_MANAGED) != 0) {
1954				/*
1955				 * We have to check after allocpte for the
1956				 * pte still being around...  allocpte can
1957				 * block.
1958				 */
1959				dstmpte = pmap_allocpte(dst_pmap, addr);
1960				if ((ptetemp = *src_pte) != 0) {
1961					*dst_pte = ptetemp;
1962					dst_pmap->pm_stats.resident_count++;
1963					pmap_insert_entry(dst_pmap, addr, dstmpte,
1964						(ptetemp & PG_FRAME));
1965	 			} else {
1966					pmap_unwire_pte_hold(dstmpte);
1967				}
1968				if (dstmpte->hold_count >= srcmpte->hold_count)
1969					break;
1970			}
1971			addr += PAGE_SIZE;
1972			++src_pte;
1973			++dst_pte;
1974		}
1975	}
1976}
1977
1978/*
1979 *	Routine:	pmap_kernel
1980 *	Function:
1981 *		Returns the physical map handle for the kernel.
1982 */
1983pmap_t
1984pmap_kernel()
1985{
1986	return (kernel_pmap);
1987}
1988
1989/*
1990 *	pmap_zero_page zeros the specified (machine independent)
1991 *	page by mapping the page into virtual memory and using
1992 *	bzero to clear its contents, one machine dependent page
1993 *	at a time.
1994 */
1995void
1996pmap_zero_page(phys)
1997	vm_offset_t phys;
1998{
1999	if (*(int *) CMAP2)
2000		panic("pmap_zero_page: CMAP busy");
2001
2002	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME);
2003	bzero(CADDR2, PAGE_SIZE);
2004	*(int *) CMAP2 = 0;
2005	pmap_update_1pg((vm_offset_t) CADDR2);
2006}
2007
2008/*
2009 *	pmap_copy_page copies the specified (machine independent)
2010 *	page by mapping the page into virtual memory and using
2011 *	bcopy to copy the page, one machine dependent page at a
2012 *	time.
2013 */
2014void
2015pmap_copy_page(src, dst)
2016	vm_offset_t src;
2017	vm_offset_t dst;
2018{
2019	if (*(int *) CMAP1 || *(int *) CMAP2)
2020		panic("pmap_copy_page: CMAP busy");
2021
2022	*(int *) CMAP1 = PG_V | PG_RW | (src & PG_FRAME);
2023	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME);
2024
2025#if __GNUC__ > 1
2026	memcpy(CADDR2, CADDR1, PAGE_SIZE);
2027#else
2028	bcopy(CADDR1, CADDR2, PAGE_SIZE);
2029#endif
2030	*(int *) CMAP1 = 0;
2031	*(int *) CMAP2 = 0;
2032	pmap_update_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2);
2033}
2034
2035
2036/*
2037 *	Routine:	pmap_pageable
2038 *	Function:
2039 *		Make the specified pages (by pmap, offset)
2040 *		pageable (or not) as requested.
2041 *
2042 *		A page which is not pageable may not take
2043 *		a fault; therefore, its page table entry
2044 *		must remain valid for the duration.
2045 *
2046 *		This routine is merely advisory; pmap_enter
2047 *		will specify that these pages are to be wired
2048 *		down (or not) as appropriate.
2049 */
2050void
2051pmap_pageable(pmap, sva, eva, pageable)
2052	pmap_t pmap;
2053	vm_offset_t sva, eva;
2054	boolean_t pageable;
2055{
2056}
2057
2058/*
2059 * this routine returns true if a physical page resides
2060 * in the given pmap.
2061 */
2062boolean_t
2063pmap_page_exists(pmap, pa)
2064	pmap_t pmap;
2065	vm_offset_t pa;
2066{
2067	register pv_entry_t *ppv, pv;
2068	int s;
2069
2070	if (!pmap_is_managed(pa))
2071		return FALSE;
2072
2073	s = splvm();
2074
2075	ppv = pa_to_pvh(pa);
2076	/*
2077	 * Check the current mappings, returning immediately if one is found.
2078	 */
2079	for (pv = *ppv; pv; pv = pv->pv_next) {
2080		if (pv->pv_pmap == pmap) {
2081			splx(s);
2082			return TRUE;
2083		}
2084	}
2085	splx(s);
2086	return (FALSE);
2087}
2088
2089/*
2090 * pmap_testbit tests bits in ptes.
2091 * Note that the testbit/changebit routines are inline,
2092 * and a lot of things compile-time evaluate.
2093 */
2094static __inline boolean_t
2095pmap_testbit(pa, bit)
2096	register vm_offset_t pa;
2097	int bit;
2098{
2099	register pv_entry_t *ppv, pv;
2100	unsigned *pte;
2101	int s;
2102
2103	if (!pmap_is_managed(pa))
2104		return FALSE;
2105
2106	s = splvm();
2107
2108	ppv = pa_to_pvh(pa);
2109	/*
2110	 * Not found, check current mappings returning immediately if found.
2111	 * Check the current mappings, returning immediately if one is found.
2112	for (pv = *ppv ;pv; pv = pv->pv_next) {
2113		/*
2114		 * if the bit being tested is the modified bit, then
2115		 * mark UPAGES as always modified, and ptes as never
2116		 * modified.
2117		 */
2118		if (bit & (PG_A|PG_M)) {
2119			if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) {
2120				continue;
2121			}
2122		}
2123		if (!pv->pv_pmap) {
2124#if defined(PMAP_DIAGNOSTIC)
2125			printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va);
2126#endif
2127			continue;
2128		}
2129		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2130		if (pte == NULL)
2131			continue;
2132		if ((int) *pte & bit) {
2133			splx(s);
2134			return TRUE;
2135		}
2136	}
2137	splx(s);
2138	return (FALSE);
2139}
2140
2141/*
2142 * this routine is used to modify bits in ptes
2143 */
2144static __inline void
2145pmap_changebit(pa, bit, setem)
2146	vm_offset_t pa;
2147	int bit;
2148	boolean_t setem;
2149{
2150	register pv_entry_t pv, *ppv;
2151	register unsigned *pte;
2152	vm_offset_t va;
2153	int changed;
2154	int s;
2155
2156	if (!pmap_is_managed(pa))
2157		return;
2158
2159	s = splvm();
2160
2161	changed = 0;
2162	ppv = pa_to_pvh(pa);
2163	/*
2164	 * Loop over all current mappings, setting or clearing as appropriate.
2165	 * (If setting RO, do we need to clear the VAC?)
2166	 */
2167	for ( pv = *ppv; pv; pv = pv->pv_next) {
2168		va = pv->pv_va;
2169
2170		/*
2171		 * don't write protect pager mappings
2172		 */
2173		if (!setem && (bit == PG_RW)) {
2174			if (va >= clean_sva && va < clean_eva)
2175				continue;
2176		}
2177		if (!pv->pv_pmap) {
2178#if defined(PMAP_DIAGNOSTIC)
2179			printf("Null pmap (cb) at va: 0x%lx\n", va);
2180#endif
2181			continue;
2182		}
2183
2184		pte = pmap_pte(pv->pv_pmap, va);
2185		if (pte == NULL)
2186			continue;
2187		if (setem) {
2188			*(int *)pte |= bit;
2189			changed = 1;
2190		} else {
2191			vm_offset_t pbits = *(vm_offset_t *)pte;
2192			if (pbits & bit)
2193				changed = 1;
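			/*
			 * When write protecting, clear the modified bit as
			 * well and save the dirtiness in the vm_page so the
			 * information is not lost.
			 */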
2194			if (bit == PG_RW) {
2195				if (pbits & PG_M) {
2196					vm_page_t m;
2197					vm_offset_t pa = pbits & PG_FRAME;
2198					m = PHYS_TO_VM_PAGE(pa);
2199					m->dirty = VM_PAGE_BITS_ALL;
2200				}
2201				*(int *)pte = pbits & ~(PG_M|PG_RW);
2202			} else {
2203				*(int *)pte = pbits & ~bit;
2204			}
2205		}
2206	}
2207	splx(s);
2208	if (changed)
2209		pmap_update();
2210}
2211
2212/*
2213 *      pmap_page_protect:
2214 *
2215 *      Lower the permission for all mappings to a given page.
2216 */
2217void
2218pmap_page_protect(phys, prot)
2219	vm_offset_t phys;
2220	vm_prot_t prot;
2221{
2222	if ((prot & VM_PROT_WRITE) == 0) {
2223		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
2224			pmap_changebit(phys, PG_RW, FALSE);
2225		else {
2226			pmap_remove_all(phys);
2227			pmap_update();
2228		}
2229	}
2230}
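
/*
 * Illustrative sketch (comment only, not compiled): the two useful cases
 * above are lowering all mappings of a page to read-only and removing the
 * mappings entirely, e.g.:
 *
 *	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);	(write protect)
 *	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);	(remove all mappings)
 */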
2231
2232vm_offset_t
2233pmap_phys_address(ppn)
2234	int ppn;
2235{
2236	return (i386_ptob(ppn));
2237}
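
/*
 * Illustrative sketch (comment only, not compiled): pmap_phys_address()
 * turns a page number into a physical address (ppn << PAGE_SHIFT on the
 * i386).  The device pager uses it, roughly, to convert the page index
 * returned by a driver's mmap entry point ("mapfunc" here is hypothetical):
 *
 *	pa = pmap_phys_address((*mapfunc)(dev, offset, prot));
 */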
2238
2239/*
2240 *	pmap_is_referenced:
2241 *
2242 *	Return whether or not the specified physical page was referenced
2243 *	by any physical maps.
2244 */
2245boolean_t
2246pmap_is_referenced(vm_offset_t pa)
2247{
2248	return pmap_testbit((pa), PG_A);
2249}
2250
2251/*
2252 *	pmap_is_modified:
2253 *
2254 *	Return whether or not the specified physical page was modified
2255 *	in any physical maps.
2256 */
2257boolean_t
2258pmap_is_modified(vm_offset_t pa)
2259{
2260	return pmap_testbit((pa), PG_M);
2261}
2262
2263/*
2264 *	Clear the modify bits on the specified physical page.
2265 */
2266void
2267pmap_clear_modify(vm_offset_t pa)
2268{
2269	pmap_changebit((pa), PG_M, FALSE);
2270}
2271
2272/*
2273 *	pmap_clear_reference:
2274 *
2275 *	Clear the reference bit on the specified physical page.
2276 */
2277void
2278pmap_clear_reference(vm_offset_t pa)
2279{
2280	pmap_changebit((pa), PG_A, FALSE);
2281}
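
/*
 * Illustrative sketch (comment only, not compiled): a pageout-style scan
 * would typically combine the four wrappers above, sampling and resetting
 * the hardware bits for a page (everything but the pmap calls is
 * hypothetical):
 *
 *	pa = VM_PAGE_TO_PHYS(m);
 *	if (pmap_is_referenced(pa)) {
 *		pmap_clear_reference(pa);
 *		... recently used, give the page another pass ...
 *	} else if (pmap_is_modified(pa)) {
 *		pmap_clear_modify(pa);
 *		... dirty, schedule the page for cleaning ...
 *	} else {
 *		... clean and idle, the page may be reclaimed ...
 *	}
 */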
2282
2283/*
2284 * Miscellaneous support routines follow
2285 */
2286
2287static void
2288i386_protection_init()
2289{
2290	register int *kp, prot;
2291
2292	kp = protection_codes;
2293	for (prot = 0; prot < 8; prot++) {
2294		switch (prot) {
2295		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2296			/*
2297			 * A protection code of 0 gives read access, and the i386
2298			 * has no execute bit, so all of these cases end up readable.
2299			 */
2300		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2301		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2302		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2303			*kp++ = 0;
2304			break;
2305		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2306		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2307		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2308		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2309			*kp++ = PG_RW;
2310			break;
2311		}
2312	}
2313}
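
/*
 * Illustrative sketch (comment only, not compiled): protection_codes[] is
 * indexed by the three-bit VM_PROT_{READ,WRITE,EXECUTE} combination and
 * yields the extra PTE bits, which with the table built above is either 0
 * (read-only) or PG_RW.  pmap_enter() uses it roughly like this:
 *
 *	newpte = (pa & PG_FRAME) | protection_codes[prot] | PG_V;
 */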
2314
2315/*
2316 * Map a set of physical memory pages into the kernel virtual
2317 * address space. Return a pointer to where it is mapped. This
2318 * routine is intended to be used for mapping device memory,
2319 * NOT real memory. The non-cacheable bits are set on each
2320 * mapped page.
2321 */
2322void *
2323pmap_mapdev(pa, size)
2324	vm_offset_t pa;
2325	vm_size_t size;
2326{
2327	vm_offset_t va, tmpva;
2328	unsigned *pte;
2329
2330	size = roundup(size, PAGE_SIZE);
2331
2332	va = kmem_alloc_pageable(kernel_map, size);
2333	if (!va)
2334		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2335
2336	pa = pa & PG_FRAME;
2337	for (tmpva = va; size > 0;) {
2338		pte = (unsigned *)vtopte(tmpva);
2339		*pte = pa | PG_RW | PG_V | PG_N;
2340		size -= PAGE_SIZE;
2341		tmpva += PAGE_SIZE;
2342		pa += PAGE_SIZE;
2343	}
2344	pmap_update();
2345
2346	return ((void *) va);
2347}
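
/*
 * Illustrative sketch (comment only, not compiled): a device driver with a
 * memory-mapped register window could map it uncached like this (the
 * physical address and size are made up):
 *
 *	volatile unsigned char *regs;
 *
 *	regs = (volatile unsigned char *) pmap_mapdev(0xd0000000, 8192);
 *	regs[0] = 1;		(poke a device register)
 *
 * Note that this file provides no matching unmap routine; the kernel
 * virtual space obtained from kmem_alloc_pageable() stays in use.
 */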
2348
2349int
2350pmap_mincore(pmap, addr)
2351	pmap_t pmap;
2352	vm_offset_t addr;
2353{
2354
2355	unsigned *ptep, pte;
2356	int val = 0;
2357
2358	ptep = pmap_pte(pmap, addr);
2359	if (ptep == 0) {
2360		return 0;
2361	}
2362
2363	if ((pte = *ptep)) {
2364		vm_offset_t pa;
2365		val = MINCORE_INCORE;
2366		pa = pte & PG_FRAME;
2367
2368		/*
2369		 * Modified by us
2370		 */
2371		if (pte & PG_M)
2372			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2373		/*
2374		 * Modified by someone
2375		 */
2376		else if (PHYS_TO_VM_PAGE(pa)->dirty ||
2377			pmap_is_modified(pa))
2378			val |= MINCORE_MODIFIED_OTHER;
2379		/*
2380		 * Referenced by us
2381		 */
2382		if (pte & PG_U)
2383			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2384
2385		/*
2386		 * Referenced by someone
2387		 */
2388		else if ((PHYS_TO_VM_PAGE(pa)->flags & PG_REFERENCED) ||
2389			pmap_is_referenced(pa))
2390			val |= MINCORE_REFERENCED_OTHER;
2391	}
2392	return val;
2393}
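
/*
 * Illustrative sketch (comment only, not compiled): the mincore() system
 * call walks its range a page at a time, storing one status byte per page
 * (the vector handling here is hypothetical):
 *
 *	for (addr = first; addr < end; addr += PAGE_SIZE)
 *		vec[(addr - first) >> PAGE_SHIFT] =
 *		    pmap_mincore(&p->p_vmspace->vm_pmap, addr);
 *
 * A return of 0 means the page is not resident; otherwise MINCORE_INCORE is
 * set, possibly together with the MODIFIED/REFERENCED flags tested above.
 */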
2394
2395#if defined(PMAP_DEBUG)
2396pmap_pid_dump(int pid) {
2397	pmap_t pmap;
2398	struct proc *p;
2399	int npte = 0;
2400	int index;
2401	for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) {
2402		if (p->p_pid != pid)
2403			continue;
2404
2405		if (p->p_vmspace) {
2406			int i,j;
2407			index = 0;
2408			pmap = &p->p_vmspace->vm_pmap;
2409			for(i=0;i<1024;i++) {
2410				pd_entry_t *pde;
2411				unsigned *pte;
2412				unsigned base = i << PDRSHIFT;
2413
2414				pde = &pmap->pm_pdir[i];
2415				if (pde && pmap_pde_v(pde)) {
2416					for(j=0;j<1024;j++) {
2417						unsigned va = base + (j << PAGE_SHIFT);
2418						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
2419							if (index) {
2420								index = 0;
2421								printf("\n");
2422							}
2423							return npte;
2424						}
2425						pte = pmap_pte( pmap, va);
2426						if (pte && pmap_pte_v(pte)) {
2427							vm_offset_t pa;
2428							vm_page_t m;
2429							pa = *(int *)pte;
2430							m = PHYS_TO_VM_PAGE((pa & PG_FRAME));
2431							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
2432								va, pa, m->hold_count, m->wire_count, m->flags);
2433							npte++;
2434							index++;
2435							if (index >= 2) {
2436								index = 0;
2437								printf("\n");
2438							} else {
2439								printf(" ");
2440							}
2441						}
2442					}
2443				}
2444			}
2445		}
2446	}
2447	return npte;
2448}
2449#endif
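
/*
 * Illustrative sketch (comment only, not compiled): with PMAP_DEBUG defined,
 * the routine above can be called from a kernel debugger to print every
 * valid user-space mapping of a process, two per output line:
 *
 *	pmap_pid_dump(1);	(dump the mappings of pid 1)
 */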
2450
2451#if defined(DEBUG)
2452
2453static void	pads __P((pmap_t pm));
2454static void	pmap_pvdump __P((vm_offset_t pa));
2455
2456/* print address space of pmap*/
2457static void
2458pads(pm)
2459	pmap_t pm;
2460{
2461	unsigned va, i, j;
2462	unsigned *ptep;
2463
2464	if (pm == kernel_pmap)
2465		return;
2466	for (i = 0; i < 1024; i++)
2467		if (pm->pm_pdir[i])
2468			for (j = 0; j < 1024; j++) {
2469				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
2470				if (pm == kernel_pmap && va < KERNBASE)
2471					continue;
2472				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
2473					continue;
2474				ptep = pmap_pte(pm, va);
2475				if (pmap_pte_v(ptep))
2476					printf("%x:%x ", va, *(int *) ptep);
2477			}
2478
2479}
2480
2481static void
2482pmap_pvdump(pa)
2483	vm_offset_t pa;
2484{
2485	register pv_entry_t pv;
2486
2487	printf("pa %x", pa);
2488	for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) {
2489#ifdef used_to_be
2490		printf(" -> pmap %x, va %x, flags %x",
2491		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
2492#endif
2493		printf(" -> pmap %x, va %x",
2494		    pv->pv_pmap, pv->pv_va);
2495		pads(pv->pv_pmap);
2496	}
2497	printf(" ");
2498}
2499#endif
2500
2501
2502