pmap.c revision 1262
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
42 *	$Id: pmap.c,v 1.20 1994/03/07 11:38:34 davidg Exp $
43 */
44
45/*
46 * Derived from hp300 version by Mike Hibler, this version by William
47 * Jolitz uses a recursive map [a pde points to the page directory] to
48 * map the page tables using the pagetables themselves. This is done to
49 * reduce the impact on kernel virtual memory for lots of sparse address
50 * space, and to reduce the cost of memory to each process.
51 *
52 *	Derived from: hp300/@(#)pmap.c	7.1 (Berkeley) 12/5/90
53 */
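/*
 * Illustrative sketch of the recursive mapping (not part of the original
 * interface; the EX_* names and the shift constants are assumptions for a
 * 4K-page i386): because the pde at slot PTDPTDI points back at the page
 * directory itself, all page tables appear as one contiguous 4MB window of
 * ptes in kernel virtual space, and the pte for any virtual address can be
 * reached with plain pointer arithmetic.
 */
#if 0
#define EX_PG_SHIFT	12		/* bytes mapped by one pte: 4K */
#define EX_PD_SHIFT	22		/* bytes mapped by one pde: 4M */
#define EX_PTmap	((pt_entry_t *)(PTDPTDI << EX_PD_SHIFT))
#define ex_vtopte(va)	(EX_PTmap + ((vm_offset_t)(va) >> EX_PG_SHIFT))
#endif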
54/*
55 * Major modifications by John S. Dyson, primarily to support pageable
56 * page tables and discontiguous memory pages, eliminate
57 * pmap_attributes, and use more efficient string instructions.
58 * Jan 13, 1994.  Further modifications on Mar 2, 1994:
59 * general clean-up and efficiency mods.
60 */
61
62/*
63 *	Manages physical address maps.
64 *
65 *	In addition to hardware address maps, this
66 *	module is called upon to provide software-use-only
67 *	maps which may or may not be stored in the same
68 *	form as hardware maps.  These pseudo-maps are
69 *	used to store intermediate results from copy
70 *	operations to and from address spaces.
71 *
72 *	Since the information managed by this module is
73 *	also stored by the logical address mapping module,
74 *	this module may throw away valid virtual-to-physical
75 *	mappings at almost any time.  However, invalidations
76 *	of virtual-to-physical mappings must be done as
77 *	requested.
78 *
79 *	In order to cope with hardware architectures which
80 *	make virtual-to-physical map invalidates expensive,
81 *	this module may delay invalidate or reduced protection
82 *	operations until such time as they are actually
83 *	necessary.  This module is given full information as
84 *	to which processors are currently using which maps,
85 *	and to when physical maps must be made correct.
86 */
87
88#include "param.h"
89#include "systm.h"
90#include "proc.h"
91#include "malloc.h"
92#include "user.h"
93#include "i386/include/cpufunc.h"
94
95#include "vm/vm.h"
96#include "vm/vm_kern.h"
97#include "vm/vm_page.h"
98
99#include "i386/isa/isa.h"
100
101/*
102 * Allocate various and sundry SYSMAPs used in the days of old VM
103 * and not yet converted.  XXX.
104 */
105#define BSDVM_COMPAT	1
106
107/*
108 * Get PDEs and PTEs for user/kernel address space
109 */
110#define	pmap_pde(m, v)	(&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023]))
111#define pdir_pde(m, v) (m[((vm_offset_t)(v) >> PD_SHIFT)&1023])
112
113#define pmap_pte_pa(pte)	(*(int *)(pte) & PG_FRAME)
114
115#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
116#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
117#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
118#define pmap_pte_u(pte)		((*(int *)pte & PG_U) != 0)
119#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
120
121#define pmap_pte_set_w(pte, v)		((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
122#define pmap_pte_set_prot(pte, v)	((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
123
124/*
125 * Given a map and a machine independent protection code,
126 * convert to an i386 protection code.
127 */
128#define pte_prot(m, p)	(protection_codes[p])
129int	protection_codes[8];
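/*
 * For reference, i386_protection_init() (at the end of this file) fills
 * protection_codes[] roughly as follows, so pte_prot() is a simple table
 * lookup:
 *
 *	VM_PROT_NONE				-> 0
 *	READ and/or EXECUTE, no WRITE		-> PG_RO
 *	any combination including WRITE		-> PG_RW
 *
 * e.g. pte_prot(pmap, VM_PROT_READ|VM_PROT_WRITE) evaluates to PG_RW.
 */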
130
131struct pmap	kernel_pmap_store;
132pmap_t		kernel_pmap;
133
134vm_offset_t	phys_avail[6];	/* 2 (start, end) regions + (0, 0) terminator */
135vm_offset_t    	avail_start;	/* PA of first available physical page */
136vm_offset_t	avail_end;	/* PA of last available physical page */
137vm_size_t	mem_size;	/* memory size in bytes */
138vm_offset_t	virtual_avail;  /* VA of first avail page (after kernel bss)*/
139vm_offset_t	virtual_end;	/* VA of last avail page (end of kernel AS) */
140int		i386pagesperpage;	/* PAGE_SIZE / I386_PAGE_SIZE */
141boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */
142vm_offset_t	vm_first_phys, vm_last_phys;
143
144static inline boolean_t		pmap_testbit();
145static inline void		pmap_changebit();
146static inline int		pmap_is_managed();
147static inline void		*vm_get_pmap();
148static inline void		vm_put_pmap();
149inline void			pmap_use_pt();
150inline void			pmap_unuse_pt();
151inline pt_entry_t * const	pmap_pte();
152static inline pv_entry_t	get_pv_entry();
153void				pmap_alloc_pv_entry();
154void				pmap_clear_modify();
155void				i386_protection_init();
156
157#if BSDVM_COMPAT
158#include "msgbuf.h"
159
160/*
161 * All those kernel PT submaps that BSD is so fond of
162 */
163pt_entry_t *CMAP1, *CMAP2, *mmap;
164caddr_t		CADDR1, CADDR2, vmmap;
165pt_entry_t *msgbufmap;
166struct msgbuf	*msgbufp;
167#endif
168
169void init_pv_entries(int) ;
170
171/*
172 *	Routine:	pmap_pte
173 *	Function:
174 *		Extract the page table entry associated
175 *		with the given map/virtual_address pair.
176 * [ what about induced faults -wfj]
177 */
178
179inline pt_entry_t *
180const pmap_pte(pmap, va)
181	register pmap_t	pmap;
182	vm_offset_t va;
183{
184
185	if (pmap && *pmap_pde(pmap, va)) {
186		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
187		/* are we current address space or kernel? */
188		if ( (pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME)))
189			return ((pt_entry_t *) vtopte(va));
190		/* otherwise, we are alternate address space */
191		else {
192			if ( frame != ((int) APTDpde & PG_FRAME) ) {
193				APTDpde = pmap->pm_pdir[PTDPTDI];
194				tlbflush();
195			}
196			return((pt_entry_t *) avtopte(va));
197		}
198	}
199	return(0);
200}
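/*
 * Typical use of pmap_pte() (illustrative only; the local names here are
 * arbitrary):
 *
 *	pt_entry_t *pte = pmap_pte(pmap, va);
 *	if (pte && pmap_pte_v(pte))
 *		pa = pmap_pte_pa(pte) | (va & ~PG_FRAME);
 *
 * i.e. callers must allow for the NULL return when the pde is not valid,
 * and the pte supplies only the frame; the offset comes from va itself.
 */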
201
202/*
203 *	Routine:	pmap_extract
204 *	Function:
205 *		Extract the physical page address associated
206 *		with the given map/virtual_address pair.
207 */
208
209vm_offset_t
210pmap_extract(pmap, va)
211	register pmap_t	pmap;
212	vm_offset_t va;
213{
214	pd_entry_t save;
215	vm_offset_t pa;
216	int s;
217
218	if (pmap && *pmap_pde(pmap, va)) {
219		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
220		/* are we current address space or kernel? */
221		if ( (pmap == kernel_pmap)
222			|| (frame == ((int) PTDpde & PG_FRAME)) ) {
223			pa = *(int *) vtopte(va);
224		/* otherwise, we are alternate address space */
225		} else {
226			if ( frame != ((int) APTDpde & PG_FRAME)) {
227				APTDpde = pmap->pm_pdir[PTDPTDI];
228				tlbflush();
229			}
230			pa = *(int *) avtopte(va);
231		}
232		pa = (pa & PG_FRAME) | (va & ~PG_FRAME);
233		return pa;
234	}
235	return 0;
236
237}
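/*
 * Worked example (assuming the usual i386 encoding with PG_FRAME ==
 * 0xfffff000): a pte of 0x00345067 looked up for va 0x08049abc yields
 *
 *	pa = (0x00345067 & PG_FRAME) | (0x08049abc & ~PG_FRAME)
 *	   = 0x00345000 | 0xabc = 0x00345abc.
 */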
238
239/*
240 * determine if a page is managed (memory vs. device)
241 */
242static inline int
243pmap_is_managed(pa)
244	vm_offset_t pa;
245{
246	int i;
247
248	if (!pmap_initialized)
249		return 0;
250
251	for (i = 0; phys_avail[i + 1]; i += 2) {
252		if (pa >= phys_avail[i] && pa < phys_avail[i + 1])
253			return 1;
254	}
255	return 0;
256}
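/*
 * phys_avail[] is consulted as (start, end) pairs terminated by a zero
 * entry; for example (hypothetical values), a machine with 640K of base
 * memory and 15MB of extended memory might carry:
 *
 *	phys_avail[0] = 0x00001000;  phys_avail[1] = 0x000a0000;
 *	phys_avail[2] = 0x00100000;  phys_avail[3] = 0x01000000;
 *	phys_avail[4] = 0;           phys_avail[5] = 0;
 *
 * so the loop above treats pages inside either range as "managed" and
 * everything else (e.g. device memory) as unmanaged.
 */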
257
258/*
259 * find the vm_page_t of a pte (only) given va of pte and pmap
260 */
261inline vm_page_t
262pmap_pte_vm_page(pmap, pt)
263	pmap_t pmap;
264	vm_offset_t pt;
265{
266	pt = i386_trunc_page( pt);
267	pt = (pt - UPT_MIN_ADDRESS) / NBPG;
268	pt = ((vm_offset_t) pmap->pm_pdir[pt]) & PG_FRAME;
269	return PHYS_TO_VM_PAGE(pt);
270}
271
272/*
273 * Wire a page table page
274 */
275inline void
276pmap_use_pt(pmap, va)
277	pmap_t pmap;
278	vm_offset_t va;
279{
280	vm_offset_t pt;
281
282	if (va >= VM_MAX_ADDRESS || !pmap_initialized)
283		return;
284
285	pt = (vm_offset_t) vtopte(va);
286	/* vm_page_wire( pmap_pte_vm_page(pmap, pt)); */
287	vm_page_hold( pmap_pte_vm_page(pmap, pt));
288}
289
290/*
291 * Unwire a page table page
292 */
293inline void
294pmap_unuse_pt(pmap, va)
295	pmap_t pmap;
296	vm_offset_t va;
297{
298	vm_offset_t pt;
299
300	if (va >= VM_MAX_ADDRESS || !pmap_initialized)
301		return;
302
303	pt = (vm_offset_t) vtopte(va);
304/*	vm_page_unwire( pmap_pte_vm_page(pmap, pt)); */
305	vm_page_unhold( pmap_pte_vm_page(pmap, pt));
306}
307
308/* [ macro again?, should I force kstack into user map here? -wfj ] */
309void
310pmap_activate(pmap, pcbp)
311	register pmap_t pmap;
312	struct pcb *pcbp;
313{
314	PMAP_ACTIVATE(pmap, pcbp);
315}
316
317/*
318 *	Bootstrap the system enough to run with virtual memory.
319 *	Map the kernel's code and data, and allocate the system page table.
320 *
321 *	On the I386 this is called after mapping has already been enabled
322 *	and just syncs the pmap module with what has already been done.
323 *	[We can't call it easily with mapping off since the kernel is not
324 *	mapped with PA == VA, hence we would have to relocate every address
325 *	from the linked base (virtual) address "KERNBASE" to the actual
326 *	(physical) address starting relative to 0]
327 */
328
329#define DMAPAGES 8
330void
331pmap_bootstrap(firstaddr, loadaddr)
332	vm_offset_t firstaddr;
333	vm_offset_t loadaddr;
334{
335#if BSDVM_COMPAT
336	vm_offset_t va;
337	pt_entry_t *pte;
338#endif
339	extern int IdlePTD;
340
341	avail_start = firstaddr + DMAPAGES*NBPG;
342
343	virtual_avail = (vm_offset_t) KERNBASE + avail_start;
344	virtual_end = VM_MAX_KERNEL_ADDRESS;
345	i386pagesperpage = PAGE_SIZE / NBPG;
346
347	/*
348	 * Initialize protection array.
349	 */
350	i386_protection_init();
351
352	/*
353	 * The kernel's pmap is statically allocated so we don't
354	 * have to use pmap_create, which is unlikely to work
355	 * correctly at this part of the boot sequence.
356	 */
357	kernel_pmap = &kernel_pmap_store;
358
359	kernel_pmap->pm_pdir = (pd_entry_t *)(KERNBASE + IdlePTD);
360
361	simple_lock_init(&kernel_pmap->pm_lock);
362	kernel_pmap->pm_count = 1;
363
364#if BSDVM_COMPAT
365	/*
366	 * Allocate all the submaps we need
367	 */
368#define	SYSMAP(c, p, v, n)	\
369	v = (c)va; va += ((n)*NBPG); p = pte; pte += (n);
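	/*
	 * For instance, the first SYSMAP() line below expands (roughly) to
	 *
	 *	CADDR1 = (caddr_t)va; va += NBPG; CMAP1 = pte; pte += 1;
	 *
	 * pairing one page of kernel VA with the pte that maps it, so that
	 * the pte can later be pointed at an arbitrary physical page (see
	 * pmap_zero_page() and pmap_copy_page()).
	 */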
370
371	va = virtual_avail;
372	pte = pmap_pte(kernel_pmap, va);
373
374	SYSMAP(caddr_t		,CMAP1		,CADDR1	   ,1		)
375	SYSMAP(caddr_t		,CMAP2		,CADDR2	   ,1		)
376	SYSMAP(caddr_t		,mmap		,vmmap	   ,1		)
377	SYSMAP(struct msgbuf *	,msgbufmap	,msgbufp   ,1		)
378	virtual_avail = va;
379#endif
380	/*
381	 * reserve special hunk of memory for use by bus dma as a bounce
382	 * buffer (contiguous virtual *and* physical memory). for now,
383	 * assume vm does not use memory beneath hole, and we know that
384	 * the bootstrap uses top 32k of base memory. -wfj
385	 */
386	{
387		extern vm_offset_t isaphysmem;
388		isaphysmem = va;
389
390		virtual_avail = pmap_map(va, firstaddr,
391				firstaddr + DMAPAGES*NBPG, VM_PROT_ALL);
392	}
393
394	*(int *)PTD = 0;
395	tlbflush();
396
397}
398
399/*
400 *	Initialize the pmap module.
401 *	Called by vm_init, to initialize any structures that the pmap
402 *	system needs to map virtual memory.
403 *	pmap_init has been enhanced to support discontiguous physical
404 *	memory in a fairly consistent way.
405 */
406void
407pmap_init(phys_start, phys_end)
408	vm_offset_t	phys_start, phys_end;
409{
410	vm_offset_t	addr, addr2;
411	vm_size_t	npg, s;
412	int		rv;
413	int i;
414	extern int KPTphys;
415	extern int IdlePTD;
416
417	/*
418	 * Now that kernel map has been allocated, we can mark as
419	 * unavailable regions which we have mapped in locore.
420	 */
421	addr = atdevbase;
422	(void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0,
423			   &addr, (0x100000-0xa0000), FALSE);
424
425	addr = (vm_offset_t) KERNBASE + IdlePTD;
426	vm_object_reference(kernel_object);
427	(void) vm_map_find(kernel_map, kernel_object, addr,
428			   &addr, (4 + NKPT) * NBPG, FALSE);
429
430
431	/*
432	 * calculate the number of pv_entries needed
433	 */
434	vm_first_phys = phys_avail[0];
435	for (i = 0; phys_avail[i + 1]; i += 2) ;
436	npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / NBPG;
437
438	/*
439	 * Allocate memory for random pmap data structures.  Includes the
440	 * pv_head_table.
441	 */
442	s = (vm_size_t) (sizeof(struct pv_entry) * npg);
443	s = i386_round_page(s);
444	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
445	pv_table = (pv_entry_t) addr;
446
447	/*
448	 * init the pv free list
449	 */
450	init_pv_entries(npg);
451	/*
452	 * Now it is safe to enable pv_table recording.
453	 */
454	pmap_initialized = TRUE;
455}
456
457/*
458 *	Used to map a range of physical addresses into kernel
459 *	virtual address space.
460 *
461 *	For now, VM is already on, we only need to map the
462 *	specified memory.
463 */
464vm_offset_t
465pmap_map(virt, start, end, prot)
466	vm_offset_t	virt;
467	vm_offset_t	start;
468	vm_offset_t	end;
469	int		prot;
470{
471	while (start < end) {
472		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
473		virt += PAGE_SIZE;
474		start += PAGE_SIZE;
475	}
476	return(virt);
477}
478
479/*
480 *	Create and return a physical map.
481 *
482 *	If the size specified for the map
483 *	is zero, the map is an actual physical
484 *	map, and may be referenced by the
485 *	hardware.
486 *
487 *	If the size specified is non-zero,
488 *	the map will be used in software only, and
489 *	is bounded by that size.
490 *
491 * [ just allocate a ptd and mark it uninitialized -- should we track
492 *   with a table which process has which ptd? -wfj ]
493 */
494
495pmap_t
496pmap_create(size)
497	vm_size_t	size;
498{
499	register pmap_t pmap;
500
501	/*
502	 * Software use map does not need a pmap
503	 */
504	if (size)
505		return(NULL);
506
507	pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK);
508	bzero(pmap, sizeof(*pmap));
509	pmap_pinit(pmap);
510	return (pmap);
511}
512
513
514struct pmaplist {
515	struct pmaplist *next;
516};
517
518static inline void *
519vm_get_pmap()
520{
521	struct pmaplist *rtval;
522
523	rtval = (struct pmaplist *)kmem_alloc(kernel_map, ctob(1));
524	bzero(rtval, ctob(1));
525	return rtval;
526}
527
528static inline void
529vm_put_pmap(up)
530	struct pmaplist *up;
531{
532	kmem_free(kernel_map, up, ctob(1));
533}
534
535/*
536 * Initialize a preallocated and zeroed pmap structure,
537 * such as one in a vmspace structure.
538 */
539void
540pmap_pinit(pmap)
541	register struct pmap *pmap;
542{
543	/*
544	 * No need to allocate page table space yet but we do need a
545	 * valid page directory table.
546	 */
547	pmap->pm_pdir = (pd_entry_t *) vm_get_pmap();
548
549	/* wire in kernel global address entries */
550	bcopy(PTD+KPTDI, pmap->pm_pdir+KPTDI, NKPT*PTESIZE);
551
552	/* install self-referential address mapping entry */
553	*(int *)(pmap->pm_pdir+PTDPTDI) =
554		((int)pmap_extract(kernel_pmap, (vm_offset_t)pmap->pm_pdir)) | PG_V | PG_KW;
555
556	pmap->pm_count = 1;
557	simple_lock_init(&pmap->pm_lock);
558}
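/*
 * A note on the self-referential entry installed above: once the physical
 * address of this pmap's page directory sits in slot PTDPTDI, loading the
 * directory into %cr3 makes the pmap's page tables visible through PTmap,
 * and pmap_pte()/get_pt_entry() reach a *non-current* pmap the same way by
 * copying that pde into APTDpde (the alternate window) and flushing the TLB.
 */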
559
560/*
561 *	Retire the given physical map from service.
562 *	Should only be called if the map contains
563 *	no valid mappings.
564 */
565void
566pmap_destroy(pmap)
567	register pmap_t pmap;
568{
569	int count;
570
571	if (pmap == NULL)
572		return;
573
574	simple_lock(&pmap->pm_lock);
575	count = --pmap->pm_count;
576	simple_unlock(&pmap->pm_lock);
577	if (count == 0) {
578		pmap_release(pmap);
579		free((caddr_t)pmap, M_VMPMAP);
580	}
581}
582
583/*
584 * Release any resources held by the given physical map.
585 * Called when a pmap initialized by pmap_pinit is being released.
586 * Should only be called if the map contains no valid mappings.
587 */
588void
589pmap_release(pmap)
590	register struct pmap *pmap;
591{
592	vm_put_pmap((struct pmaplist *) pmap->pm_pdir);
593}
594
595/*
596 *	Add a reference to the specified pmap.
597 */
598void
599pmap_reference(pmap)
600	pmap_t	pmap;
601{
602	if (pmap != NULL) {
603		simple_lock(&pmap->pm_lock);
604		pmap->pm_count++;
605		simple_unlock(&pmap->pm_lock);
606	}
607}
608
609#define PV_FREELIST_MIN ((NBPG / sizeof (struct pv_entry)) / 2)
610
611/*
612 * Data for the pv entry allocation mechanism
613 */
614int pv_freelistcnt;
615pv_entry_t pv_freelist;
616vm_offset_t pvva;
617int npvvapg;
618
619/*
620 * free the pv_entry back to the free list
621 */
622inline static void
623free_pv_entry(pv)
624	pv_entry_t pv;
625{
626	if (!pv) return;
627	++pv_freelistcnt;
628	pv->pv_next = pv_freelist;
629	pv_freelist = pv;
630}
631
632/*
633 * get a new pv_entry, allocating a block from the system
634 * when needed.
635 * the memory allocation is performed bypassing the malloc code
636 * because of the possibility of allocations at interrupt time.
637 */
638static inline pv_entry_t
639get_pv_entry()
640{
641	pv_entry_t tmp;
642
643	/*
644	 * get more pv_entry pages if needed
645	 */
646	while (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) {
647		pmap_alloc_pv_entry();
648	}
649
650	/*
651	 * get a pv_entry off of the free list
652	 */
653	--pv_freelistcnt;
654	tmp = pv_freelist;
655	pv_freelist = tmp->pv_next;
656	tmp->pv_pmap = 0;
657	tmp->pv_va = 0;
658	tmp->pv_next = 0;
659	return tmp;
660}
661
662/*
663 * this *strange* allocation routine *statistically* eliminates the
664 * *possibility* of a malloc failure (*FATAL*) for a pv_entry_t data structure.
665 * also -- this code is MUCH MUCH faster than the malloc equiv...
666 */
667void
668pmap_alloc_pv_entry()
669{
670	/*
671	 * do we have any pre-allocated map-pages left?
672	 */
673	if (npvvapg) {
674		vm_page_t m;
675		/*
676		 * we do this to keep recursion away
677		 */
678		pv_freelistcnt += PV_FREELIST_MIN;
679		/*
680		 * allocate a physical page out of the vm system
681		 */
682		if (m = vm_page_alloc(kernel_object, pvva-vm_map_min(kernel_map))) {
683			int newentries;
684			int i;
685			pv_entry_t entry;
686			newentries = (NBPG/sizeof (struct pv_entry));
687			/*
688			 * wire the page
689			 */
690			vm_page_wire(m);
691			m->flags &= ~PG_BUSY;
692			/*
693			 * let the kernel see it
694			 */
695			pmap_enter(vm_map_pmap(kernel_map), pvva,
696				VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT,1);
697
698			entry = (pv_entry_t) pvva;
699			/*
700			 * update the allocation pointers
701			 */
702			pvva += NBPG;
703			--npvvapg;
704
705			/*
706			 * free the entries into the free list
707			 */
708			for (i = 0; i < newentries; i++) {
709				free_pv_entry(entry);
710				entry++;
711			}
712		}
713		pv_freelistcnt -= PV_FREELIST_MIN;
714	}
715	if (!pv_freelist)
716		panic("get_pv_entry: cannot get a pv_entry_t");
717}
718
719
720
721/*
722 * init the pv_entry allocation system
723 */
724#define PVSPERPAGE 16
725void
726init_pv_entries(npg)
727	int npg;
728{
729	/*
730	 * allocate enough kvm space for PVSPERPAGE entries per page (lots)
731	 * kvm space is fairly cheap, be generous!!!  (the system can panic
732	 * if this is too small.)
733	 */
734	npvvapg = ((npg*PVSPERPAGE) * sizeof(struct pv_entry) + NBPG - 1)/NBPG;
735	pvva = kmem_alloc_pageable(kernel_map, npvvapg * NBPG);
736	/*
737	 * get the first batch of entries
738	 */
739	free_pv_entry(get_pv_entry());
740}
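/*
 * Sizing sketch (the concrete numbers are assumptions, not requirements):
 * with NBPG = 4096, PVSPERPAGE = 16 and a 16-byte struct pv_entry, a
 * machine with npg = 4096 managed pages reserves
 *
 *	npvvapg = (4096 * 16 * 16 + 4095) / 4096 = 256
 *
 * pageable kernel pages (1MB of KVA).  None of that KVA is backed by real
 * memory until pmap_alloc_pv_entry() explicitly allocates and maps pages,
 * one at a time, as the free list runs low.
 */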
741
742static pt_entry_t *
743get_pt_entry(pmap)
744	pmap_t pmap;
745{
746	pt_entry_t *ptp;
747	vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
748	/* are we current address space or kernel? */
749	if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME)) {
750		ptp=PTmap;
751	/* otherwise, we are alternate address space */
752	} else {
753		if ( frame != ((int) APTDpde & PG_FRAME)) {
754			APTDpde = pmap->pm_pdir[PTDPTDI];
755			tlbflush();
756		}
757		ptp=APTmap;
758	     }
759	return ptp;
760}
761
762/*
763 * If it is the first entry on the list, it is actually
764 * in the header and we must copy the following entry up
765 * to the header.  Otherwise we must search the list for
766 * the entry.  In either case we free the now unused entry.
767 */
768void
769pmap_remove_entry(pmap, pv, va)
770	struct pmap *pmap;
771	pv_entry_t pv;
772	vm_offset_t va;
773{
774	pv_entry_t npv;
775	int wired;
776	disable_intr();
777	if (pmap == pv->pv_pmap && va == pv->pv_va) {
778		npv = pv->pv_next;
779		if (npv) {
780			*pv = *npv;
781			free_pv_entry(npv);
782		} else {
783			pv->pv_pmap = NULL;
784		}
785	} else {
786		for (npv = pv->pv_next; npv; npv = npv->pv_next) {
787			if (pmap == npv->pv_pmap && va == npv->pv_va) {
788				break;
789			}
790			pv = npv;
791		}
792		if (npv) {
793			pv->pv_next = npv->pv_next;
794			free_pv_entry(npv);
795		}
796	}
797	enable_intr();
798}
799
800/*
801 *	Remove the given range of addresses from the specified map.
802 *
803 *	It is assumed that the start and end are properly
804 *	rounded to the page size.
805 */
806void
807pmap_remove(pmap, sva, eva)
808	struct pmap *pmap;
809	register vm_offset_t sva;
810	register vm_offset_t eva;
811{
812	register pt_entry_t *ptp,*ptq;
813	vm_offset_t pa;
814	register pv_entry_t pv;
815	vm_offset_t va;
816	vm_page_t m;
817	pt_entry_t oldpte;
818	int reqactivate = 0;
819
820	if (pmap == NULL)
821		return;
822
823	ptp = get_pt_entry(pmap);
824
825/*
826 * special handling of removing one page.  a very
827 * common operation and easy to short circuit some
828 * code.
829 */
830	if( (sva + NBPG) == eva) {
831
832		if( *pmap_pde( pmap, sva) == 0)
833			return;
834
835		ptq = ptp + i386_btop(sva);
836
837		if( !*ptq)
838			return;
839		/*
840		 * Update statistics
841		 */
842		if (pmap_pte_w(ptq))
843			pmap->pm_stats.wired_count--;
844		pmap->pm_stats.resident_count--;
845
846		pa = pmap_pte_pa(ptq);
847		oldpte = *ptq;
848		*ptq = 0;
849
850		if (pmap_is_managed(pa)) {
851			if ((((int) oldpte & PG_M) && (sva < USRSTACK || sva > UPT_MAX_ADDRESS))
852				|| (sva >= USRSTACK && sva < USRSTACK+(UPAGES*NBPG))) {
853				m = PHYS_TO_VM_PAGE(pa);
854				m->flags &= ~PG_CLEAN;
855			}
856
857			pv = pa_to_pvh(pa);
858			pmap_remove_entry(pmap, pv, sva);
859			pmap_unuse_pt(pmap, sva);
860		}
861		/*
862		 * Pageout daemon is the process that calls pmap_remove
863		 * most often when the page is not owned by the current
864		 * process. there are slightly more accurate checks, but
865		 * they are not nearly as fast.
866		 */
867		if( (curproc != pageproc) || (pmap == kernel_pmap))
868			tlbflush();
869		return;
870	}
871
872	sva = i386_btop(sva);
873	eva = i386_btop(eva);
874
875	while (sva < eva) {
876		/*
877		 * Weed out invalid mappings.
878		 * Note: we assume that the page directory table is
879	 	 * always allocated, and in kernel virtual.
880		 */
881
882		if ( *pmap_pde(pmap, i386_ptob(sva)) == 0 ) {
883			/* We can race ahead here, straight to next pde.. */
884	nextpde:
885			sva = ((sva + NPTEPG) & ~(NPTEPG - 1));
886			continue;
887		}
888
889		ptq = ptp + sva;
890
891		/*
892		 * search for page table entries, use string operations
893		 * that are much faster than
894		 * explicitly scanning when page tables are not fully
895		 * populated.
896		 */
897		if ( *ptq == 0) {
898			vm_offset_t pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1));
899			vm_offset_t nscan = pdnxt - sva;
900			int found = 0;
901
902			if ((nscan + sva) > eva)
903				nscan = eva - sva;
904
905			asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;"
906				:"=D"(ptq),"=a"(found)
907				:"c"(nscan),"0"(ptq)
908				:"cx");
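			/*
			 * The inline assembly above is a "repe scasl" scan of
			 * nscan ptes; as a sketch it behaves like the C below
			 * (edi advances one pte per iteration, so on exit ptq
			 * points one past the last pte examined whether or not
			 * a non-zero entry was found):
			 *
			 *	found = 0;
			 *	while (nscan-- != 0) {
			 *		if (*ptq++ != 0) {
			 *			found = 1;
			 *			break;
			 *		}
			 *	}
			 */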
909
910			if( !found) {
911				sva = pdnxt;
912				continue;
913			}
914			ptq -= 1;
915
916			sva = ptq - ptp;
917		}
918
919		/*
920		 * Update statistics
921		 */
922		oldpte = *ptq;
923		if (((int)oldpte) & PG_W)
924			pmap->pm_stats.wired_count--;
925		pmap->pm_stats.resident_count--;
926
927		/*
928		 * Invalidate the PTEs.
929		 * XXX: should cluster them up and invalidate as many
930		 * as possible at once.
931		 */
932		*ptq = 0;
933
934		/*
935		 * Remove from the PV table (raise IPL since we
936		 * may be called at interrupt time).
937		 */
938		pa = ((int)oldpte) & PG_FRAME;
939		if (!pmap_is_managed(pa)) {
940			++sva;
941			continue;
942		}
943
944		va = i386_ptob(sva);
945
946		if ((((int) oldpte & PG_M) && (va < USRSTACK || va > UPT_MAX_ADDRESS))
947			|| (va >= USRSTACK && va < USRSTACK+(UPAGES*NBPG))) {
948			m = PHYS_TO_VM_PAGE(pa);
949			m->flags &= ~PG_CLEAN;
950		}
951
952		pv = pa_to_pvh(pa);
953		pmap_remove_entry(pmap, pv, va);
954		pmap_unuse_pt(pmap, va);
955		++sva;
956		reqactivate = 1;
957	}
958endofloop:
959	/*
960	 * only call tlbflush if the pmap has changed and the tlb
961	 * *really* needs to be updated.
962	 */
963	if( reqactivate &&
964		((curproc != pageproc) || (pmap == kernel_pmap)))
965		tlbflush();
966}
967
968/*
969 *	Routine:	pmap_remove_all
970 *	Function:
971 *		Removes this physical page from
972 *		all physical maps in which it resides.
973 *		Reflects back modify bits to the pager.
974 *
975 *	Notes:
976 *		Original versions of this routine were very
977 *		inefficient because they iteratively called
978 *		pmap_remove (slow...)
979 */
980void
981pmap_remove_all(pa)
982	vm_offset_t pa;
983{
984	register pv_entry_t pv, npv;
985	register pt_entry_t *pte, *ptp;
986	vm_offset_t va;
987	struct pmap *pmap;
988	struct map *map;
989	vm_page_t m;
990	int rqactivate = 0;
991	int s;
992
993	/*
994	 * Not one of ours
995	 */
996	if (!pmap_is_managed(pa))
997		return;
998
999	pa = i386_trunc_page(pa);
1000	pv = pa_to_pvh(pa);
1001	m = PHYS_TO_VM_PAGE(pa);
1002
1003	s = splimp();
1004	while (pv->pv_pmap != NULL) {
1005		pmap = pv->pv_pmap;
1006		ptp = get_pt_entry(pmap);
1007		va = i386_btop(pv->pv_va);
1008		pte = ptp + va;
1009		if (pmap_pte_w(pte))
1010			pmap->pm_stats.wired_count--;
1011		if ( *pte)
1012			pmap->pm_stats.resident_count--;
1013
1014		/*
1015		 * update the vm_page_t clean bit
1016		 */
1017		if ( (m->flags & PG_CLEAN) &&
1018			(((((int) *pte) & PG_M) && (pv->pv_va < USRSTACK || pv->pv_va > UPT_MAX_ADDRESS))
1019			|| (pv->pv_va >= USRSTACK && pv->pv_va < USRSTACK+(UPAGES*NBPG)))) {
1020			m->flags &= ~PG_CLEAN;
1021		}
1022
1023		*pte = 0;
1024		pmap_unuse_pt(pmap, pv->pv_va);
1025
1026		npv = pv->pv_next;
1027		if (npv) {
1028			*pv = *npv;
1029			free_pv_entry(npv);
1030		} else {
1031			pv->pv_pmap = NULL;
1032		}
1033		if( (curproc != pageproc) || (pmap == kernel_pmap))
1034			rqactivate = 1;
1035	}
1036	splx(s);
1037
1038	if( rqactivate)
1039		tlbflush();
1040}
1041
1042
1043/*
1044 *	Set the physical protection on the
1045 *	specified range of this map as requested.
1046 */
1047void
1048pmap_protect(pmap, sva, eva, prot)
1049	register pmap_t	pmap;
1050	vm_offset_t	sva, eva;
1051	vm_prot_t	prot;
1052{
1053	register pt_entry_t *pte;
1054	register vm_offset_t va;
1055	int i386prot;
1056	register pt_entry_t *ptp;
1057	int reqactivate = 0;
1058	int evap = i386_btop(eva);
1059	int s;
1060
1061	if (pmap == NULL)
1062		return;
1063
1064	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1065		pmap_remove(pmap, sva, eva);
1066		return;
1067	}
1068	if (prot & VM_PROT_WRITE)
1069		return;
1070
1071	ptp = get_pt_entry(pmap);
1072
1073	va = sva;
1074	while (va < eva) {
1075		int found=0;
1076		int svap;
1077		vm_offset_t nscan;
1078		/*
1079		 * Page table page is not allocated.
1080		 * Skip it, we don't want to force allocation
1081		 * of unnecessary PTE pages just to set the protection.
1082		 */
1083		if (! *pmap_pde(pmap, va)) {
1084			/* XXX: avoid address wrap around */
1085nextpde:
1086			if (va >= i386_trunc_pdr((vm_offset_t)-1))
1087				break;
1088			va = i386_round_pdr(va + PAGE_SIZE);
1089			continue;
1090		}
1091
1092		pte = ptp + i386_btop(va);
1093
1094		if( *pte == 0) {
1095		/*
1096		 * scan for a non-empty pte
1097		 */
1098			svap = pte - ptp;
1099			nscan = ((svap + NPTEPG) & ~(NPTEPG - 1)) - svap;
1100
1101			if (nscan + svap > evap)
1102				nscan = evap - svap;
1103
1104			found = 0;
1105			if (nscan)
1106				asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;"
1107					:"=D"(pte),"=a"(found)
1108					:"c"(nscan),"0"(pte):"cx");
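			/*
			 * Same "repe scasl" idiom as in pmap_remove(): skip a
			 * run of zero ptes, leaving pte one past the last entry
			 * examined and found set only if a non-zero pte stopped
			 * the scan.
			 */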
1109
1110			if( !found)
1111				goto nextpde;
1112
1113			pte -= 1;
1114			svap = pte - ptp;
1115
1116			va = i386_ptob(svap);
1117		}
1118
1119		i386prot = pte_prot(pmap, prot);
1120		if (va < UPT_MAX_ADDRESS) {
1121			i386prot |= PG_u;
1122			if( va >= UPT_MIN_ADDRESS)
1123				i386prot |= PG_RW;
1124		}
1125		if (i386prot != ( (int) *pte & PG_PROT)) {
1126			reqactivate = 1;
1127			pmap_pte_set_prot(pte, i386prot);
1128		}
1129		va += PAGE_SIZE;
1130	}
1131endofloop:
1132	/*
1133	 * only if pte changed
1134	 */
1135	if( reqactivate)
1136		tlbflush();
1137}
1138
1139/*
1140 *	Insert the given physical page (p) at
1141 *	the specified virtual address (v) in the
1142 *	target physical map with the protection requested.
1143 *
1144 *	If specified, the page will be wired down, meaning
1145 *	that the related pte can not be reclaimed.
1146 *
1147 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1148 *	or lose information.  That is, this routine must actually
1149 *	insert this page into the given map NOW.
1150 */
1151void
1152pmap_enter(pmap, va, pa, prot, wired)
1153	register pmap_t pmap;
1154	vm_offset_t va;
1155	register vm_offset_t pa;
1156	vm_prot_t prot;
1157	boolean_t wired;
1158{
1159	register pt_entry_t *pte;
1160	register pt_entry_t npte;
1161	vm_offset_t opa;
1162	int cacheable=1;
1163
1164	if (pmap == NULL)
1165		return;
1166
1167	va = i386_trunc_page(va);
1168	pa = i386_trunc_page(pa);
1169	if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig");
1170
1171	/*
1172	 * Page Directory table entry not valid, we need a new PT page
1173	 */
1174	if ( *pmap_pde(pmap, va) == 0) {
1175		pg("ptdi %x, va %x", pmap->pm_pdir[PTDPTDI], va);
1176	}
1177
1178	pte = pmap_pte(pmap, va);
1179	opa = pmap_pte_pa(pte);
1180
1181	/*
1182	 * Mapping has not changed, must be protection or wiring change.
1183	 */
1184	if (opa == pa) {
1185		/*
1186		 * Wiring change, just update stats.
1187		 * We don't worry about wiring PT pages as they remain
1188		 * resident as long as there are valid mappings in them.
1189		 * Hence, if a user page is wired, the PT page will be also.
1190		 */
1191		if (wired && !pmap_pte_w(pte) || !wired && pmap_pte_w(pte)) {
1192			if (wired)
1193				pmap->pm_stats.wired_count++;
1194			else
1195				pmap->pm_stats.wired_count--;
1196		}
1197		goto validate;
1198	}
1199
1200	/*
1201	 * Mapping has changed, invalidate old range and fall through to
1202	 * handle validating new mapping.
1203	 */
1204	if (opa) {
1205		pmap_remove(pmap, va, va + PAGE_SIZE);
1206	}
1207
1208	/*
1209	 * Enter on the PV list if part of our managed memory
1210	 * Note that we raise IPL while manipulating pv_table
1211	 * since pmap_enter can be called at interrupt time.
1212	 */
1213	if (pmap_is_managed(pa)) {
1214		register pv_entry_t pv, npv;
1215		int s;
1216
1217		pv = pa_to_pvh(pa);
1218		s = splimp();
1219		/*
1220		 * No entries yet, use header as the first entry
1221		 */
1222		if (pv->pv_pmap == NULL) {
1223			pv->pv_va = va;
1224			pv->pv_pmap = pmap;
1225			pv->pv_next = NULL;
1226		}
1227		/*
1228		 * There is at least one other VA mapping this page.
1229		 * Place this entry after the header.
1230		 */
1231		else {
1232			npv = get_pv_entry();
1233			npv->pv_va = va;
1234			npv->pv_pmap = pmap;
1235			npv->pv_next = pv->pv_next;
1236			pv->pv_next = npv;
1237		}
1238		splx(s);
1239		cacheable = 1;
1240	} else {
1241		cacheable = 0;
1242	}
1243
1244	pmap_use_pt(pmap, va);
1245
1246	/*
1247	 * Increment counters
1248	 */
1249	pmap->pm_stats.resident_count++;
1250	if (wired)
1251		pmap->pm_stats.wired_count++;
1252
1253validate:
1254	/*
1255	 * Now validate mapping with desired protection/wiring.
1256	 */
1257	npte = (pt_entry_t) ( (int) (pa | pte_prot(pmap, prot) | PG_V));
1258	/*
1259	 * for correctness: disable caching for unmanaged (device) memory
1260	 */
1261	if( !cacheable)
1262		(int) npte |= PG_N;
1263
1264	/*
1265	 * When forking (copy-on-write, etc):
1266	 * A process will turn off write permissions for any of its writable
1267	 * pages.  If the data (object) is only referred to by one process, the
1268	 * processes map is modified directly as opposed to using the
1269	 * object manipulation routine.  When using pmap_protect, the
1270	 * modified bits are not kept in the vm_page_t data structure.
1271	 * Therefore, when using pmap_enter in vm_fault to bring back
1272	 * writability of a page, there has been no memory of the
1273	 * modified or referenced bits except at the pte level.
1274	 * this clause supports the carryover of the modified and
1275	 * used (referenced) bits.
1276	 */
1277	if (pa == opa)
1278		(int) npte |= (int) *pte & (PG_M|PG_U);
1279
1280
1281	if (wired)
1282		(int) npte |= PG_W;
1283	if (va < UPT_MIN_ADDRESS)
1284		(int) npte |= PG_u;
1285	else if (va < UPT_MAX_ADDRESS)
1286		(int) npte |= PG_u | PG_RW;
1287
1288	/*
1289	 * only if pte changed
1290	 */
1291	if ((int) npte != (int) *pte) {
1292		*pte = npte;
1293		tlbflush();
1294	}
1295}
1296
1297/*
1298 * this code makes some *MAJOR* assumptions:
1299 * 1. Current pmap & pmap exists.
1300 * 2. Not wired.
1301 * 3. Read access.
1302 * 4. No page table pages.
1303 * 5. Tlbflush is deferred to calling procedure.
1304 * 6. Page IS managed.
1305 * but is *MUCH* faster than pmap_enter...
1306 */
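/*
 * In particular (assumption 5 above), the caller owns the TLB flush:
 * pmap_object_init_pt() below batches its pmap_enter_quick() calls and
 * issues a single tlbflush() at the end.
 */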
1307
1308static inline void
1309pmap_enter_quick(pmap, va, pa)
1310	register pmap_t pmap;
1311	vm_offset_t va;
1312	register vm_offset_t pa;
1313{
1314	register pt_entry_t *pte;
1315	register pv_entry_t pv, npv;
1316	int s;
1317
1318	/*
1319	 * Enter on the PV list if part of our managed memory
1320	 * Note that we raise IPL while manipulating pv_table
1321	 * since pmap_enter can be called at interrupt time.
1322	 */
1323
1324	pte = vtopte(va);
1325	if (pmap_pte_pa(pte)) {
1326		pmap_remove(pmap, va, va + PAGE_SIZE);
1327	}
1328
1329	pv = pa_to_pvh(pa);
1330	s = splimp();
1331	/*
1332	 * No entries yet, use header as the first entry
1333	 */
1334	if (pv->pv_pmap == NULL) {
1335		pv->pv_va = va;
1336		pv->pv_pmap = pmap;
1337		pv->pv_next = NULL;
1338	}
1339	/*
1340	 * There is at least one other VA mapping this page.
1341	 * Place this entry after the header.
1342	 */
1343	else {
1344		npv = get_pv_entry();
1345		npv->pv_va = va;
1346		npv->pv_pmap = pmap;
1347		npv->pv_next = pv->pv_next;
1348		pv->pv_next = npv;
1349	}
1350	splx(s);
1351
1352	pmap_use_pt(pmap, va);
1353
1354	/*
1355	 * Increment counters
1356	 */
1357	pmap->pm_stats.resident_count++;
1358
1359validate:
1360
1361	/*
1362	 * Now validate mapping with desired protection/wiring.
1363	 */
1364	*pte = (pt_entry_t) ( (int) (pa | PG_RO | PG_V | PG_u));
1365}
1366
1367/*
1368 * pmap_object_init_pt preloads the ptes for a given object
1369 * into the specified pmap.  This eliminates the blast of soft
1370 * faults on process startup and immediately after an mmap.
1371 */
1372void
1373pmap_object_init_pt(pmap, addr, object, offset, size)
1374	pmap_t pmap;
1375	vm_offset_t addr;
1376	vm_object_t object;
1377	vm_offset_t offset;
1378	vm_offset_t size;
1379{
1380
1381	vm_offset_t tmpoff;
1382	vm_page_t p;
1383	int s;
1384	vm_offset_t v, lastv=0;
1385	pt_entry_t pte;
1386	extern vm_map_t kernel_map;
1387	vm_offset_t objbytes;
1388
1389	if (!pmap)
1390		return;
1391
1392	/*
1393	 * if we are processing a major portion of the object, then
1394	 * scan the entire thing.
1395	 */
1396	if( size > object->size / 2) {
1397		objbytes = size;
1398		p = (vm_page_t) queue_first(&object->memq);
1399		while (!queue_end(&object->memq, (queue_entry_t) p) && objbytes != 0) {
1400			tmpoff = p->offset;
1401			if( tmpoff < offset) {
1402				p = (vm_page_t) queue_next(&p->listq);
1403				continue;
1404			}
1405			tmpoff -= offset;
1406			if( tmpoff >= size) {
1407				p = (vm_page_t) queue_next(&p->listq);
1408				continue;
1409			}
1410
1411			if ((p->flags & (PG_BUSY|PG_FICTITIOUS)) == 0 ) {
1412				vm_page_hold(p);
1413				v = i386_trunc_page(((vm_offset_t)vtopte( addr+tmpoff)));
1414				/* a fault here brings in the page table page that pmap_enter_quick() assumes exists */
1415				*(volatile char *)v += 0;
1416				vm_page_unhold(p);
1417				pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p));
1418			}
1419			p = (vm_page_t) queue_next(&p->listq);
1420			objbytes -= NBPG;
1421		}
1422	} else {
1423	/*
1424	 * else lookup the pages one-by-one.
1425	 */
1426		for(tmpoff = 0; tmpoff < size; tmpoff += NBPG) {
1427			if( p = vm_page_lookup(object, tmpoff + offset)) {
1428				if( (p->flags & (PG_BUSY|PG_FICTITIOUS)) == 0) {
1429					vm_page_hold(p);
1430					v = i386_trunc_page(((vm_offset_t)vtopte( addr+tmpoff)));
1431					/* a fault here brings in the page table page that pmap_enter_quick() assumes exists */
1432					*(volatile char *)v += 0;
1433					vm_page_unhold(p);
1434					pmap_enter_quick(pmap, addr+tmpoff, VM_PAGE_TO_PHYS(p));
1435				}
1436			}
1437		}
1438	}
1439
1440	tlbflush();
1441}
1442
1443/*
1444 *	Routine:	pmap_change_wiring
1445 *	Function:	Change the wiring attribute for a map/virtual-address
1446 *			pair.
1447 *	In/out conditions:
1448 *			The mapping must already exist in the pmap.
1449 */
1450void
1451pmap_change_wiring(pmap, va, wired)
1452	register pmap_t	pmap;
1453	vm_offset_t	va;
1454	boolean_t	wired;
1455{
1456	register pt_entry_t *pte;
1457
1458	if (pmap == NULL)
1459		return;
1460
1461	pte = pmap_pte(pmap, va);
1462	if (wired && !pmap_pte_w(pte) || !wired && pmap_pte_w(pte)) {
1463		if (wired)
1464			pmap->pm_stats.wired_count++;
1465		else
1466			pmap->pm_stats.wired_count--;
1467	}
1468	/*
1469	 * Wiring is not a hardware characteristic so there is no need
1470	 * to invalidate TLB.
1471	 */
1472	pmap_pte_set_w(pte, wired);
1473	/*
1474 	 * When unwiring, set the modified bit in the pte -- could have
1475	 * been changed by the kernel
1476 	 */
1477	if (!wired)
1478		(int) *pte |= PG_M;
1479}
1480
1481
1482
1483/*
1484 *	Copy the range specified by src_addr/len
1485 *	from the source map to the range dst_addr/len
1486 *	in the destination map.
1487 *
1488 *	This routine is only advisory and need not do anything.
1489 */
1490void
1491pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
1492	pmap_t dst_pmap, src_pmap;
1493	vm_offset_t	dst_addr;
1494	vm_size_t	len;
1495	vm_offset_t	src_addr;
1496{
1497}
1498/*
1499 *	Require that all active physical maps contain no
1500 *	incorrect entries NOW.  [This update includes
1501 *	forcing updates of any address map caching.]
1502 *
1503 *	Generally used to insure that a thread about
1504 *	to run will see a semantically correct world.
1505 */
1506void
1507pmap_update()
1508{
1509	tlbflush();
1510}
1511
1512/*
1513 *	Routine:	pmap_kernel
1514 *	Function:
1515 *		Returns the physical map handle for the kernel.
1516 */
1517pmap_t
1518pmap_kernel()
1519{
1520    	return (kernel_pmap);
1521}
1522
1523/*
1524 *	pmap_zero_page zeros the specified (machine independent)
1525 *	page by mapping the page into virtual memory and using
1526 *	bzero to clear its contents, one machine dependent page
1527 *	at a time.
1528 */
1529void
1530pmap_zero_page(phys)
1531	vm_offset_t phys;
1532{
1533	*(int *)CMAP2 = PG_V | PG_KW | i386_trunc_page(phys);
1534	tlbflush();
1535	bzero(CADDR2,NBPG);
1536}
1537
1538/*
1539 *	pmap_copy_page copies the specified (machine independent)
1540 *	page by mapping the page into virtual memory and using
1541 *	bcopy to copy the page, one machine dependent page at a
1542 *	time.
1543 */
1544void
1545pmap_copy_page(src, dst)
1546	vm_offset_t src;
1547	vm_offset_t dst;
1548{
1549	*(int *)CMAP1 = PG_V | PG_KW | i386_trunc_page(src);
1550	*(int *)CMAP2 = PG_V | PG_KW | i386_trunc_page(dst);
1551	tlbflush();
1552
1553#if __GNUC__ > 1
1554	memcpy(CADDR2, CADDR1, NBPG);
1555#else
1556	bcopy(CADDR1, CADDR2, NBPG);
1557#endif
1558}
1559
1560
1561/*
1562 *	Routine:	pmap_pageable
1563 *	Function:
1564 *		Make the specified pages (by pmap, offset)
1565 *		pageable (or not) as requested.
1566 *
1567 *		A page which is not pageable may not take
1568 *		a fault; therefore, its page table entry
1569 *		must remain valid for the duration.
1570 *
1571 *		This routine is merely advisory; pmap_enter
1572 *		will specify that these pages are to be wired
1573 *		down (or not) as appropriate.
1574 */
1575void
1576pmap_pageable(pmap, sva, eva, pageable)
1577	pmap_t		pmap;
1578	vm_offset_t	sva, eva;
1579	boolean_t	pageable;
1580{
1581}
1582
1583/*
1584 * this routine returns true if a physical page resides
1585 * in the given pmap.
1586 */
1587boolean_t
1588pmap_page_exists(pmap, pa)
1589	pmap_t pmap;
1590	vm_offset_t pa;
1591{
1592	register pv_entry_t pv;
1593	int s;
1594
1595	if (!pmap_is_managed(pa))
1596		return FALSE;
1597
1598	pv = pa_to_pvh(pa);
1599	s = splimp();
1600
1601	/*
1602	 * Not found, check current mappings returning
1603	 * immediately if found.
1604	 */
1605	if (pv->pv_pmap != NULL) {
1606		for (; pv; pv = pv->pv_next) {
1607			if (pv->pv_pmap == pmap) {
1608				splx(s);
1609				return TRUE;
1610			}
1611		}
1612	}
1613	splx(s);
1614	return(FALSE);
1615}
1616
1617/*
1618 * pmap_testbit tests bits in pte's
1619 * note that the testbit/changebit routines are inline,
1620 * and a lot of things compile-time evaluate.
1621 */
1622static inline boolean_t
1623pmap_testbit(pa, bit)
1624	register vm_offset_t pa;
1625	int bit;
1626{
1627	register pv_entry_t pv;
1628	pt_entry_t *pte;
1629
1630	if (!pmap_is_managed(pa))
1631		return FALSE;
1632
1633	pv = pa_to_pvh(pa);
1634	disable_intr();
1635
1636	/*
1637	 * Not found, check current mappings returning
1638	 * immediately if found.
1639	 */
1640	if (pv->pv_pmap != NULL) {
1641		for (; pv; pv = pv->pv_next) {
1642			/*
1643			 * if the bit being tested is the modified bit,
1644			 * then mark UPAGES as always modified, and
1645			 * ptes as never modified.
1646			 */
1647			if (bit & PG_M ) {
1648				if (pv->pv_va >= USRSTACK) {
1649					if (pv->pv_va < USRSTACK+(UPAGES*NBPG)) {
1650						enable_intr();
1651						return TRUE;
1652					}
1653					else if (pv->pv_va < UPT_MAX_ADDRESS) {
1654						enable_intr();
1655						return FALSE;
1656					}
1657				}
1658			}
1659			pte = pmap_pte(pv->pv_pmap, pv->pv_va);
1660			if ((int) *pte & bit) {
1661				enable_intr();
1662				return TRUE;
1663			}
1664		}
1665	}
1666	enable_intr();
1667	return(FALSE);
1668}
1669
1670/*
1671 * this routine is used to modify bits in ptes
1672 */
1673static inline void
1674pmap_changebit(pa, bit, setem)
1675	vm_offset_t pa;
1676	int bit;
1677	boolean_t setem;
1678{
1679	register pv_entry_t pv;
1680	register pt_entry_t *pte, npte;
1681	vm_offset_t va;
1682	int s;
1683	int reqactivate = 0;
1684
1685	if (!pmap_is_managed(pa))
1686		return;
1687
1688	pv = pa_to_pvh(pa);
1689	disable_intr();
1690
1691	/*
1692	 * Loop over all current mappings setting/clearing as appropriate.
1693	 * If setting RO do we need to clear the VAC?
1694	 */
1695	if (pv->pv_pmap != NULL) {
1696		for (; pv; pv = pv->pv_next) {
1697			va = pv->pv_va;
1698
1699			/*
1700			 * don't write protect pager mappings
1701			 */
1702			if (!setem && (bit == PG_RW)) {
1703				extern vm_offset_t pager_sva, pager_eva;
1704
1705				if (va >= pager_sva && va < pager_eva)
1706					continue;
1707			}
1708
1709			pte = pmap_pte(pv->pv_pmap, va);
1710			if (setem)
1711				(int) npte = (int) *pte | bit;
1712			else
1713				(int) npte = (int) *pte & ~bit;
1714			if (*pte != npte) {
1715				*pte = npte;
1716				reqactivate = 1;
1717			}
1718		}
1719	}
1720	enable_intr();
1721	/*
1722	 * tlbflush only if we need to
1723	 */
1724	if( reqactivate && (curproc != pageproc))
1725		tlbflush();
1726}
1727
1728/*
1729 *      pmap_page_protect:
1730 *
1731 *      Lower the permission for all mappings to a given page.
1732 */
1733void
1734pmap_page_protect(phys, prot)
1735        vm_offset_t     phys;
1736        vm_prot_t       prot;
1737{
1738	if ((prot & VM_PROT_WRITE) == 0) {
1739		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
1740			pmap_changebit(phys, PG_RW, FALSE);
1741		else
1742			pmap_remove_all(phys);
1743	}
1744}
1745
1746/*
1747 *	Clear the modify bits on the specified physical page.
1748 */
1749void
1750pmap_clear_modify(pa)
1751	vm_offset_t	pa;
1752{
1753	pmap_changebit(pa, PG_M, FALSE);
1754}
1755
1756/*
1757 *	pmap_clear_reference:
1758 *
1759 *	Clear the reference bit on the specified physical page.
1760 */
1761void
1762pmap_clear_reference(pa)
1763	vm_offset_t	pa;
1764{
1765	pmap_changebit(pa, PG_U, FALSE);
1766}
1767
1768/*
1769 *	pmap_is_referenced:
1770 *
1771 *	Return whether or not the specified physical page is referenced
1772 *	by any physical maps.
1773 */
1774
1775boolean_t
1776pmap_is_referenced(pa)
1777	vm_offset_t	pa;
1778{
1779	return(pmap_testbit(pa, PG_U));
1780}
1781
1782/*
1783 *	pmap_is_modified:
1784 *
1785 *	Return whether or not the specified physical page is modified
1786 *	by any physical maps.
1787 */
1788
1789boolean_t
1790pmap_is_modified(pa)
1791	vm_offset_t	pa;
1792{
1793	return(pmap_testbit(pa, PG_M));
1794}
1795
1796/*
1797 *	Routine:	pmap_copy_on_write
1798 *	Function:
1799 *		Remove write privileges from all
1800 *		physical maps for this physical page.
1801 */
1802void
1803pmap_copy_on_write(pa)
1804	vm_offset_t pa;
1805{
1806	pmap_changebit(pa, PG_RW, FALSE);
1807}
1808
1809
1810vm_offset_t
1811pmap_phys_address(ppn)
1812	int ppn;
1813{
1814	return(i386_ptob(ppn));
1815}
1816
1817/*
1818 * Miscellaneous support routines follow
1819 */
1820
1821void
1822i386_protection_init()
1823{
1824	register int *kp, prot;
1825
1826	kp = protection_codes;
1827	for (prot = 0; prot < 8; prot++) {
1828		switch (prot) {
1829		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
1830			*kp++ = 0;
1831			break;
1832		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
1833		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
1834		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
1835			*kp++ = PG_RO;
1836			break;
1837		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
1838		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
1839		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
1840		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
1841			*kp++ = PG_RW;
1842			break;
1843		}
1844	}
1845}
1846
1847#ifdef DEBUG
1848void
1849pmap_pvdump(pa)
1850	vm_offset_t pa;
1851{
1852	register pv_entry_t pv;
1853
1854	printf("pa %x", pa);
1855	for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) {
1856		printf(" -> pmap %x, va %x, flags %x",
1857		       pv->pv_pmap, pv->pv_va, pv->pv_flags);
1858		pads(pv->pv_pmap);
1859	}
1860	printf(" ");
1861}
1862
1863/* print address space of pmap*/
1864void
1865pads(pm)
1866	pmap_t pm;
1867{
1868	unsigned va, i, j;
1869	pt_entry_t *ptep;
1870
1871	if (pm == kernel_pmap) return;
1872	for (i = 0; i < 1024; i++)
1873		if (pm->pm_pdir[i])
1874			for (j = 0; j < 1024 ; j++) {
1875				va = (i<<PD_SHIFT)+(j<<PG_SHIFT);
1876				if (pm == kernel_pmap && va < KERNBASE)
1877						continue;
1878				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
1879						continue;
1880				ptep = pmap_pte(pm, va);
1881				if (pmap_pte_v(ptep))
1882					printf("%x:%x ", va, *(int *)ptep);
1883			} ;
1884
1885}
1886#endif
1887